import sys
sys.path.insert(0,'../lib')
from generallib import *
connection = getConnection()
pd.set_option('max_colwidth',100)
# display(md(''))
display(md('<center><img src="verticals.png" width="300px"><center>'))
display(md('# COVID-19 Data Anomalies'))
display(md('These are data anomalies identified by Verticals, my automated analysis system.'))
display(md("Compiled "+datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")+" UTC."))
In this report:
query = """
select
id,
name,
description,
up,
down
from
verticals.metric
where
interval_type='daily'
and active=1
"""
metricdf = generateTable(query,connection,['id','name','description','up','down'])
metricdf = metricdf.set_index('id')
query = f"""
select
ma.*
from
verticals.metric_analysis ma
inner join verticals.metric_category_map mcm on ma.metric_id=mcm.metric_id
where
ma.analyzed >= current_date() -- - interval 1 day -- if testing later in day (UTC issues)
and mcm.category_id=1
order by if(ma.e = 0,10,abs((ma.projected - ma.value) / ma.e)) desc
"""
df = generateTable(query,connection,['ts','t','i','a','b','e','v','p'])
mdString = ''
rowsExist = False
for index,row in df.iterrows():
meta = metricdf.loc[row['i']]
mdString += '- [' + meta['name'] + '](#' + meta['name'].replace(' ','-') + ')' + "\n"
rowsExist = True
if not rowsExist:
mdString += "There were no anomalies yesterday."
display(md(mdString))
colors = {'GOOD':['#587834'],'BAD':['#d13525']}
def getMetricQuery(id):
return f"""
select
interval_id,
verticals.getDatetimeFromIntervalId(interval_id) as dt,
value
from
verticals.metric_data
where
interval_id >= verticals.getIntervalIdFromDatetime(current_date - interval 30 day)
and metric_id={id}
order by interval_id
"""
def getJournalQuery(country):
return f"""
select
date(verticals.getDatetimeFromIntervalId(interval_id)) as theDate,
group_concat(title separator "<br>")
from
verticals.journal j
inner join verticals.journal_category_map jcm on j.id = jcm.journal_id
left join verticals.journal_label_map jlm on j.id = jlm.journal_id
where
jcm.category_id = 1
and j.interval_id >= verticals.getIntervalIdFromDatetime(current_date - interval 14 day)
and (
(
jlm.label is null
and binary j.title like '%{country}%'
)
or jlm.label = '{country}'
)
group by theDate
order by theDate
"""
today = datetime.date.today()
today = datetime.datetime(today.year,today.month,today.day)
windowStart = today - datetime.timedelta(days=1,hours=12)
windowEnd = today - datetime.timedelta(hours=12)
for index,row in df.iterrows():
meta = metricdf.loc[row['i']]
mdf = generateTable(getMetricQuery(row['i']),connection,['interval_id','dt','value'])
display(md('# ' + meta['name']))
# figure out the color
if (row['v'] > row['p']):
whichColor = meta['up']
else:
whichColor = meta['down']
fig = generateGenericGraphDF(meta['name'],mdf,['value'],xCol='dt',labels=['value'])
ba = BoxAnnotation(left=windowStart,right=windowEnd,fill_color=colors[whichColor][0],fill_alpha=0.2)
fig.add_layout(ba)
country = meta['name'].replace(' Dead','').replace(' Confirmed','')
journal = generateTable(getJournalQuery(country),connection,['Date','Title'])
journalY = float(mdf.min()['value']) - (float(mdf.max()['value'] - mdf.min()['value']) * 0.1)
jds = ColumnDataSource(journal)
which = fig.diamond(x='Date',y=journalY,source=jds,size=12)
fig.add_tools(HoverTool(renderers=[which],tooltips=[('Events','@Title{safe}')],mode='vline'))
show(fig)
display(md('*' + meta['description'] + '*'))
display(md('Projected value for yesterday was ' + str(round(row['p'])) + ' but actual was ' + str(round(row['v']))))
if len(journal.index) > 0:
display(md('## ' + country + ' Event Journal'))
#fig,axis = getTimeline("Event Timeline",journal['Date'].apply(datetime.datetime.strftime,args=('%Y-%m-%d',)),journal['Title'].apply(lambda x,l : x[:l],args=(int(1000 / len(journal.index)),)),interval=5)
#display(fig)
display(journal.style.hide_index())