In [1]:
import sys
sys.path.insert(0,'../lib')
from generallib import *

connection = getConnection()

pd.set_option('max_colwidth',100)
In [2]:
# display(md('![Verticals](verticals.png)'))
display(md('<center><img src="verticals.png" width="300px"><center>'))

In [3]:
display(md('# COVID-19 Data Anomalies'))

COVID-19 Data Anomalies


In [4]:
display(md('These are data anomalies identified by Verticals, my automated analysis system.'))
display(md("Compiled "+datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")+" UTC."))

These are data anomalies identified by Verticals, my automated analysis system.

Compiled 2020-08-12 12:25:05 UTC.


In this report:

In [5]:
query = """
select
    id,
    name,
    description,
    up,
    down
from
    verticals.metric
where
    interval_type='daily'
    and active=1
"""
metricdf = generateTable(query,connection,['id','name','description','up','down'])
metricdf = metricdf.set_index('id')

query = f"""
select
    ma.*
from
    verticals.metric_analysis ma
    inner join verticals.metric_category_map mcm on ma.metric_id=mcm.metric_id
where
    ma.analyzed >= current_date() -- - interval 1 day -- if testing later in day (UTC issues)
    and mcm.category_id=1
order by if(ma.e = 0,10,abs((ma.projected - ma.value) / ma.e)) desc
"""
df = generateTable(query,connection,['ts','t','i','a','b','e','v','p'])

mdString = ''
for index,row in df.iterrows():
    meta = metricdf.loc[row['i']]
    mdString += '- [' + meta['name'] + '](#' + meta['name'].replace(' ','-') + ')' + "\n"
    
display(md(mdString))

In [6]:
colors = {'GOOD':['#587834'],'BAD':['#d13525']}

def getMetricQuery(id):
    return f"""
    select
        interval_id,
        verticals.getDatetimeFromIntervalId(interval_id) as dt,
        value
    from
        verticals.metric_data
    where
        interval_id >= verticals.getIntervalIdFromDatetime(current_date - interval 30 day)
        and metric_id={id}
    order by interval_id
    """

def getJournalQuery(country):
    return f"""
    select
        date(verticals.getDatetimeFromIntervalId(interval_id)) as theDate,
        group_concat(title separator "<br>")
    from
        verticals.journal j
        inner join verticals.journal_category_map jcm on j.id = jcm.journal_id
        left join verticals.journal_label_map jlm on j.id = jlm.journal_id
    where
        jcm.category_id = 1
        and j.interval_id >= verticals.getIntervalIdFromDatetime(current_date - interval 14 day)
        and (
                (
                jlm.label is null
                and binary j.title like '%{country}%'
                )
            or jlm.label = '{country}'
            )
    group by theDate
    order by theDate
    """


today = datetime.date.today()
today = datetime.datetime(today.year,today.month,today.day)
windowStart = today - datetime.timedelta(days=1,hours=12)
windowEnd = today - datetime.timedelta(hours=12)
    
for index,row in df.iterrows():
    meta = metricdf.loc[row['i']]
    mdf = generateTable(getMetricQuery(row['i']),connection,['interval_id','dt','value'])
    display(md('# ' + meta['name']))
    
    # figure out the color
    if (row['v'] > row['p']):
        whichColor = meta['up']
    else:
        whichColor = meta['down']
        
    fig = generateGenericGraphDF(meta['name'],mdf,['value'],xCol='dt',labels=['value'])
    ba = BoxAnnotation(left=windowStart,right=windowEnd,fill_color=colors[whichColor][0],fill_alpha=0.2)
    fig.add_layout(ba)
    
    country = meta['name'].replace(' Dead','').replace(' Confirmed','')
    journal = generateTable(getJournalQuery(country),connection,['Date','Title'])
    journalY = float(mdf.min()['value']) - (float(mdf.max()['value'] - mdf.min()['value']) * 0.1)
    jds = ColumnDataSource(journal)
    which = fig.diamond(x='Date',y=journalY,source=jds,size=12)
    fig.add_tools(HoverTool(renderers=[which],tooltips=[('Events','@Title{safe}')],mode='vline'))
    
    show(fig)
    display(md('*' + meta['description'] + '*'))
    display(md('Projected value for yesterday was ' + str(round(row['p'])) + ' but actual was ' + str(round(row['v']))))
    
    if len(journal.index) > 0:
        display(md('## ' + country + ' Event Journal'))
        #fig,axis = getTimeline("Event Timeline",journal['Date'].apply(datetime.datetime.strftime,args=('%Y-%m-%d',)),journal['Title'].apply(lambda x,l : x[:l],args=(int(1000 / len(journal.index)),)),interval=5)
        #display(fig)
        display(journal.style.hide_index())