Python-Code Corona Plots (needs updating)

Python, Pandas, Matplotlib

Der wesentliche Teil des Codes zum Erzeugen der Plots. Mit Sicherheit kein Meisterwerk, suboptimal und schnell geschrieben … Die wesentlichen Schritte sind aber enthalten, so dass man damit selbst eigene Ideen realisieren kann.

Datendownload, Reformatting und Merging der separaten Datentabellen sind mit pandas realisiert, die Plots werden mit matplotlib erstellt. Für Standardplots hätte ich stattdessen seaborn verwendet.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# corona_v2
# generate time series plots 
# from data of the 2019/2020 corona virus infection 
# published by Johns Hopkins University
# Dieter Graessle, dieter@dieter-graessle.de
# last change: 08.03.2020
 
# -*- coding: utf-8 -*-
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
 
# Retrieve the global COVID-19 time series from the Johns Hopkins CSSE
# GitHub repository and reshape them into one long-format dataframe `data`
# with the columns Province/State, Country/Region, Lat, Long, Date,
# Confirmed, Deaths and Recovered.

# Common prefix of the three raw CSV files.
CSSE_BASE_URL = ('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
                 'csse_covid_19_data/csse_covid_19_time_series/')

# Identifier columns kept as-is when melting, and the keys used for joining.
ID_COLUMNS = ['Province/State', 'Country/Region', 'Lat', 'Long']
MERGE_KEYS = ID_COLUMNS + ['Date']

# If set to False the download is skipped; `confirmed`, `deaths` and
# `recovered` must then already be defined (e.g. by an earlier run in the
# same session), otherwise the melt calls below raise NameError.
retrieve = True

if retrieve:
    # The dates are column headers in these files, not cell values, so they
    # are converted after melting (see below) instead of by read_csv.
    confirmed = pd.read_csv(CSSE_BASE_URL + 'time_series_covid19_confirmed_global.csv')
    deaths = pd.read_csv(CSSE_BASE_URL + 'time_series_covid19_deaths_global.csv')
    recovered = pd.read_csv(CSSE_BASE_URL + 'time_series_covid19_recovered_global.csv')

# Wide -> long: one row per (region, date); the former column header becomes
# 'Date' and the cell value gets a per-table name so the tables can be joined.
conf_long = pd.melt(confirmed, id_vars=ID_COLUMNS,
                    var_name='Date', value_name='Confirmed')
deaths_long = pd.melt(deaths, id_vars=ID_COLUMNS,
                      var_name='Date', value_name='Deaths')
recovered_long = pd.melt(recovered, id_vars=ID_COLUMNS,
                         var_name='Date', value_name='Recovered')

# Join everything into one collective dataframe "data" using SQL-like left
# joins of deaths and recovered onto confirmed. Rows of `confirmed` without
# an exact key match in the other tables get NaN in the respective column.
# NOTE(review): Lat/Long are part of the join key, so a region only matches
# if its coordinates are identical in both files -- confirm this is intended.
data = pd.merge(conf_long, deaths_long, how='left', on=MERGE_KEYS, sort=False)
data = pd.merge(data, recovered_long, how='left', on=MERGE_KEYS, sort=False)

# Optional: append another row with the last reported data from the RKI
# data = data.append([{'Country/Region':'Germany', 'Date':'3/8/20', 'Confirmed': 900 }], ignore_index=True)

# Transform Date to datetime64. The headers are expected in
# month/day/two-digit-year form (like '3/8/20'); an explicit format replaces
# the deprecated `infer_datetime_format` argument and fails loudly on any
# header that does not match.
data['Date'] = pd.to_datetime(data['Date'], format='%m/%d/%y')
 
def makeplot(countrystring = "Germany", titlestring="Deutschland", logplot=False, filename=""):
    """Plot confirmed, recovered and death counts over time for one country.

    Reads the module-level dataframe ``data``, draws the three time series
    on a common date axis, saves the figure as an image file, prints the
    file name and shows the figure.

    Args:
        countrystring: Value of the 'Country/Region' column to select.
        titlestring: Country name as it should appear in the plot title
            and in the y-axis label.
        logplot: If True, the y-axis is logarithmic.
        filename: Path of the image file to write. If empty,
            "coronacurve.png" is used, or "coronacurve_log.png" when
            ``logplot`` is set.

    Side effects:
        Changes the global matplotlib rc settings (font sizes and default
        figure size); they stay in effect after the call.
    """
    country = data[data['Country/Region'] == countrystring]

    # A country may consist of several Province/State rows per date; sum
    # them so every date is plotted exactly once. min_count=1 keeps a date
    # as NaN when no value was reported at all instead of turning it into 0.
    totals = country.groupby('Date')[['Confirmed', 'Recovered', 'Deaths']].sum(min_count=1)
    date = totals.index
    confirmed = totals['Confirmed']
    recovered = totals['Recovered']
    deaths = totals['Deaths']

    SMALL_SIZE = 12
    MEDIUM_SIZE = 16
    BIGGER_SIZE = 20

    plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
    plt.rc('axes', titlesize=BIGGER_SIZE)    # fontsize of the axes title
    plt.rc('axes', labelsize=MEDIUM_SIZE)    # fontsize of the x and y labels
    plt.rc('xtick', labelsize=MEDIUM_SIZE)   # fontsize of the tick labels
    plt.rc('ytick', labelsize=MEDIUM_SIZE)   # fontsize of the tick labels
    plt.rc('legend', fontsize=MEDIUM_SIZE)   # legend fontsize
    plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title

    plt.rcParams['figure.figsize'] = [15, 10]
    fig, ax1 = plt.subplots()
    ax1.xaxis_date()
    ax1.plot(date, confirmed, 'ro-', linewidth=2, markersize=16, label='CONFIRMED')
    ax1.plot(date, recovered, 'bo-', linewidth=2, markersize=16, label='RECOVERED')
    ax1.plot(date, deaths, 'ko-', linewidth=2, markersize=16, label='DEATHS')
    ax1.legend()

    # Major ticks show the full date, minor ticks only the day of the month.
    monthyearFmt = mdates.DateFormatter('%Y %m %d')
    dayFmt = mdates.DateFormatter('%d')
    ax1.xaxis.set_major_formatter(monthyearFmt)
    ax1.xaxis.set_minor_formatter(dayFmt)
    if logplot:
        ax1.set_yscale('log')
    fig.autofmt_xdate()

    # The x label doubles as a footer with data sources and disclaimer.
    ax1.set_xlabel('Datum \
                    \n\n\n Daten:  \
                    \n CSSE at Johns Hopkins University https://github.com/CSSEGISandData \
                    \n Robert Koch Institut https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Fallzahlen.html \
                    \n \n Keine Gewähr für Richtigkeit, Vollständigkeit und Aktualität der Daten und Abbildungen. \
                    \n\n (copyright: Dieter Graessle, 2020, dieter@dieter-graessle.de, frei für nicht kommerzielle Verwendung)')
    # Use the caller-supplied country name instead of a hard-coded one so
    # that the label matches the title for countries other than Germany.
    ax1.set_ylabel('Anzahl bestätigter Fälle in ' + titlestring)
    ax1.set_title("Zeitliche Entwicklung der COVID-19-Fälle in " + titlestring + "\n")

    ax1.grid(which='major', color='k', linewidth=2)
    # The on/off flag is passed positionally: its keyword was renamed from
    # `b` to `visible` between Matplotlib releases, the position is stable.
    ax1.grid(True, which='minor', color='k')

    if not filename:
        filename = "coronacurve_log.png" if logplot else "coronacurve.png"
    print(filename)
    plt.savefig(filename, bbox_inches='tight')
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
 
# Plot for Germany with a linear y-axis.
makeplot("Germany", "Deutschland", logplot=False, filename="coronacurve.png")

# Same plot with a logarithmic y-axis (the closing quote of the file name
# was missing here, which made the whole file a SyntaxError).
makeplot("Germany", "Deutschland", logplot=True, filename="coronacurve_log.png")