1,先取得數據
我找的是這個地址:https://github.com/CSSEGISandData/COVID-19
進入到目錄:/blob/master/csse_covid_19_data/csse_covid_19_time_series/
下載time_series_19-covid-Confirmed.csv這個文件
這份數據的來源應該是約翰斯·霍普金斯大學(Johns Hopkins University,JHU)的系統科學與工程中心(The Center for Systems Science and Engineering,CSSE)。
2,導入各種需要用到的模塊
import os
import sys

import numpy as np
import pandas as pd
from pandas import DataFrame
from matplotlib import cm
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.animation as animation

# Select the SimHei font so that Chinese labels are displayed correctly.
matplotlib.rcParams['font.sans-serif'] = ['SimHei']
# Needed so that the minus sign is displayed correctly.
matplotlib.rcParams['axes.unicode_minus'] = False
3,讀入文件
def get_data_filepath():
    """Return the path of the confirmed-cases CSV stored next to this script.

    Returns:
        str: path to ``time_series_19-covid-Confirmed.csv`` inside the
        directory that contains this file (symlinks resolved).
    """
    # Directory of the current script.
    cur_dir = os.path.dirname(os.path.realpath(__file__))
    return os.path.join(cur_dir, 'time_series_19-covid-Confirmed.csv')


def get_df_all():
    """Load the whole CSV into a DataFrame.

    The first row is used as the header and the file is decoded as UTF-8.

    Returns:
        pandas.DataFrame: the parsed contents of the file returned by
        ``get_data_filepath()``.
    """
    return pd.read_csv(get_data_filepath(), header=0, encoding="utf-8")


df_all = get_df_all()
print(df_all)
4,加工某一天的數據
def get_df(day):
    """Return the per-region totals for one day, excluding Mainland China.

    Reads the module-level ``df_all`` DataFrame.

    Args:
        day: label of the column to aggregate, e.g. ``'2/24/20'``.

    Returns:
        pandas.DataFrame: columns ``'Country/Region'`` and ``day``, one row
        per region, sorted by that day's total in ascending order.
    """
    # Drop the Mainland China rows; only the other regions are counted.
    # `~` is the boolean-mask negation operator (unary minus is not).
    df = df_all[~df_all['Country/Region'].isin(['Mainland China'])]
    # Keep only the requested day's column.
    df = df[['Country/Region', day]]
    # Sum the rows that belong to the same region.
    df = df.groupby('Country/Region', as_index=False)[day].sum()
    # Order by total.
    return df.sort_values(by=day, ascending=True)


def draw_barchart(day):
    """Print the per-region totals for ``day`` (no plotting yet)."""
    dff = get_df(day)
    print(dff)


df_all = get_df_all()
draw_barchart('2/24/20')
5,先出圖,不過暫時不好看
def draw_barchart(day):
    """Draw a horizontal bar chart of the per-region totals for ``day``.

    Draws on the module-level ``ax`` and then shows the figure.

    Args:
        day: label of the date column to plot, e.g. ``'2/24/20'``.
    """
    dff = get_df(day)
    # One horizontal bar per region.
    ax.barh(dff['Country/Region'], dff[day])
    plt.show()


df_all = get_df_all()
# The figure and axes live outside the function because draw_barchart
# is going to be called repeatedly.
fig, ax = plt.subplots(figsize=(8, 6))
draw_barchart('2/24/20')
6,加入顏色
def draw_barchart(day):
    """Draw a coloured horizontal bar chart of the totals for ``day``.

    Draws on the module-level ``ax`` and looks up each region's colour in
    the module-level ``colors_dict``, then shows the figure.

    Args:
        day: label of the date column to plot, e.g. ``'2/24/20'``.
    """
    dff = get_df(day)
    # Colours in the same order as the bars.
    color_list = [colors_dict[x] for x in dff['Country/Region']]
    ax.barh(dff['Country/Region'], dff[day], color=color_list)
    plt.show()


df_all = get_df_all()
# The grouping and sorting below exist only to build the colour table.
df_temp = df_all.groupby('Country/Region', as_index=False)['2/24/20'].sum()
df_temp = df_temp.sort_values(by='2/24/20', ascending=True)
# The figure and axes live outside the function because draw_barchart
# is going to be called repeatedly.
fig, ax = plt.subplots(figsize=(8, 6))
country_region = df_temp['Country/Region'].unique()
# One colour per region, sampled evenly from the 0.1-0.9 range of "jet".
colors = cm.jet(np.linspace(0.1, 0.9, len(country_region)))
colors_dict = dict(zip(country_region, colors))
draw_barchart('2/24/20')
7,繼續美化
def draw_barchart(day):
    """Draw the fully styled bar chart of the per-region totals for ``day``.

    Draws on the module-level ``fig``/``ax`` and uses the module-level
    ``colors_dict`` for the bar colours, then shows the figure.

    Args:
        day: label of the date column to plot, e.g. ``'2/24/20'``; it is
            also printed as a large caption inside the chart.
    """
    dff = get_df(day)
    # The same axes are reused for every call (e.g. once per animation
    # frame); clear them so bars and labels from earlier calls do not
    # accumulate underneath the new ones.
    ax.clear()
    color_list = [colors_dict[x] for x in dff['Country/Region']]
    ax.barh(dff['Country/Region'], dff[day], color=color_list)
    # Label every bar with the region name and its count.
    for i, (value, name) in enumerate(zip(dff[day], dff['Country/Region'])):
        ax.text(value, i - .25, name + ' ' + str(value), ha='left', size=8, color='#777777')
    # Large date caption, positioned in axes coordinates.
    ax.text(1, 0.1, day, transform=ax.transAxes, color='#777777', size=46, ha='right')
    # Thousands separators on the x axis, with the ticks drawn on top.
    ax.xaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))
    ax.xaxis.set_ticks_position('top')
    ax.tick_params(axis='x', colors='#777777', labelsize=12)
    # The region names are written next to the bars, so hide the y ticks.
    ax.set_yticks([])
    ax.grid(which='major', axis='x', linestyle='-')
    ax.set_axisbelow(True)
    # Title and author credit.
    ax.text(0, 1.12, '確診人數', transform=ax.transAxes, size=24, weight=600, ha='left')
    ax.text(1, 0, '作者:[email protected]', transform=ax.transAxes, ha='right', color='#777777',
            bbox=dict(facecolor='white', alpha=0.8, edgecolor='white'))
    plt.box(False)
    # Adjust the outer margins of the figure.
    fig.tight_layout()
    # NOTE(review): this also runs for every frame when the function is
    # used as an animation callback - confirm that is intended.
    plt.show()
8,生成動畫
def save_ImageMagickWriter(ani):
    """Save an animation as a GIF through matplotlib's ImageMagick writer.

    Args:
        ani: the matplotlib animation object to save.
    """
    out_path = 'bar_chart_race/ImageMagickWriter.gif'
    # Make sure the target directory exists before the writer needs it.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    # NOTE(review): 'h264' is a video codec while the output is a GIF;
    # presumably this argument has no effect here - confirm and drop it.
    writer = animation.ImageMagickWriter(codec='h264')
    ani.save(out_path, writer=writer)


def animate():
    """Build the bar-chart-race animation and save it to disk.

    ``draw_barchart`` is called once per frame with a column label taken
    from the module-level ``df_all``.
    """
    # Every column from index 4 onwards is used as one frame; presumably
    # the first four columns are non-date metadata - verify against the CSV.
    dr = df_all.columns.values[4:]
    ani = animation.FuncAnimation(fig, draw_barchart, frames=dr, interval=200, repeat=False)
    save_ImageMagickWriter(ani)


df_all = get_df_all()
# The grouping and sorting below exist only to build the colour table.
df_temp = df_all.groupby('Country/Region', as_index=False)['2/24/20'].sum()
df_temp = df_temp.sort_values(by='2/24/20', ascending=True)
# The figure and axes live outside the function because draw_barchart
# is going to be called repeatedly.
fig, ax = plt.subplots(figsize=(8, 6))
country_region = df_temp['Country/Region'].unique()
# One colour per region, sampled evenly from the 0.1-0.9 range of "jet".
colors = cm.jet(np.linspace(0.1, 0.9, len(country_region)))
colors_dict = dict(zip(country_region, colors))
animate()
9,得到動圖
閱讀更多 大夢想家張志宇 的文章