Most populated countries
Tags: #worldbank #opendata
Author: Jeremy Ravenel
Example notebook that ranks the most populated countries.
Source: OECD (Organisation for Economic Co-operation and Development)

Input

Import library

1
import pandas as pd
2
import matplotlib.pyplot as plt
3
%matplotlib inline
4
import requests
5
import io
6
import numpy as np
7
import plotly.graph_objects as go
8
import plotly.express as px
9
from pydrive.auth import GoogleAuth
10
from pydrive.drive import GoogleDrive
11
from google.colab import auth
12
from oauth2client.client import GoogleCredentials
13
from pandas import DataFrame
14
import plotly.graph_objects as go
Copied!

Model

Let's fetch the file from Google Drive

1
# Sign the Colab user in, then hand the session's default application
# credentials to PyDrive so it can talk to Google Drive.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Replace the id with the id of the Drive file you want to access.
downloaded = drive.CreateFile(dict(id="1FjX4NTIq1z3zS9vCdAdpddtj9mKa0wIW"))
# Save the file's content locally under the name the loading step reads.
downloaded.GetContentFile("POP_PROJ_20042020112713800.csv")
Copied!

Store the data in a variable

1
# Read only the three columns used by the ranking and give them
# upper-case names in the same expression.
data = pd.read_csv(
    "POP_PROJ_20042020112713800.csv",
    usecols=["Country", "Time", "Value"],
).rename(columns={"Country": "COUNTRY", "Time": "TIME", "Value": "VALUE"})
# Bare expression: the notebook displays the resulting table.
data
Copied!

Functions

1
# The two years whose rankings are compared.
firstYear, secondYear = 2000, 2030

# Placeholders for the two rankings; each is a distinct empty list here.
firstOccur, secondOccur = [], []
5
def tambouille_first(number1, frame=None):
    """Return the ten rows with the largest ``VALUE`` for year ``number1``.

    Rows whose ``TIME`` equals ``number1`` are selected and sorted by
    ``VALUE`` in ascending order; the last ten are returned, so the
    largest value is the final row.

    Args:
        number1: Year to match against the ``TIME`` column.
        frame: Table with ``TIME`` and ``VALUE`` columns. Defaults to the
            module-level ``data`` DataFrame.

    Returns:
        A DataFrame of at most ten rows that keeps the source row labels
        and columns. It is empty (with the same columns) when no row
        matches ``number1``.
    """
    if frame is None:
        frame = data

    # A boolean mask replaces the row-by-row ``iterrows`` scan. It also keeps
    # the column layout when nothing matches, so the sort below no longer
    # fails with ``KeyError: 'VALUE'`` on an empty selection.
    matching = frame[frame["TIME"] == number1]

    # Missing values are sorted first so that ``tail(10)`` never picks them
    # ahead of real values (the default would place NaN last).
    ranked = matching.sort_values(by="VALUE", ascending=True, na_position="first")
    return ranked.tail(10)
15
16
def tambouille_second(number2, frame=None):
    """Return the ten rows with the largest ``VALUE`` for year ``number2``.

    Rows whose ``TIME`` equals ``number2`` are selected and sorted by
    ``VALUE`` in ascending order; the last ten are returned, so the
    largest value is the final row.

    Args:
        number2: Year to match against the ``TIME`` column.
        frame: Table with ``TIME`` and ``VALUE`` columns. Defaults to the
            module-level ``data`` DataFrame.

    Returns:
        A DataFrame of at most ten rows that keeps the source row labels
        and columns. It is empty (with the same columns) when no row
        matches ``number2``.
    """
    if frame is None:
        frame = data

    # A boolean mask replaces the row-by-row ``iterrows`` scan. It also keeps
    # the column layout when nothing matches, so the sort below no longer
    # fails with ``KeyError: 'VALUE'`` on an empty selection.
    matching = frame[frame["TIME"] == number2]

    # Missing values are sorted first so that ``tail(10)`` never picks them
    # ahead of real values (the default would place NaN last).
    ranked = matching.sort_values(by="VALUE", ascending=True, na_position="first")
    return ranked.tail(10)
26
27
# Build the ranking for each configured year.
firstOccur = tambouille_first(number1=firstYear)
secondOccur = tambouille_second(number2=secondYear)

# Bare expression: the notebook displays the first year's ranking.
firstOccur
Copied!

Output

Display plot

1
# One horizontal bar trace per year, each labelled with its year.
fig = go.Figure(data=[
    go.Bar(name=str(firstYear), y=firstOccur["COUNTRY"], x=firstOccur["VALUE"], orientation='h'),
    go.Bar(name=str(secondYear), y=secondOccur["COUNTRY"], x=secondOccur["VALUE"], orientation='h'),
])
fig.update_layout(
    # The title is built from the same year variables as the traces, so it
    # cannot drift from the data when firstYear / secondYear are changed.
    title_text=f"TOP 10 des pays les plus peuplés en {firstYear} avec prévision {secondYear}",
    annotations=[
        # Source credit positioned in "paper" coordinates (relative to the
        # plot area rather than to the data axes).
        dict(
            x=1,
            y=-0.15,
            showarrow=False,
            text="Source : OECD -> 2019",
            xref="paper",
            yref="paper",
        )
    ],
)
fig.show()
Copied!
Tutorial video (in French): https://drive.google.com/file/d/14QhRJTWxlV6HyHmrLuSGsJ6NuFrV2GCZ/view
Copy link
Edit on GitHub