World population evolution and projections
Tags: #worldometer #opendata #population
Author: Jeremy Ravenel

Input

Import libraries

1
import pandas as pd
2
import plotly.express as px
3
from bs4 import BeautifulSoup
4
import requests
Copied!

Data used to scrape the tables

1
# Worldometer pages to scrape: the by-year page first, then the projections.
DATA_URLS = [
    "https://www.worldometers.info/world-population/world-population-by-year/",
    "https://www.worldometers.info/world-population/world-population-projections/",
]

# Header row a scraped table must match exactly to be kept. Position 0 is
# used as the x axis and position 1 as the y axis of the population graphs.
TABLE_COLS = [
    "Year",
    "World Population",
    "YearlyChange",
    "NetChange",
    "Density(P/Km²)",
    "UrbanPop",
    "UrbanPop %",
]
Copied!

Model

Functions to scrape tables from several pages and merge them

1
# Generic functions
2
3
def scrap_table(url, table_cloumns):
    """Download *url* and return the first HTML table with the given header.

    Args:
        url: Address of the page to download.
        table_cloumns: Expected column labels, in order. (The misspelled
            parameter name is kept so existing keyword callers keep working.)

    Returns:
        The first ``DataFrame`` parsed from the page whose column labels
        equal ``table_cloumns``, or ``None`` when the page contains no such
        table (including when it contains no table at all).
    """
    from io import StringIO

    # A timeout keeps an unresponsive server from blocking the run forever.
    page = requests.get(url, timeout=30)

    try:
        # Passing literal HTML to read_html is deprecated in recent pandas;
        # a file-like buffer works on old and new versions alike.
        dfs = pd.read_html(StringIO(page.text))
    except ValueError:
        # read_html raises ValueError when the document has no tables.
        return None

    # Normalise to a list so a tuple of labels compares equal as well.
    expected = list(table_cloumns)
    for df in dfs:
        if df.columns.to_list() == expected:
            return df
    return None
12
13
def merge_tables_from_urls(urls, table_columns):
    """Scrape every URL and stack the matching tables into one DataFrame.

    Args:
        urls: Iterable of page addresses, processed in order.
        table_columns: Column labels a table must have to be kept.

    Returns:
        The rows of all matching tables, in URL order and with each
        table's own index labels preserved, or ``None`` when no URL
        yielded a matching table.
    """
    scraped = (scrap_table(url, table_columns) for url in urls)
    tables = [found for found in scraped if found is not None]
    if not tables:
        return None
    # DataFrame.append was removed in pandas 2.0; concat is its replacement
    # and, like append, keeps the original index labels by default.
    return pd.concat(tables)
Copied!
1
# NOTE(review): in the code shown here, `table` is first assigned in the
# "Fetch tables" step further down, so this cell only works after that step.
table
Copied!

Create function to display graph

1
def create_graph(x_label, y_label, table, title="", graph_type=px.line):
    """Plot one column of *table* against another and display the figure.

    Args:
        x_label: Column of ``table`` used for the x axis.
        y_label: Column of ``table`` used for the y axis.
        table: Data passed to the plotting function.
        title: Figure title.
        graph_type: Callable invoked as
            ``graph_type(table, x=..., y=..., title=...)``; its result must
            provide a ``show()`` method.
    """
    plot_options = {"x": x_label, "y": y_label, "title": title}
    graph_type(table, **plot_options).show()
Copied!
1
# Print population graph from year to year
2
def display_population_graph(table, x_from=None, x_to=None, graph_type=px.line):
    """Plot population against year, optionally restricted to a year range.

    Args:
        table: DataFrame containing at least the first two ``TABLE_COLS``
            columns.
        x_from: Lowest year to keep (inclusive); ``None`` for no lower bound.
        x_to: Highest year to keep (inclusive); ``None`` for no upper bound.
        graph_type: Plotting callable forwarded to ``create_graph``.
    """
    x_label, y_label = TABLE_COLS[0], TABLE_COLS[1]

    # Filter through x_label rather than a hard-coded ``table.Year`` so the
    # filter and the plotted axis always refer to the same column.
    if x_from is not None:
        table = table[table[x_label] >= x_from]
    if x_to is not None:
        table = table[table[x_label] <= x_to]

    title = f"{y_label} by {x_label}"
    if not table.empty:
        # min/max give the real bounds whatever the row order; reading the
        # first and last rows was only right for a descending sort and
        # raised IndexError when the filters left no rows.
        years = table[x_label]
        title += f", between {years.min()} and {years.max()}"
    create_graph(x_label, y_label, table, title, graph_type)
Copied!

Fetch tables, sort the result and remove duplicate data

1
# Scrape every page in DATA_URLS and stack the matching tables.
table = merge_tables_from_urls(DATA_URLS, TABLE_COLS)

# Order rows from the highest year to the lowest, then keep only the first
# row found for each year.
table = table.sort_values(by=[TABLE_COLS[0]], ascending=False).drop_duplicates(
    subset=TABLE_COLS[0], keep="first"
)
Copied!

Output

Display the graph between -5000 and 2100

1
# display_population_graph has no return statement, so chart1 is None;
# the figure is shown as a side effect of the call.
chart1 = display_population_graph(table)
Copied!

Display the graph between 1800 and 2020

1
# Both bounds are inclusive: rows with 1800 <= Year <= 2020 are plotted.
display_population_graph(table, x_from=1800, x_to=2020)
Copied!

Display the graph between 2000 and 2100

1
# Both bounds are inclusive: rows with 2000 <= Year <= 2100 are plotted.
display_population_graph(table, x_from=2000, x_to=2100)
Copied!

Display a bar chart between 1950 and 2100

The graph type can be changed by passing a plotting function as 'graph_type' (graph_type=px.line, graph_type=px.bar, etc.)
1
# px.bar replaces the default px.line; rows with 1950 <= Year <= 2100 are plotted.
display_population_graph(table, x_from=1950, x_to=2100, graph_type=px.bar)
Copied!
Last modified 2mo ago
Copy link
Edit on GitHub