Transform sitemap to dataframe
Tags: #xml #file #tool
Author: Jeremy Ravenel

Input

Import library

1
import naas
2
import json
3
try:
4
import xmltodict
5
except:
6
!pip install xmltodict
7
import xmltodict
8
import pandas as pd
9
import requests
Copied!

Choose the website whose sitemap you want to load

1
# Base URL of the target site; sitemap_to_df() appends "/sitemap.xml" to it.
website = "https://zapier.com"
Copied!

Model

Build the DataFrame from the sitemap

1
def sitemap_to_df(url, key="urlset.url", timeout=30):
    """Download ``<url>/sitemap.xml`` and return its entries as a DataFrame.

    Args:
        url: Base URL of the website (e.g. ``"https://zapier.com"``). A
            trailing slash is tolerated.
        key: Dot-separated path of element names to follow inside the parsed
            XML document. The default, ``"urlset.url"``, selects the ``<url>``
            entries of a standard sitemap. Pass an empty string (or ``None``)
            to load the whole parsed document instead.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A ``pandas.DataFrame`` with one row per entry found at ``key``. An
        empty DataFrame is returned when the path does not exist in the
        document (for example when the file is a sitemap index rather than
        a URL set).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(f"{url.rstrip('/')}/sitemap.xml", timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as XML.
    response.raise_for_status()
    data_dict = xmltodict.parse(response.content)

    if not key:
        return pd.DataFrame.from_dict(data=data_dict)

    # Walk every segment of the path; stop early if an intermediate node is
    # missing or is not a mapping (e.g. an empty element parsed as None).
    data = data_dict
    for part in key.split("."):
        if not isinstance(data, dict):
            return pd.DataFrame()
        data = data.get(part)

    if data is None:
        return pd.DataFrame()
    # xmltodict yields a single dict (not a list) when an element occurs only
    # once; wrap it so a one-entry sitemap still produces a one-row frame.
    if isinstance(data, dict):
        data = [data]
    return pd.DataFrame(data)
Copied!
1
# Fetch and parse the sitemap of the website chosen above.
df = sitemap_to_df(website)
Copied!

Output

Display result

1
# Bare expression at the end of a notebook cell: renders the DataFrame.
df
Copied!

Set the timezone

1
# NOTE(review): presumably shows the timezone currently configured on the
# naas remote environment — confirm against the naas documentation.
naas.get_remote_timezone()
Copied!
1
# NOTE(review): presumably switches the naas remote environment to the given
# IANA timezone name — confirm against the naas documentation.
naas.set_remote_timezone("Europe/Lisbon")
Copied!
Last modified 2mo ago
Copy link
Edit on GitHub