Links

Transform sitemap to dataframe

Tags: #xml #file #tool #operations #automation #dataframe
Author: Jeremy Ravenel

Input

Import libraries

import naas
import json
try:
import xmltodict
except:
!pip install xmltodict
import xmltodict
import pandas as pd
import requests

Choose the website you want

# Base URL of the site to inspect; sitemap_to_df() appends "/sitemap.xml" to it.
website = "https://zapier.com"

Model

Get your DataFrame

def _sitemap_dict_to_df(data_dict):
    """Turn a parsed sitemap document into a DataFrame of its <url> entries.

    Args:
        data_dict: Mapping produced by ``xmltodict.parse`` for a sitemap.

    Returns:
        A DataFrame with one row per ``<url>`` element. If the document has
        no ``<urlset>`` root (for example a sitemap index), the whole parsed
        document is loaded into the DataFrame instead. An ``<urlset>``
        without any ``<url>`` child gives an empty DataFrame.
    """
    if "urlset" not in data_dict:
        # Not a plain URL sitemap: expose the raw document rather than crash.
        return pd.DataFrame.from_dict(data=data_dict)

    urlset = data_dict["urlset"]
    # An empty <urlset/> is parsed as None, so guard before looking up "url".
    entries = urlset.get("url") if isinstance(urlset, dict) else None
    if entries is None:
        return pd.DataFrame()

    # A lone <url> child is parsed as a single mapping instead of a list;
    # wrap it so it becomes one row rather than a set of scalar columns.
    if isinstance(entries, dict):
        entries = [entries]
    return pd.DataFrame(entries)


def sitemap_to_df(url, timeout=30):
    """Download ``<url>/sitemap.xml`` and return its entries as a DataFrame.

    Args:
        url: Base URL of the website, with or without a trailing slash.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A DataFrame built from the sitemap (see ``_sitemap_dict_to_df``).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    # Strip a trailing slash so the request never targets "//sitemap.xml".
    response = requests.get(f"{url.rstrip('/')}/sitemap.xml", timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as XML.
    response.raise_for_status()
    return _sitemap_dict_to_df(xmltodict.parse(response.content))
# Fetch the sitemap of the configured website and keep the resulting DataFrame.
df = sitemap_to_df(website)

Output

Display result

# Bare expression: as the last statement of a notebook cell it renders the DataFrame.
df

Set the timezone

# Presumably reports the timezone currently configured on the remote naas
# instance (inferred from the function name — confirm against the naas docs).
naas.get_remote_timezone()
# Switch the remote timezone to Europe/Lisbon.
naas.set_remote_timezone("Europe/Lisbon")
Last modified 1mo ago