Links

List social network links from website

Tags: #beautifulsoup #webscraping #python #html #css #url
Author: Florent Ravenel
Last update: 2023-05-02 (Created: 2023-05-02)
Description: This notebook uses BeautifulSoup to list all the social network links found on a website. It is useful for organizations that want to quickly get a list of all the social networks they are present on.
References:

Input

Import libraries

import requests
from bs4 import BeautifulSoup

Setup Variables

  • url: The URL of the website you want to extract social network links from
  • social_network_links: List of social network links extracted from website
# Inputs
# Page that is fetched and scanned for social network links
url = "https://www.naas.ai/"
# Outputs
# Starts empty; it is passed to get_social_network_links in the Output step
# and rebound to the list that call returns
social_network_links = []

Model

def get_social_network_links(url, social_network_links):
    """Collect the social network links found on a web page.

    Fetches ``url``, parses the HTML with BeautifulSoup and inspects the
    ``href`` of every ``<a>`` tag. An href is kept when it contains one of the
    known network names (facebook, twitter, linkedin, instagram, github,
    youtube). Links containing ``github.com/`` are collapsed to the account
    root (``https://github.com/<account>``) so that several repositories of
    the same account produce a single entry; links whose first path segment
    is ``orgs`` or ``sponsors`` are skipped.

    Args:
        url: Address of the page to scan.
        social_network_links: List the links are appended to. It is mutated
            in place; links already present are not added a second time.

    Returns:
        The same ``social_network_links`` list that was passed in.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched,
            including when the server does not answer within the timeout.
    """
    social_networks = ('facebook', 'twitter', 'linkedin', 'instagram', 'github', 'youtube')
    github_marker = "github.com/"
    github_excluded = ("orgs", "sponsors")

    # Make a GET request to the URL and get the HTML content.
    # The timeout keeps an unresponsive server from hanging the call forever.
    response = requests.get(url, timeout=30)
    # Create a BeautifulSoup object to parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # Loop through all the links on the page and keep the social network ones
    for link in soup.find_all('a'):
        href = link.get('href')
        if not href:
            continue
        # Only normalise real github.com URLs. Testing for the bare word
        # "github" would also catch e.g. "*.github.io" links, for which the
        # split below yields the URL scheme instead of an account name.
        if github_marker in href:
            org = href.split(github_marker)[-1].split("/")[0]
            if org in github_excluded:
                # Not an account page: ignore the link entirely
                continue
            href = f'https://github.com/{org}'
        is_social = any(social in href for social in social_networks)
        if is_social and href not in social_network_links:
            social_network_links.append(href)
    return social_network_links

Output

# Scan `url` and store the returned list of links back in `social_network_links`
social_network_links = get_social_network_links(url, social_network_links)
# Bare expression: shown as the cell result when run in a notebook
social_network_links