Update linkedinbio from Google
Tags: #hubspot #crm #sales #contact #naas_drivers #googlesearch #scheduler #naas
Author: Florent Ravenel

Input

Import library

1
from naas_drivers import hubspot
2
import naas
3
import pandas as pd
4
from googlesearch import search
5
import time
6
import re
Copied!

Set up your HubSpot

👉 Access your HubSpot API key
1
# HubSpot API key passed to hubspot.connect() in the cells below.
# Replace the placeholder with your own key.
HS_API_KEY = 'YOUR_HUBSPOT_API_KEY'
Copied!

Schedule your notebook every day

1
#-> Uncomment the two lines below (remove the leading hashtag) to schedule this notebook every day at 8:00 AM (NB: you can change the time in the cron expression)
2
# import naas
3
# naas.scheduler.add(cron="0 8 * * *")
4
5
#-> Uncomment the line below (by removing the hashtag) to remove your scheduler
6
# naas.scheduler.delete()
Copied!

Model

Get all contacts in Hubspot

1
# Contact properties to request from HubSpot.
properties_list = ["hs_object_id", "firstname", "lastname", "linkedinbio"]

# Retrieve all contacts with the properties listed above.
hubspot_contacts = hubspot.connect(HS_API_KEY).contacts.get_all(properties_list)

# Show the result as the cell output.
hubspot_contacts
Copied!

Keep only the contacts whose "linkedinbio" is "Not Defined" and whose "firstname" and "lastname" are both defined

1
# Cleaning: fillna returns a new DataFrame, so hubspot_contacts is left
# untouched and every missing value becomes the "Not Defined" marker.
df_to_update = hubspot_contacts.fillna("Not Defined")

# Contacts that have both a first name and a last name...
has_full_name = (
    df_to_update["firstname"].ne("Not Defined")
    & df_to_update["lastname"].ne("Not Defined")
)
# ...but no LinkedIn bio yet.
lacks_bio = df_to_update["linkedinbio"].eq("Not Defined")

# Keep only those contacts and renumber the rows from 0.
df_to_update = df_to_update[has_full_name & lacks_bio].reset_index(drop=True)

df_to_update
Copied!

Search Google for the LinkedIn bio using firstname and lastname

1
def get_bio(firstname, lastname):
    """Return the first LinkedIn profile URL found on Google for a contact.

    Args:
        firstname: Contact first name, inserted into the search query.
        lastname: Contact last name, inserted into the search query.

    Returns:
        The first LinkedIn profile URL found in the search results, cut
        before any "?" and with spaces removed, or None when no result
        contains such a URL.
    """
    # Raw string with escaped dots: "." now only matches a literal dot and
    # the invalid "\/" escapes are gone. The subdomain (www., fr., ...) is
    # optional so a bare "https://linkedin.com/in/..." is accepted too.
    # Compiled once here instead of being rebuilt for every result.
    pattern = re.compile(r"https://(?:[\w-]+\.)*linkedin\.com/in/[^?]+")

    # Create query
    query = f"{firstname}+{lastname}+Linkedin"
    print("Google query: ", query)

    # Search in Google
    for url in search(query, tld="com", num=10, stop=10, pause=2):
        result = pattern.search(url)

        # Return the first profile URL found
        if result is not None:
            return result.group(0).replace(" ", "")

        # No match: wait before moving on to the next result
        time.sleep(2)

    # None of the results contained a LinkedIn profile URL
    return None
Copied!
1
# Look up a LinkedIn bio for every remaining contact and store it in the
# "linkedinbio" column (None when get_bio finds nothing).
for contact in df_to_update.itertuples():
    found_bio = get_bio(contact.firstname, contact.lastname)
    df_to_update.loc[contact.Index, "linkedinbio"] = found_bio

df_to_update
Copied!

Output

Update linkedinbio in Hubspot

1
# Connect once: the API key does not change between rows, so there is no
# need to call hubspot.connect() again for every contact.
hubspot_client = hubspot.connect(HS_API_KEY)

for _, row in df_to_update.iterrows():
    # Get data
    hs_object_id = row.hs_object_id
    linkedinbio = row.linkedinbio

    # Skip contacts without a bio. pd.isna covers both None and NaN, so a
    # missing value that pandas stored as NaN is never sent to HubSpot.
    if pd.isna(linkedinbio):
        continue

    # Update LK Bio
    data = {"properties": {"linkedinbio": linkedinbio}}
    hubspot_client.contacts.patch(hs_object_id, data)
Copied!
Copy link
Edit on GitHub