Comment on page
Maintain profile posts stats database
Tags: #linkedin #profile #post #stats #naas_drivers #content #automation #csv
Last update: 2023-08-23 (Created: 2023-08-23)
Description: This notebook fetches your profile's post statistics from LinkedIn and stores them in a CSV file. It then updates a select number of entries to track the progress of your statistics over time. This method helps to minimize the number of requests made to the LinkedIn API, reducing the risk of being banned due to excessive usage. Additionally, this CSV database can be conveniently reused in other processes, such as retrieving interactions from post URLs.
Disclaimer:
This code is in no way affiliated with, authorized, maintained, sponsored or endorsed by Linkedin or any of its affiliates or subsidiaries. It uses an independent and unofficial API. Use at your own risk.
This project violates Linkedin's User Agreement Section 8.2, and because of this, Linkedin may (and will) temporarily or permanently ban your account. We are not responsible for your account being banned.
from naas_drivers import linkedin
import pandas as pd
from datetime import datetime
import naas
Mandatory
li_at: Cookie used to authenticate Members and API clients.
JSESSIONID: Cookie used for Cross Site Request Forgery (CSRF) protection and URL signature validation.
linkedin_url: This variable represents the LinkedIn profile URL.
Optional
csv_output: Path of the CSV file to be saved on your local file system.
limit: The initial number of posts to be fetched during the first execution.
update: The number of posts to be refreshed in each update.
cron: This variable represents the CRON syntax used to run the scheduler. More information here: https://crontab.guru/#0_12,18_*_*_1-5
refresh_interval: This variable sets the minimum time interval (in minutes) between data refreshes when using this template manually. This helps to prevent excessive calls to the LinkedIn API.
# Mandatory
# LinkedIn session cookies: read from the naas secret store, falling back to a
# placeholder string that must be replaced when the secret is not set.
li_at = naas.secret.get("LINKEDIN_LI_AT") or "YOUR_LINKEDIN_LI_AT"  # example: AQFAzQN_PLPR4wAAAXc-FCKmgiMit5FLdY1af3-2
JSESSIONID = naas.secret.get("LINKEDIN_JSESSIONID") or "YOUR_LINKEDIN_JSESSIONID"  # example: ajax:8379907400220387585
# URL of the LinkedIn profile whose posts are tracked.
linkedin_url = "https://www.linkedin.com/in/xxxxx/"  # example: "https://www.linkedin.com/in/XXXXXX/"

# Optional
# CSV file used as the posts database (plain string: no interpolation needed).
csv_output = "LINKEDIN_PROFILE_POSTS.csv"
# Number of posts fetched when the database is still empty.
limit = 5
# Number of most recent posts re-fetched on each later run.
update = 3
# Scheduler expression: at minute 0 past hour 12 and 18, Monday through Friday.
cron = "0 12,18 * * 1-5"
# Minimum number of minutes between two refreshes.
refresh_interval = 30
def read_csv(file_path):
    """Load the posts database from a CSV file.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The file content as a DataFrame, or an empty DataFrame when the file
        does not exist yet or contains no data to parse.
    """
    try:
        return pd.read_csv(file_path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # First run (no file yet) or a blank file left by a previous run:
        # start from an empty database instead of failing.
        return pd.DataFrame()
# Load the existing posts database; read_csv returns an empty DataFrame when
# the CSV file is not found, so the count printed below is then 0.
df_posts = read_csv(csv_output)
print("✅ Posts fetched:", len(df_posts))
# Notebook preview of the first stored row.
df_posts.head(1)
def update_posts(
    li_at,
    JSESSIONID,
    df_posts,
    linkedin_url,
    limit=5,
    update=3,
    refresh_interval=60,
    key="POST_URL",
):
    """Refresh the posts database with the latest posts of a LinkedIn profile.

    Args:
        li_at: LinkedIn ``li_at`` cookie passed to ``linkedin.connect``.
        JSESSIONID: LinkedIn ``JSESSIONID`` cookie passed to ``linkedin.connect``.
        df_posts: Posts already stored (may be an empty DataFrame).
        linkedin_url: URL of the profile whose posts feed is fetched.
        limit: Number of posts fetched when ``df_posts`` is empty.
        update: Number of posts re-fetched when ``df_posts`` is not empty.
        refresh_interval: Minimum age, in minutes, of the latest
            ``DATE_EXTRACT`` value before a new fetch is made.
        key: Column used to de-duplicate posts.

    Returns:
        A DataFrame with the freshly fetched rows first, followed by the
        stored rows, de-duplicated on ``key`` and re-indexed from 0.
    """
    df_new = pd.DataFrame()

    if len(df_posts) > 0:
        # NOTE: a non-empty database without a DATE_EXTRACT column is never
        # refreshed, because its age cannot be determined.
        if "DATE_EXTRACT" in df_posts.columns:
            last_update_date = df_posts["DATE_EXTRACT"].max()
            time_last_update = datetime.now() - datetime.strptime(
                last_update_date, "%Y-%m-%d %H:%M:%S"
            )
            minute_last_update = time_last_update.total_seconds() / 60
            if minute_last_update > refresh_interval:
                # Database is stale: re-fetch the last `update` posts
                # (both new ones and ones already stored).
                df_new = linkedin.connect(li_at, JSESSIONID).profile.get_posts_feed(
                    linkedin_url,
                    limit=update,
                )
            else:
                print(
                    f"🛑 Nothing to update. Last update done {int(minute_last_update)} minutes ago."
                )
    else:
        # Empty database: initial load of `limit` posts.
        df_new = linkedin.connect(li_at, JSESSIONID).profile.get_posts_feed(
            linkedin_url,
            limit=limit,
        )

    # Fresh rows go first so that keep="first" retains the newest statistics.
    df = pd.concat([df_new, df_posts])
    # Only de-duplicate when the key column exists; when nothing is stored and
    # nothing was fetched the frame has no columns and drop_duplicates would
    # raise KeyError.
    if key in df.columns:
        df = df.drop_duplicates(key, keep="first")

    print("✅ Updated posts fetched:", len(df))
    return df.reset_index(drop=True)
# Refresh the database using the settings defined above; the result contains
# the fetched rows merged with the rows already stored in df_posts.
df_update = update_posts(
li_at,
JSESSIONID,
df_posts,
linkedin_url,
limit=limit,
update=update,
refresh_interval=refresh_interval
)
# Notebook preview of the first row of the updated database.
df_update.head(1)
# Save dataframe in CSV (index column omitted so the file can be re-read as is)
df_update.to_csv(csv_output, index=False)
# Send CSV to production (It could be used with other scripts)
naas.dependency.add(csv_output)
# Schedule this notebook with the CRON expression defined in the inputs
naas.scheduler.add(cron=cron)
# to de-schedule this notebook, simply run the following command:
# naas.scheduler.delete()
Last modified 3mo ago