Post daily jobs on Slack
Tags: #remotive #jobs #slack #gsheet #naas_drivers #automation
Author: Sanjeet Attili

Input

Import libraries

import pandas as pd
from bs4 import BeautifulSoup
import requests
from datetime import datetime
import time
from naas_drivers import gsheet, slack
import naas

Setup Slack channel configuration

SLACK_TOKEN = "xoxb-1481042297777-3085654341191-xxxxxxxxxxxxxxxxxxxxxxxxx"
SLACK_CHANNEL = "05_work"
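
Hardcoding the bot token works for a quick test, but it is visible to anyone who can read the notebook. A minimal sketch using naas secrets instead, assuming the token was stored once with naas.secret.add:

# One-time setup (run once, then remove this line):
# naas.secret.add("SLACK_TOKEN", "xoxb-...")

# Read the token at runtime instead of hardcoding it:
SLACK_TOKEN = naas.secret.get("SLACK_TOKEN")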

Setup sheet log data

spreadsheet_id = "1EBefhkbmqaXMZLRCiafabf6xxxxxxxxxxxxxxxxxxx"
sheet_name = "SLACK_CHANNEL_POSTS"
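
The spreadsheet_id is the long identifier in the sheet's URL, between /d/ and /edit. If you prefer to paste the full URL, a small helper can extract it (this helper is our own illustration, not part of the original notebook):

import re

def extract_spreadsheet_id(url: str) -> str:
    # Google Sheets URLs look like:
    # https://docs.google.com/spreadsheets/d/<spreadsheet_id>/edit#gid=0
    match = re.search(r"/d/([A-Za-z0-9_-]+)", url)
    if not match:
        raise ValueError("No spreadsheet id found in URL")
    return match.group(1)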

Setup Remotive

Get categories from Remotive
def get_remotejob_categories():
    req_url = "https://remotive.io/api/remote-jobs/categories"
    res = requests.get(req_url)
    try:
        res.raise_for_status()
    except requests.HTTPError as e:
        return e
    res_json = res.json()

    # Get categories (the endpoint returns them under the "jobs" key)
    jobs = res_json.get('jobs')
    return pd.DataFrame(jobs)

df_categories = get_remotejob_categories()
df_categories
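
The valid category values are the entries of the slug column. To list them (assuming the response shape above), you can run:

# List the category slugs accepted by the job-search endpoint
print(df_categories["slug"].tolist())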
Enter your parameters
categories = ['data']  # Pick the list of categories from the "slug" column
date_from = -10  # Date difference in days from now => must be negative

Set the Scheduler

naas.scheduler.add(recurrence="0 9 * * *")
# naas.scheduler.delete() # Uncomment this line to delete your scheduler if needed
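
The recurrence string is a standard cron expression: "0 9 * * *" runs the notebook every day at 09:00. As a sketch, a weekdays-only schedule would look like this:

# Run at 09:00 Monday through Friday only
naas.scheduler.add(recurrence="0 9 * * 1-5")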

Model

Get the sheet log of jobs

df_jobs_log = gsheet.connect(spreadsheet_id).get(sheet_name=sheet_name)
df_jobs_log
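
On the very first run the sheet has no rows yet, so the log may come back without the URL column that the deduplication step below expects. A defensive sketch, assuming the driver returns None or an empty DataFrame for a blank sheet:

# Fall back to an empty log with the expected columns on a first run
if df_jobs_log is None or len(df_jobs_log) == 0:
    df_jobs_log = pd.DataFrame(columns=["URL", "TITLE", "COMPANY", "PUBLICATION_DATE"])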

Get all jobs posted after timestamp_date

All jobs posted after date_from will be fetched. In practice, date_from is converted into an offset in seconds (search_jobs_from), and every job published since that cutoff is kept.
REMOTIVE_DATETIME = "%Y-%m-%dT%H:%M:%S"
NAAS_DATETIME = "%Y-%m-%d %H:%M:%S"

def get_remotive_jobs_since(jobs, date):
    ret = []
    for job in jobs:
        publication_date = datetime.strptime(job['publication_date'], REMOTIVE_DATETIME).timestamp()
        if publication_date > date:
            ret.append({
                'URL': job['url'],
                'TITLE': job['title'],
                'COMPANY': job['company_name'],
                'PUBLICATION_DATE': datetime.fromtimestamp(publication_date).strftime(NAAS_DATETIME)
            })
    return ret

def get_category_jobs_since(category, date, limit):
    url = f"https://remotive.io/api/remote-jobs?category={category}&limit={limit}"
    res = requests.get(url)
    if res.json()['jobs']:
        # Check the oldest job returned; if it is older than the cutoff
        # (or the API has no more jobs), we have fetched enough.
        publication_date = datetime.strptime(res.json()['jobs'][-1]['publication_date'], REMOTIVE_DATETIME).timestamp()
        if len(res.json()['jobs']) < limit or date > publication_date:
            print(f"Jobs from category {category} fetched ✅")
            return get_remotive_jobs_since(res.json()['jobs'], date)
        else:
            # Otherwise retry with a larger limit to reach further back in time
            return get_category_jobs_since(category, date, limit + 5)
    return []

def get_jobs_since(categories: list, date_from: int):
    if date_from >= 0:
        return "'date_from' must be negative. Please update your parameter."
    # Convert date_from (days) into an offset in seconds
    search_jobs_from = date_from * 24 * 60 * 60  # days in seconds
    timestamp_date = time.time() + search_jobs_from

    jobs = []
    for category in categories:
        jobs += get_category_jobs_since(category, timestamp_date, 5)
    print(f'- All jobs since {datetime.fromtimestamp(timestamp_date)} have been fetched -')
    return pd.DataFrame(jobs)

df_jobs = get_jobs_since(categories, date_from=date_from)
df_jobs
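
As a concrete check of the arithmetic: with date_from = -10, the cutoff works out to ten days before now.

# -10 days * 24 h * 60 min * 60 s = -864000 seconds
cutoff = time.time() - 10 * 24 * 60 * 60
print("Fetching jobs published after:", datetime.fromtimestamp(cutoff))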

Remove duplicate jobs

def remove_duplicates(df1, df2):
    # Get the jobs already logged
    jobs_log = df1.URL.unique()

    # Exclude jobs already logged from the new jobs
    df2 = df2[~df2.URL.isin(jobs_log)]
    return df2.sort_values(by="PUBLICATION_DATE")

df_new_jobs = remove_duplicates(df_jobs_log, df_jobs)
df_new_jobs
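
A quick illustration with toy data (the URLs below are made up for the example):

log = pd.DataFrame({"URL": ["https://example.com/job-a"]})
new = pd.DataFrame({
    "URL": ["https://example.com/job-a", "https://example.com/job-b"],
    "TITLE": ["Job A", "Job B"],
    "COMPANY": ["ACME", "ACME"],
    "PUBLICATION_DATE": ["2022-01-01 09:00:00", "2022-01-02 09:00:00"],
})
# Only job-b survives: job-a is already in the log
print(remove_duplicates(log, new))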

Output

Add new jobs to the sheet log

gsheet.connect(spreadsheet_id).send(sheet_name=sheet_name,
                                    data=df_new_jobs,
                                    append=True)

Send the new jobs to the Slack channel

if len(df_new_jobs) > 0:
    for _, row in df_new_jobs.iterrows():
        url = row.URL
        slack.connect(SLACK_TOKEN).send(SLACK_CHANNEL, f"<{url}>")
else:
    print("Nothing to publish in Slack!")
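
The angle brackets around the URL tell Slack to linkify and unfurl it. If you would rather post a more descriptive message, here is a sketch reusing the other columns (the message format is our own, not part of the original notebook):

for _, row in df_new_jobs.iterrows():
    message = f"*{row.TITLE}* at {row.COMPANY} ({row.PUBLICATION_DATE})\n<{row.URL}>"
    slack.connect(SLACK_TOKEN).send(SLACK_CHANNEL, message)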