Get jobs from categories
Tags: #remotive #jobs #csv #snippet
Author: Sanjeet Attili
With this notebook, you will be able to get job offers from Remotive:
  • URL: Job offer url.
  • TITLE: Job title.
  • COMPANY: Company name.
  • PUBLICATION_DATE: Date of publication.

Input

Import libraries

1
import pandas as pd
2
import requests
3
import time
4
from datetime import datetime
Copied!

Setup Remotive

Get categories from Remotive
1
def get_remotejob_categories():
    """Fetch the job categories exposed by the Remotive API.

    Returns:
        A DataFrame built from the ``jobs`` entry of the JSON response, or
        the ``requests.HTTPError`` instance when the server answers with an
        error status (returned, not raised, so the notebook cell displays it).
    """
    req_url = "https://remotive.io/api/remote-jobs/categories"
    # Without a timeout a stalled connection would block the notebook forever.
    res = requests.get(req_url, timeout=30)
    try:
        res.raise_for_status()
    except requests.HTTPError as e:
        return e
    res_json = res.json()

    # Get categories
    jobs = res_json.get('jobs')
    return pd.DataFrame(jobs)
13
14
# Fetch the category table once, then show it as the cell output so a
# category can be picked for the parameters below.
df_categories = get_remotejob_categories()
df_categories
Copied!
Enter your parameters
1
# Categories to fetch: pick values from the "slug" column of the categories table.
categories = ['data']
# Look-back window in days relative to now; the value must be negative.
date_from = -10
Copied!

Variables

1
# Path of the CSV file the fetched jobs are written to.
csv_output = "REMOTIVE_JOBS.csv"
Copied!

Model

Get all jobs posted after timestamp_date

All jobs posted after the cutoff date are fetched. The cutoff is derived from 'date_from' (a negative number of days): it is converted to seconds ('search_jobs_from') and added to the current time to obtain 'timestamp_date'.
1
# Format used to parse the 'publication_date' field of fetched jobs.
REMOTIVE_DATETIME = "%Y-%m-%dT%H:%M:%S"
# Format used for the PUBLICATION_DATE column of the output.
NAAS_DATETIME = "%Y-%m-%d %H:%M:%S"
3
4
def get_remotive_jobs_since(jobs, date):
    """Keep the jobs published after ``date`` and reshape them for output.

    Args:
        jobs: Iterable of job dicts providing the 'publication_date', 'url',
            'title' and 'company_name' keys; 'publication_date' must match
            ``REMOTIVE_DATETIME``.
        date: Cutoff as a POSIX timestamp; only jobs with a strictly greater
            publication timestamp are kept.

    Returns:
        A list of dicts with the URL, TITLE, COMPANY and PUBLICATION_DATE
        keys, in the same order as ``jobs``.
    """
    ret = []
    for job in jobs:
        published = datetime.strptime(job['publication_date'], REMOTIVE_DATETIME)
        if published.timestamp() > date:
            ret.append({
                'URL': job['url'],
                'TITLE': job['title'],
                'COMPANY': job['company_name'],
                # Format the parsed value directly: going through
                # timestamp()/fromtimestamp() can shift the wall-clock time
                # around local DST transitions.
                'PUBLICATION_DATE': published.strftime(NAAS_DATETIME),
            })
    return ret
16
17
def get_category_jobs_since(category, date, limit):
    """Fetch the jobs of one Remotive category published after ``date``.

    The API is queried with a growing ``limit`` until either fewer jobs than
    requested come back or the last job of the response is older than
    ``date``. NOTE(review): this relies on the API listing the newest jobs
    first -- confirm against the Remotive API documentation.

    Args:
        category: Category slug sent as the ``category`` query parameter.
        date: Cutoff as a POSIX timestamp.
        limit: Number of jobs requested on the first call; it is increased
            by 5 for every additional request.

    Returns:
        The list built by ``get_remotive_jobs_since`` from the final
        response, or an empty list when the response contains no jobs.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    # Iterate instead of recursing so a long look-back window cannot hit the
    # interpreter's recursion limit.
    while True:
        url = f"https://remotive.io/api/remote-jobs?category={category}&limit={limit}"
        # Without a timeout a stalled connection would block the notebook forever.
        res = requests.get(url, timeout=30)
        res.raise_for_status()
        # Decode the body once; every res.json() call parses it again.
        jobs = res.json().get('jobs')
        if not jobs:
            return []
        last_publication = datetime.strptime(
            jobs[-1]['publication_date'], REMOTIVE_DATETIME
        ).timestamp()
        if len(jobs) < limit or date > last_publication:
            print(f"Jobs from catgory {category} fetched ✅")
            return get_remotive_jobs_since(jobs, date)
        limit += 5
28
29
def get_jobs_since(categories: list,
                   date_from: int) -> pd.DataFrame:
    """Collect the jobs of several categories published within a look-back window.

    Args:
        categories: Category slugs to fetch.
        date_from: Look-back window in days relative to now; must be negative.

    Returns:
        A DataFrame with one row per job returned by
        ``get_category_jobs_since`` across all categories.

    Raises:
        ValueError: If ``date_from`` is not negative.
    """
    if date_from >= 0:
        # Raise instead of returning the message: callers expect a DataFrame
        # and would otherwise fail later with an unrelated AttributeError.
        raise ValueError("'date_from' must be negative. Please update your parameter.")
    # Transform the day offset into seconds, then into the cutoff timestamp.
    search_jobs_from = date_from * 24 * 60 * 60  # days in seconds
    timestamp_date = time.time() + search_jobs_from

    jobs = []
    for category in categories:
        jobs.extend(get_category_jobs_since(category, timestamp_date, 5))
    print(f'- All job since {datetime.fromtimestamp(timestamp_date)} have been fetched:', len(jobs))
    return pd.DataFrame(jobs)
42
43
# Run the fetch with the parameters chosen above and preview the first rows.
df_jobs = get_jobs_since(categories=categories, date_from=date_from)
df_jobs.head(n=5)
Copied!

Output

Save dataframe in csv

1
# Write the jobs to the CSV file, leaving out the DataFrame index column.
df_jobs.to_csv(path_or_buf=csv_output, index=False)
Copied!
Copy link
Edit on GitHub