Get jobs from categories
Tags: #remoteok #jobs #csv #snippet
Author: Sanjeet Attili
With this notebook, you will be able to get job offers from RemoteOK, with the following fields:
  • URL: Job offer url.
  • TITLE: Job title.
  • COMPANY: Company name.
  • TAGS: Tags linked to the job.
  • LOCATION: Location linked to the job.
  • PUBLICATION_DATE: Date of publication.

Input

Import libraries

1
import pandas as pd
2
import requests
3
from datetime import datetime
4
import time
Copied!

Setup Remoteok

1
# Tags to look up on RemoteOK; get_jobs sends one API request per entry.
categories = ['machine learning',
2
'data science',
3
'nlp',
4
'deep learning',
5
'computer vision',
6
'data',
7
'natural language processing',
8
'data engineer']
9
date_from = -30 ### look-back window in days: -30 keeps only jobs published within the last 30 days => must be negative
Copied!

Variables

1
# Path of the CSV file the jobs DataFrame is saved to.
# NOTE(review): the file name says REMOTIVE although the data comes from RemoteOK - confirm this is intended.
csv_output = "REMOTIVE_JOBS.csv"
Copied!

Model

Get jobs from RemoteOk

1
# RemoteOK API endpoint; get_jobs appends a "?tag=<category>" query to it.
REMOTEOK_API = "https://remoteok.com/api"
2
# strptime pattern for a job's 'date' value once its "+..." offset suffix is removed.
REMOTEOK_DATETIME = "%Y-%m-%dT%H:%M:%S"
3
# strftime pattern used to render the PUBLICATION_DATE column.
NAAS_DATETIME = "%Y-%m-%d %H:%M:%S"
4
5
def get_jobs(remoteok_url, categories):
    """Collect recent RemoteOK job offers for every tag in *categories*.

    One request is sent per tag. Jobs published before the cut-off derived
    from the module-level ``date_from`` (a negative number of days) are
    dropped. A job that matches several tags is listed once per tag.

    Args:
        remoteok_url: Base URL of the RemoteOK API endpoint.
        categories: Iterable of tag strings to query.

    Returns:
        pandas.DataFrame with the columns URL, TITLE, COMPANY, TAGS,
        LOCATION and PUBLICATION_DATE, sorted by PUBLICATION_DATE with the
        newest first. When nothing matches, the frame is empty but still
        has these columns.

    Raises:
        requests.HTTPError: If the API answers with an error status.
            (The exception used to be *returned*, which made callers fail
            later with an unrelated AttributeError.)
        requests.Timeout: If the API does not answer within 30 seconds.
    """
    from urllib.parse import quote

    columns = ['URL', 'TITLE', 'COMPANY', 'TAGS', 'LOCATION', 'PUBLICATION_DATE']
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
    }
    # Oldest accepted publication time in epoch seconds. Computed once so
    # every job is compared against the same cut-off.
    required_time = time.time() + date_from * 24 * 60 * 60

    rows = []
    for tag in categories:
        # Escape the tag so spaces and reserved characters ('&', '#', ...)
        # cannot break the query string.
        url = f"{remoteok_url}?tag={quote(tag, safe='')}"
        res = requests.get(url, headers=headers, timeout=30)
        res.raise_for_status()

        # The first element of the payload is never treated as a job
        # (presumably API metadata - confirm against the API docs).
        for job in res.json()[1:]:
            raw_date = job['date']
            try:
                # Keep the UTC offset: a naive datetime's .timestamp() is
                # interpreted in the machine's local time zone, which would
                # shift the cut-off comparison by the local UTC offset.
                published = datetime.fromisoformat(raw_date)
            except ValueError:
                # Fall back to the original parsing for values that
                # fromisoformat cannot read.
                published = datetime.strptime(raw_date.split('+')[0], REMOTEOK_DATETIME)

            if published.timestamp() < required_time:
                continue

            rows.append({
                'URL': job.get('url'),
                'TITLE': job.get('position'),
                'COMPANY': job.get('company'),
                # A job without tags would otherwise make join() raise.
                'TAGS': ", ".join(job.get('tags') or []),
                'LOCATION': job.get('location'),
                'PUBLICATION_DATE': published.strftime(NAAS_DATETIME),
            })

    # Passing the columns explicitly keeps sort_values from raising KeyError
    # when no job was collected.
    df = pd.DataFrame(rows, columns=columns)
    df = df.sort_values(by='PUBLICATION_DATE', ascending=False)
    return df
41
42
# Fetch the recent jobs for all configured categories and preview the first five rows.
df_jobs = get_jobs(REMOTEOK_API, categories)
43
df_jobs.head(5)
Copied!

Output

Save dataframe in csv

1
# Save the jobs to csv_output, leaving out the DataFrame index column.
df_jobs.to_csv(csv_output, index=False)
Copied!
Copy link
Edit on GitHub