Update database with GitHub repositories info
Tags: #notion #database #update #github #repositories #automation #scheduler
Last update: 2023-04-13 (Created: 2023-04-12)
Description: This notebook updates a Notion database with information from all repositories within your GitHub organization. The following data will be updated in your Notion database:
- Name: The name of the repository.
- GitHub url: The URL for the repository on GitHub.
- Description: A brief description of what the repository is for.
- Default branch: The default branch for the repository (i.e., the branch that is checked out when someone first clones the repository).
- Visibility: The visibility status of the repository (e.g., public, private, or internal).
- Created date: The date when the repository was created.
- Last updated date: The date when the repository was last updated.
- Open Issues: The number of unresolved issues (i.e., bug reports, feature requests, or other tasks) in the repository.
- Forks: The number of times the repository has been forked (i.e., copied to another GitHub account).
- Stargazers: The number of GitHub users who have "starred" the repository (i.e., marked it as a favorite).
- Size: The size of the repository in terms of disk space used.
References:
import naas
from naas_drivers import notion
import pandas as pd
import re
from datetime import datetime
import os
import requests
import naas
import github
- github_token: GitHub personal access token
- github_organization: GitHub organization name
- notion_token: Notion token shared with your database
- database_key: Database key name = page title in Notion
- force_update: By default, the value is set to False, indicating that only dynamic data such as Forks, Stargazers, Size, Open Issues, and Last updated date will be updated on existing pages. Set it to True to also refresh static data (Created date, GitHub url, Default branch, Visibility, Description).
- notion_database: URL of your Notion database
- output_dir: Directory where the data of each repository is saved as CSV
# Inputs
# GitHub personal access token, read from the naas secret "GITHUB_TOKEN";
# falls back to a placeholder string when the secret lookup returns a falsy value.
github_token = naas.secret.get('GITHUB_TOKEN') or "YOUR_TOKEN"
# Organization whose repositories are listed.
github_organization = "jupyter-naas"
# Notion token, read from the naas secret "NOTION_TOKEN_METRICS" (same fallback).
notion_token = naas.secret.get("NOTION_TOKEN_METRICS") or "YOUR_TOKEN"
# Name of the Notion property used to identify a page.
database_key = "Name"
# When False, static properties are only written on newly created pages;
# when True, they are rewritten on every page.
force_update = False
# Outputs
# URL of the Notion database to update; the database id is parsed from its last path segment.
notion_database = "https://www.notion.so/naas-official/93377d8407d84b01b26558913ff6b573?v=1122f024d70e4099aa51f70f0fa9b1ae&pvs=4"
# Root directory for the CSV exports (one sub-directory per repository).
output_dir = "/home/ftp/naas-notion-os/outputs/repositories"
# Schedule during week days every day at 8:00 PM
naas.scheduler.add(cron="0 20 * * 1-5")
# naas.scheduler.delete() # uncomment and execute cell to delete automation
def create_notion_db(notion_database, key, token):
    """Load the pages of a Notion database into a one-row-per-page DataFrame.

    Pages whose ``key`` property is an empty string are deleted from Notion
    and left out of the result.

    Args:
        notion_database: URL of the Notion database. The database id is taken
            from the last path segment, before ``?v=``.
        key: Name of the property used to identify a page.
        token: Notion token passed to ``notion.connect``.

    Returns:
        A DataFrame with one column per page property plus ``PAGE_ID``. When
        no page is kept, an empty frame that still carries the ``key`` and
        ``PAGE_ID`` columns, so callers can filter on them without a KeyError.

    Raises:
        ValueError: If a page has no property named ``key``.
    """
    # Connect once and reuse the client for the query and for any deletion.
    client = notion.connect(token)

    # Get database
    database_id = notion_database.split("/")[-1].split("?v=")[0]
    pages = client.database.query(database_id, query={})

    # Collect one single-row frame per page and concatenate once at the end
    # instead of growing a DataFrame inside the loop.
    rows = []
    for page in pages:
        page_id = page.id

        # Long-format frame; the code below relies on its "Name", "Type"
        # and "Value" columns.
        df = page.df()

        title_values = df.loc[df["Name"] == key, "Value"].values
        if len(title_values) == 0:
            raise ValueError(
                f"Property '{key}' not found in Notion page '{page_id}'"
            )

        # Remove empty pages
        if title_values[0] == "":
            client.blocks.delete(page_id)
            print(f"Page '{page_id}' empty => removed from database")
            continue

        # Pivot rows to columns: property names become the column labels,
        # then the now redundant "Name" row is dropped.
        row = df.drop("Type", axis=1).T
        row.columns = df["Name"].tolist()
        row = row.drop("Name").reset_index(drop=True)

        # Add page ID
        row["PAGE_ID"] = page_id
        rows.append(row)

    if not rows:
        # Keep the columns callers filter on even when there is no data.
        return pd.DataFrame(columns=[key, "PAGE_ID"])
    return pd.concat(rows)
# Snapshot the current content of the Notion database (one row per page).
df_notion = create_notion_db(
    notion_database=notion_database,
    key=database_key,
    token=notion_token,
)
print("📊 Notion DB:", len(df_notion))
# Notebook preview of the first row.
df_notion.head(1)
# Create a Github instance authenticated with the personal access token
g = github.Github(github_token)
# Get the organization
org = g.get_organization(github_organization)
# List all the repositories of the organization; iterated by the update loop
repos = org.get_repos()
# Push the GitHub data of every repository to its Notion page (creating the
# page when it does not exist yet) and export the resulting page to CSV.
database_id = notion_database.split("/")[-1].split("?v=")[0]

# Connect once; the same client serves every page lookup and creation.
notion_client = notion.connect(notion_token)

# df_notion may have no columns at all (e.g. the database has no pages yet).
# Only filter on the key column when it exists, so that a first run creates
# the pages instead of failing with a KeyError.
has_key_column = database_key in df_notion.columns

for repo in repos:
    # Init
    page_new = False
    page_id = None
    name = repo.name
    print("➡️ Started for:", name)

    # Create or get page
    if has_key_column:
        notion_page = df_notion.loc[df_notion[database_key] == name, "PAGE_ID"]
    else:
        notion_page = []
    if len(notion_page) > 0:
        page_id = notion_page.values[0]
        page = notion_client.page.get(page_id)
    else:
        page = notion_client.Page.new(database_id=database_id).create()
        page.title(database_key, repo.name)
        page_new = True

    # Update static data: only on new pages, unless a full refresh is forced
    if page_new or force_update:
        page.date("Created date", repo.created_at.strftime("%Y-%m-%d"))
        page.link("GitHub url", repo.html_url)
        page.select("Default branch", repo.default_branch)
        page.select("Visibility", repo.visibility)
        # Repositories without a description leave the property untouched
        if repo.description:
            page.rich_text("Description", repo.description)

    # Update dynamic data
    page.number("Stargazers", repo.stargazers_count)
    page.number("Forks", repo.forks_count)
    page.number("Size", repo.size)
    page.number("Open Issues", repo.open_issues)
    page.date("Last updated date", repo.updated_at.strftime("%Y-%m-%d"))
    page.update()
    print("✅ Data successfully updated in Notion.")

    # Save page to csv: <output_dir>/<repo name>/<YYYYMMDD>_<repo name>_info.csv
    df = page.df()
    csv_name = f"{datetime.now().strftime('%Y%m%d')}_{name}_info.csv"
    output_path = os.path.join(output_dir, name)
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(output_path, exist_ok=True)
    csv_path = os.path.join(output_path, csv_name)
    df.to_csv(csv_path, index=False)
    print("✅ Data saved to csv:", csv_path)