Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
1 contributor

Users who have contributed to this file

78 lines (53 sloc) 2.17 KB

Open Science MOOC Dashboard

Data and source code for this dashboard, which presents GitHub repository statistics and user activity for the Open Science MOOC.

How to collect GitHub data

Setup

# Install and load packages using pacman
if (!require("pacman")) install.packages("pacman")
library(pacman)

p_load(httr, jsonlite, tidyverse)

Authentication

See https://towardsdatascience.com/accessing-data-from-github-api-using-r-3633fb62cb08 for instructions on how to set up your own GitHub app.

# Set OAuth
# Replace each "[INSERT HERE]" placeholder with the values of your own
# GitHub OAuth app before running this section.
# The next call's result is not assigned; the endpoint is looked up again
# below when the token is requested.
oauth_endpoints("github")
gh_app <- oauth_app(appname = "[INSERT HERE]",
                   key = "[INSERT HERE]",
                   secret = "[INSERT HERE]")

# Get credentials and config
# NOTE(review): oauth2.0_token() presumably starts an interactive browser
# authorisation on first use -- confirm before running non-interactively.
github_token <- oauth2.0_token(oauth_endpoints("github"), gh_app)
# `gtoken` is the request config that get_data() passes to httr::GET().
gtoken <- httr::config(token = github_token)

Custom functions to retrieve data from GitHub

# Function to submit API request(s), parse JSON content, and convert to data frame
#
# Follows GitHub's pagination: after each request the response's `Link` header
# is checked for a rel="next" entry and, if present, the next page is fetched.
# All pages are then combined into a single data frame. (Fetching only page 1
# would silently cap every result at `per_page` items.)
#
# Arguments:
#   url        API endpoint to query.
#   per_page   Number of items requested per page (default 100).
#   max_pages  Upper bound on the number of pages fetched (default Inf, i.e.
#              keep going until there is no next page).
#
# Returns: the parsed JSON. When the endpoint returns an array of objects this
#   is a data frame with one row per item across all fetched pages. If the
#   first page does not parse to a data frame (e.g. an empty array), that
#   parsed value is returned unchanged.
#
# Errors: stops with the HTTP error if any request fails.
#
# Relies on the global request config `gtoken` created in the authentication
# step.
get_data <- function(url, per_page = 100, max_pages = Inf) {

  pages <- list()
  page <- 1

  repeat {
    res <- httr::GET(
      url,
      query = list(state = "all", per_page = per_page, page = page),
      gtoken
    )
    httr::stop_for_status(res)

    # `as = "text"` returns the raw body as a string for jsonlite to parse
    page_df <- jsonlite::fromJSON(
      httr::content(res, as = "text", encoding = "UTF-8")
    )

    # Only tabular pages can be combined; anything else ends the pagination
    if (!is.data.frame(page_df)) {
      if (length(pages) == 0) {
        return(page_df)
      }
      break
    }

    pages[[length(pages) + 1]] <- page_df

    # GitHub advertises further pages through a rel="next" Link header entry
    link <- httr::headers(res)[["link"]]
    has_next <- !is.null(link) && grepl("rel=\"next\"", link, fixed = TRUE)
    if (!has_next || page >= max_pages) {
      break
    }
    page <- page + 1
  }

  # A single page is returned as parsed, with no re-binding needed
  if (length(pages) == 1) {
    return(pages[[1]])
  }

  # rbind_pages() also handles nested data frame columns (e.g. `owner`)
  jsonlite::rbind_pages(pages)
}

# Query several API endpoints and stack the results into one data frame
#
# Arguments:
#   urls  Vector of API endpoints; each one is passed to get_data().
#
# Returns: a single data frame with all rows from all endpoints, plus a
#   `df_id` column identifying which element of `urls` each row came from.
get_data_multiple <- function(urls) {

  # One request per URL, kept in the same order as `urls`
  responses <- purrr::map(urls, get_data)

  # Coerce every response to a data frame before binding
  frames <- lapply(responses, as.data.frame)

  dplyr::bind_rows(frames, .id = "df_id")
}

Collect GitHub data on the Open Science MOOC

# Fetch the Open Science MOOC organisation's repos and keep only those whose
# name contains "Module-"
repos_df <- get_data("https://api.github.com/orgs/OpenScienceMOOC/repos")
repos_df_mod <- dplyr::filter(repos_df, stringr::str_detect(name, "Module-"))

# For each retained repo, query the per-repo endpoints listed in its metadata
# Contributors
contributors_df <- get_data_multiple(repos_df_mod[["contributors_url"]])

# Stargazers
stargazers_df <- get_data_multiple(repos_df_mod[["stargazers_url"]])

# Subscribers
subscribers_df <- get_data_multiple(repos_df_mod[["subscribers_url"]])

# Write the four data frames to a single .RData file under their own names
save(
  list = c(
    "repos_df_mod", "contributors_df", "stargazers_df", "subscribers_df"
  ),
  file = "osmooc-github.RData"
)
You can’t perform that action at this time.