feat: remaining pr search related functions and refactoring
gitcommitshow committed Dec 15, 2023
1 parent 89fb3aa commit 601a800
Showing 6 changed files with 2,411 additions and 39 deletions.
61 changes: 61 additions & 0 deletions archive.js
@@ -0,0 +1,61 @@
import * as path from 'path';
import * as fs from 'fs';

/**
* Writes JSON data to a CSV file
* @param {Array<Object>} data - The data to be written to the file.
* @param {Object} [options] - Archive options.
* @param {string} [options.archiveFolder=process.cwd()] - The folder where the file will be saved. Defaults to the current working directory.
* @param {string} [options.archiveFileName='archive-YYYYMMDDHHmmss.csv'] - The name of the file to be written. Defaults to 'archive-YYYYMMDDHHmmss.csv'.
* @returns {string|undefined} The CSV content that was written, or undefined when there is no data to write.
*/
export function save(data, options = {}) {
if (!data || !Array.isArray(data) || data.length<1) {
console.log("No content to write.");
return;
}
// Prepare content for csv
let allKeys = Array.from(new Set(data.flatMap(Object.keys)));
const headers = allKeys.join(',');
const rows = data.map(obj => formatCSVRow(obj, allKeys)).join("\n");
const csvContent = headers + "\n" + rows;
writeToFile(csvContent, options);
return csvContent;
}

export function writeToFile(content, options){
const ARCHIVE_FOLDER = options.archiveFolder || process.cwd();
const ARCHIVE_FULL_PATH = path.join(ARCHIVE_FOLDER, options.archiveFileName || `archive-${getFormattedDate()}.csv`);
fs.writeFile(ARCHIVE_FULL_PATH, content, { flag: 'a+' }, err => {
if (err) {
console.error(err);
return;
}
console.log("The file was saved!");
});
}

function formatCSVRow(obj, keys) {
return keys.map(key => formatCSVCell(obj[key])).join(',');
}

function formatCSVCell(value) {
if (value === undefined || value === null) {
return '';
} else if (typeof value === 'object') {
// Stringify objects/arrays and escape double quotes
return `"${JSON.stringify(value).replace(/"/g, '""')}"`;
} else if (typeof value === 'string') {
// Check for commas or line breaks and escape double quotes
if (value.includes(',') || value.includes('\n')) {
return `"${value.replace(/"/g, '""')}"`;
} else {
return value;
}
} else {
return value.toString();
}
}

export function getFormattedDate() {
const now = new Date();
return now.toISOString().replace(/[\-\:T\.Z]/g, '');
}
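
A minimal usage sketch for the new archive module (not part of this commit; the sample records and file name are made up to show how formatCSVCell escapes commas, quotes, and nested values):

import * as archive from './archive.js';

// Hypothetical sample records; the comma, quotes, and array exercise the CSV escaping
const records = [
  { login: 'octocat', contributions: 3, bio: 'Writes "clean" code, mostly' },
  { login: 'hubot', topics: ['bots', 'automation'] }
];

// Appends archive-demo.csv in the current working directory and returns the CSV string
const csv = archive.save(records, { archiveFileName: 'archive-demo.csv' });
console.log(csv);
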
113 changes: 85 additions & 28 deletions github.js
@@ -3,10 +3,8 @@
* @example To archive contributors leaderboard data in csv file, run `node contributors.js`
*/

-import * as path from 'path';
-import * as fs from 'fs';

import { makeRequest, makeRequestWithRateLimit } from './network.js';
+import * as archive from './archive.js';

// Configurations (Optional)
// Repo owner that you want to analyze
@@ -26,6 +24,26 @@ if(GITHUB_PERSONAL_TOKEN){
GITHUB_REQUEST_OPTIONS.headers["Authorization"] = "token "+GITHUB_PERSONAL_TOKEN;
}


/**
* Get details of a Github repo
* @param {string} fullRepoNameOrUrl e.g. myorghandle/myreponame
* @param {number} pageNo
* @returns {Promise<Object>} The repository details
* @example getRepoDetail('myorghandle/myreponame').then((repoDetail) => console.log(repoDetail)).catch((err) => console.log(err))
*/
export async function getRepoDetail(fullRepoNameOrUrl, pageNo = 1) {
if(!fullRepoNameOrUrl) throw new Error("Invalid input")
let fullRepoName = fullRepoNameOrUrl.match(/github\.com(?:\/repos)?\/([^\/]+\/[^\/]+)/)?.[1] || fullRepoNameOrUrl;
let url = `https://api.github.com/repos/${fullRepoName}`;
console.log(url);
const { res, data } = await makeRequestWithRateLimit('GET', url, Object.assign({},GITHUB_REQUEST_OPTIONS));
console.log("Repo detail request finished for " + fullRepoName)
// console.log(data)
let dataJson = JSON.parse(data);
return dataJson;
}

/**
* Get all github repos of an owner(user/org)
* @param {string} owner The organization or user name on GitHub
@@ -176,18 +194,12 @@ function writeContributorLeaderboardToFile(contributors, options={}) {
if(!contributors || contributors.length<1){
return;
}
-const ARCHIVE_FOLDER = options.archiveFolder || process.cwd();
-const ARCHIVE_FULL_PATH = path.join(ARCHIVE_FOLDER, options.archiveFileName || 'archive-gh-contributors-leaderboard.csv');
// Prepare data
let ghContributorLeaderboard = contributors.map((contributor) => {
return ["@" + contributor.login, contributor.contributions, contributor.html_url, contributor.avatar_url, contributor.topContributedRepo, contributor.allContributedRepos].join();
}).join("\n");
ghContributorLeaderboard = "Github Username,Total Contributions,Profile,Avatar,Most Contribution To,Contributed To\n" + ghContributorLeaderboard;
-fs.writeFile(ARCHIVE_FULL_PATH, ghContributorLeaderboard, { flag: 'a+' }, function (err) {
-if (err) {
-return console.log(err);
-}
-console.log("The file was saved!");
-});
+archive.writeToFile(ghContributorLeaderboard, Object.assign({ archiveFileName: 'archive-gh-contributors-leaderboard.csv' }, options));
}

/**
@@ -215,14 +227,15 @@ export async function archiveContributorsLeaderboard(owner=REPO_OWNER, options)
/**
* Search pull requests
* @param {string} query
-* @param {Object} options Additional options e.g. { pageNo: 1 }
+* @param {Object} [options] Additional options
+* @param {number} [options.pageNo=1] Result page number
*/
export async function searchPullRequests(query, options) {
let pageNo = (options && options.pageNo) ? options.pageNo : 1;
if(options && options.GITHUB_PERSONAL_TOKEN){
GITHUB_REQUEST_OPTIONS.headers["Authorization"] = "token "+options.GITHUB_PERSONAL_TOKEN;
}
-let queryString = encodeURIComponent(''+query+'type:pr')
+let queryString = encodeURIComponent((query || '') + '+type:pr');
let url = `https://api.github.com/search/issues?q=${queryString}&per_page=100&page=${pageNo}&sort=${options.sort || 'created'}`;
const { res, data } = await makeRequestWithRateLimit('GET', url, Object.assign({},GITHUB_REQUEST_OPTIONS));
console.log("PR search request finished");
@@ -233,32 +246,76 @@ export async function searchPullRequests(query, options) {
}
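
For reference, a hypothetical single-page call (the query below is illustrative; the function appends type:pr to it, and the recursive helper below expects the parsed search response with total_count and items):

// Sketch: first page of merged PRs in an org (hypothetical query)
const result = await searchPullRequests('org:myorghandle is:merged', { pageNo: 1 });
console.log(result.total_count + " PRs matched, " + result.items.length + " on this page");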

/**
-* Get all search results, not just one page
+* Get all PRs matching query
* @param {string} query
-* @param {Object} options
-* @param {Object} options.maxResults limit maximum results
+* @param {Object} [options]
+* @param {number} [options.maxResults=1000] limit maximum results
*/
-export async function recursiveSearchPullRequests(query, options){
+export async function recursivelySearchPullRequests(query, options){
+let searchRequestOptions = Object.assign({ pageNo: 1, maxResults: 1000 }, options)
let prList = [];
-let pageNo = 1;
-let maxResults = options.maxResults || 10000;
-let searchResultObject = await searchPullRequests(query, Object.assign({ pageNo: pageNo }, options));
+let searchResultObject = await searchPullRequests(query, searchRequestOptions);
// Iterate over results if there are more results expected by the user
-if(!searchResultObject || !searchResultObject.items || !searchResultObject.items.length<1 || maxResults < 100){
+if(!searchResultObject || !searchResultObject.items || searchResultObject.items.length<1){
return prList;
}
-prList.push(searchResultObject.items);
-let incomplete_results = searchResultObject.incomplete_results;
-while(prList.length < searchResultObject.total_count && !incomplete_results){
-pageNo++;
+prList.push(...searchResultObject.items);
+while(prList.length < searchRequestOptions.maxResults && prList.length < searchResultObject.total_count){
+searchRequestOptions.pageNo++;
try {
-let nextPageSearchResultObject = await searchPullRequests(query, { pageNo: pageNo } );
+let nextPageSearchResultObject = await searchPullRequests(query, searchRequestOptions);
prList.push(...nextPageSearchResultObject.items);
-incomplete_results = nextPageSearchResultObject.incomplete_results;
} catch (err) {
-console.log("Some issue in recursive search for pull requests")
+console.log("Some issue in recursive search for pull requests");
break;
}
}
console.log("Found "+prList.length +" PRs"+" for "+query);
return prList;
}
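
A usage sketch for the recursive variant (query and limits are illustrative); paging stops once maxResults is reached or the result set is exhausted:

// Collect up to 300 matching PRs (three pages of 100) for a hypothetical query
const prs = await recursivelySearchPullRequests('org:myorghandle created:>2023-01-01', { maxResults: 300 });
console.log('Collected ' + prs.length + ' pull requests');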


/**
* Aggregates all pull requests based on a specified field
* @param {Object[]} pullRequests - An array of pull request objects.
* @param {string} aggregatorField - The field name used to aggregate the pull requests. Defaults to "repository_url".
* @returns {Object[]} An array of objects, each containing a unique value of the aggregator field and an array of all pull requests that share that value.
*/
export function aggregateAllPullRequests(pullRequests, aggregatorField = "repository_url") {
return pullRequests.reduce((grouped, currentItem) => {
// Skipping the items without aggregatorField
if (!currentItem[aggregatorField]) {
return grouped;
}
// Find or create the group for the current item
let group = grouped.find(g => g[aggregatorField] === currentItem[aggregatorField]);
if (!group) {
group = { [aggregatorField]: currentItem[aggregatorField], pull_requests: [] };
grouped.push(group);
}
// Add the current item to the group
group.pull_requests.push(currentItem);
return grouped;
}, []);
}

/**
* Archives repos that have pull requests matching the search query
* @param {string} query The search query to match pull requests
* @param {Object} options Additional options
*/
export async function archiveReposWithMatchingPullRequests(query, options) {
let pullRequests = await recursivelySearchPullRequests(query, options);
if (!pullRequests || pullRequests.length < 1) {
console.log("Failed to get PRs for query: "+query);
return;
}
let repos = aggregateAllPullRequests(pullRequests, 'repository_url');
if(!repos) throw new Error("No repo found");
for(let repo of repos){
let repoDetail = await getRepoDetail(repo['repository_url']);
Object.assign(repo, repoDetail);
}
archive.save(repos, { archiveFileName: `repos-pr-${query}-${options.maxResults || 1000}-${archive.getFormattedDate()}.csv` });
return repos;
}
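
End to end, the new flow could be exercised with something like this (the query, limit, and environment variable are placeholders, not part of the commit):

// Sketch: archive every repo that has a PR matching the query into a CSV
const repos = await archiveReposWithMatchingPullRequests('org:myorghandle label:hacktoberfest-accepted', {
  maxResults: 500,
  GITHUB_PERSONAL_TOKEN: process.env.GITHUB_PERSONAL_TOKEN // hypothetical way to pass a token
});
console.log('Archived ' + (repos ? repos.length : 0) + ' repos');
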
2 changes: 2 additions & 0 deletions network.js
@@ -115,6 +115,8 @@ export async function makeRequestWithRateLimit(method, url, options){
if (!flow || flow.shouldRun()) {
// Add business logic to process incoming request
console.log("Request accepted. Processing...");
// Wait for 2 seconds before processing, to stay under the rate limit
await new Promise(resolve => setTimeout(resolve, 2000));
const {res, data} = await makeRequest(...arguments)
return { res, data}
} else {
20 changes: 20 additions & 0 deletions test/archive.test.js
@@ -0,0 +1,20 @@
import { expect, assert } from "chai";
import * as archive from "../archive.js";

import * as pullRequestsFixture from './fixtures/pullRequests.fixture.js';

describe('archive.js', function() {

/** Archive test --START-- */

describe.skip('#archive(jsonArray);', async function() {
it('should save jsonArray to a csv file', async function() {
this.timeout(100000);
let content = await archive.save(pullRequestsFixture.VALID_PR_SEARCH_RESULT_ITEMS);
assert.isNotNull(content, "CSV content not returned");
expect(content).to.be.an('string');
expect(content).to.have.lengthOf.greaterThan(2000);
})
})

})
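
Assuming mocha is the configured test runner for this repo (not confirmed by this diff) and the .skip is removed, the spec can be run directly:

npx mocha test/archive.test.js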
