Skip to content

Commit

Permalink
Merge pull request #2972 from chaoss/dev
Browse files (browse the repository at this point in the history)
Release v0.80.0 - PR
  • Loading branch information
sgoggins authored Jan 21, 2025
2 parents a8dc898 + a7d11e1 commit f7ed468
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 44 deletions.
9 changes: 7 additions & 2 deletions augur/application/db/data_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -791,6 +791,11 @@ def extract_needed_pr_data_from_gitlab_merge_request(pr, repo_id, tool_source, t
Returns:
Parsed pr dict
"""
pr_closed_datetime = pr['closed_at']
pr_merged_datetime = pr['merged_at']

if not pr_closed_datetime:
pr_closed_datetime = pr_merged_datetime

pr_dict = {
'repo_id': repo_id,
Expand All @@ -810,8 +815,8 @@ def extract_needed_pr_data_from_gitlab_merge_request(pr, repo_id, tool_source, t
'pr_body': pr['description'],
'pr_created_at': pr['created_at'],
'pr_updated_at': pr['updated_at'],
'pr_closed_at': pr['closed_at'],
'pr_merged_at': pr['merged_at'],
'pr_closed_at': pr_closed_datetime,
'pr_merged_at': pr_merged_datetime,
'pr_merge_commit_sha': pr['merge_commit_sha'],
'pr_teams': None,
'pr_milestone': pr['milestone'].get('title') if pr['milestone'] else None,
Expand Down
6 changes: 3 additions & 3 deletions (file path not captured in this copy)
Original file line number Diff line number Diff line change
Expand Up @@ -140,12 +140,12 @@ def parse_poetry_lock(file_handle):
group = 'runtime'
for package in manifest['package']:
req = None
if package['category'] == 'main':
if package.get('category') == 'main':
group = 'runtime'
if package['category'] == 'dev':
if package.get('category') == 'dev':
group = 'develop'
if 'version' in package:
req = package['version']
req = package.get('version')
elif 'git' in package:
req = package['git']+'#'+package['ref']
Dict = {'name': package['name'], 'requirement': req, 'type': group, 'package': 'PYPI'}
Expand Down
56 changes: 29 additions & 27 deletions augur/tasks/git/dependency_libyear_tasks/libyear_util/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,54 +32,56 @@ def get_parsed_deps(path,logger):

deps_file = None
dependency_list = list()

for f in file_list:
deps_file = find(f, path)
if not deps_file:

if not deps_file or not f:
continue
file_handle= open(deps_file)

if f == 'Requirement.txt':
dependency_list = parse_requirement_txt(file_handle)
short_file_name = os.path.split(deps_file)[-1]

if short_file_name == 'Requirement.txt':
dependency_list.extend(parse_requirement_txt(file_handle))

elif f == 'requirements.txt':
dependency_list = parse_requirement_txt(file_handle)
if short_file_name == 'requirements.txt':
dependency_list.extend(parse_requirement_txt(file_handle))

elif f == 'setup.py':
dependency_list = parse_setup_py(file_handle)
if short_file_name == 'setup.py':
dependency_list.extend(parse_setup_py(file_handle))

elif f == 'Pipfile':
dependency_list = parse_pipfile(file_handle)
if short_file_name == 'Pipfile':
dependency_list.extend(parse_pipfile(file_handle))

elif f == 'Pipfile.lock':
dependency_list = parse_pipfile_lock(file_handle)
if short_file_name == 'Pipfile.lock':
dependency_list.extend(parse_pipfile_lock(file_handle))

elif f == 'pyproject.toml':
dependency_list = parse_poetry(file_handle)
if short_file_name == 'pyproject.toml':
dependency_list.extend(parse_poetry(file_handle))

elif f == 'poetry.lock':
dependency_list = parse_poetry_lock(file_handle)
if short_file_name == 'poetry.lock':
dependency_list.extend(parse_poetry_lock(file_handle))

elif f == 'environment.yml':
dependency_list = parse_conda(file_handle)
if short_file_name == 'environment.yml':
dependency_list.extend(parse_conda(file_handle))

elif f == 'environment.yaml':
dependency_list = parse_conda(file_handle)
if short_file_name == 'environment.yaml':
dependency_list.extend(parse_conda(file_handle))

elif f == 'environment.yml.lock':
dependency_list = parse_conda(file_handle)
if f == 'environment.yml.lock':
dependency_list.extend(parse_conda(file_handle))

elif f == 'environment.yaml.lock':
dependency_list = parse_conda(file_handle)
if short_file_name == 'environment.yaml.lock':
dependency_list.extend(parse_conda(file_handle))

elif f == 'package.json':
if short_file_name == 'package.json':
try:
dependency_list = parse_package_json(file_handle)
dependency_list.extend(parse_package_json(file_handle))
except KeyError as e:
logger.error(f"package.json for repo at path {path} is missing required key: {e}\n Skipping file...")


return dependency_list
return dependency_list


def get_libyear(current_version, current_release_date, latest_version, latest_release_date):
Expand Down
41 changes: 30 additions & 11 deletions augur/tasks/github/messages.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging

from datetime import timedelta, timezone

from augur.tasks.init.celery_app import celery_app as celery
from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask
Expand All @@ -10,12 +10,13 @@
from augur.tasks.github.util.util import get_owner_repo
from augur.application.db.models import PullRequest, Message, Issue, PullRequestMessageRef, IssueMessageRef, Contributor, Repo, CollectionStatus
from augur.application.db import get_engine, get_session
from augur.application.db.lib import get_core_data_last_collected
from sqlalchemy.sql import text

platform_id = 1

@celery.task(base=AugurCoreRepoCollectionTask)
def collect_github_messages(repo_git: str) -> None:
def collect_github_messages(repo_git: str, full_collection: bool) -> None:

logger = logging.getLogger(collect_github_messages.__name__)

Expand All @@ -29,9 +30,15 @@ def collect_github_messages(repo_git: str) -> None:
owner, repo = get_owner_repo(repo_git)
task_name = f"{owner}/{repo}: Message Task"

if full_collection:
core_data_last_collected = None
else:
# subtract 2 days to ensure all data is collected
core_data_last_collected = (get_core_data_last_collected(repo_id) - timedelta(days=2)).replace(tzinfo=timezone.utc)


if is_repo_small(repo_id):
message_data = fast_retrieve_all_pr_and_issue_messages(repo_git, logger, manifest.key_auth, task_name)
message_data = fast_retrieve_all_pr_and_issue_messages(repo_git, logger, manifest.key_auth, task_name, core_data_last_collected)

if message_data:
process_messages(message_data, task_name, repo_id, logger, augur_db)
Expand All @@ -40,7 +47,7 @@ def collect_github_messages(repo_git: str) -> None:
logger.info(f"{owner}/{repo} has no messages")

else:
process_large_issue_and_pr_message_collection(repo_id, repo_git, logger, manifest.key_auth, task_name, augur_db)
process_large_issue_and_pr_message_collection(repo_id, repo_git, logger, manifest.key_auth, task_name, augur_db, core_data_last_collected)


def is_repo_small(repo_id):
Expand All @@ -51,13 +58,16 @@ def is_repo_small(repo_id):

return result != None

def fast_retrieve_all_pr_and_issue_messages(repo_git: str, logger, key_auth, task_name) -> None:
def fast_retrieve_all_pr_and_issue_messages(repo_git: str, logger, key_auth, task_name, since) -> None:

owner, repo = get_owner_repo(repo_git)

# url to get issue and pull request comments
url = f"https://api.github.com/repos/{owner}/{repo}/issues/comments"

if since:
url += f"?since={since.isoformat()}"

# define logger for task
logger.info(f"Collecting github comments for {owner}/{repo}")

Expand All @@ -70,7 +80,7 @@ def fast_retrieve_all_pr_and_issue_messages(repo_git: str, logger, key_auth, tas
return list(github_data_access.paginate_resource(url))


def process_large_issue_and_pr_message_collection(repo_id, repo_git: str, logger, key_auth, task_name, augur_db) -> None:
def process_large_issue_and_pr_message_collection(repo_id, repo_git: str, logger, key_auth, task_name, augur_db, since) -> None:

owner, repo = get_owner_repo(repo_git)

Expand All @@ -81,11 +91,20 @@ def process_large_issue_and_pr_message_collection(repo_id, repo_git: str, logger

with engine.connect() as connection:

query = text(f"""
(select pr_comments_url from pull_requests WHERE repo_id={repo_id} order by pr_created_at desc)
UNION
(select comments_url as comment_url from issues WHERE repo_id={repo_id} order by created_at desc);
""")
if since:
query = text(f"""
(select pr_comments_url from pull_requests WHERE repo_id={repo_id} AND pr_updated_at > timestamptz(timestamp '{since}') order by pr_created_at desc)
UNION
(select comments_url as comment_url from issues WHERE repo_id={repo_id} AND updated_at > timestamptz(timestamp '{since}') order by created_at desc);
""")
else:

query = text(f"""
(select pr_comments_url from pull_requests WHERE repo_id={repo_id} order by pr_created_at desc)
UNION
(select comments_url as comment_url from issues WHERE repo_id={repo_id} order by created_at desc);
""")


result = connection.execute(query).fetchall()
comment_urls = [x[0] for x in result]
Expand Down
2 changes: 1 addition & 1 deletion augur/tasks/start_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def primary_repo_collect_phase(repo_git, full_collection):
#Define secondary group that can't run until after primary jobs have finished.
secondary_repo_jobs = group(
collect_events.si(repo_git),#*create_grouped_task_load(dataList=first_pass, task=collect_events).tasks,
collect_github_messages.si(repo_git), #*create_grouped_task_load(dataList=first_pass,task=collect_github_messages).tasks,
collect_github_messages.si(repo_git, full_collection), #*create_grouped_task_load(dataList=first_pass,task=collect_github_messages).tasks,
collect_github_repo_clones_data.si(repo_git),
)

Expand Down

0 comments on commit f7ed468

Please sign in to comment.