Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Step to Fetch Missing Git Messages from Commits Analyzed Before Commits Message Table was Added #2905

Merged
merged 3 commits into from
Sep 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions augur/application/db/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

logger = logging.getLogger("db_lib")

def convert_type_of_value(config_dict, logger=None):

Check warning on line 20 in augur/application/db/lib.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0621: Redefining name 'logger' from outer scope (line 18) (redefined-outer-name) Raw Output: augur/application/db/lib.py:20:39: W0621: Redefining name 'logger' from outer scope (line 18) (redefined-outer-name)


data_type = config_dict["type"]
Expand Down Expand Up @@ -177,11 +177,27 @@

try:
working_commits = fetchall_data_from_sql_text(query)
except:

Check warning on line 180 in augur/application/db/lib.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0702: No exception type(s) specified (bare-except) Raw Output: augur/application/db/lib.py:180:4: W0702: No exception type(s) specified (bare-except)
working_commits = []

return working_commits

def get_missing_commit_message_hashes(repo_id):
    """Fetch the commit hashes of a repo that have no commit message record.

    Selects every distinct ``cmt_commit_hash`` in ``commits`` for ``repo_id``
    that does not appear as a ``cmt_hash`` in ``commit_messages`` for the
    same repo.

    Args:
        repo_id: Repository id bound to the ``:repo_id`` query parameter.

    Returns:
        The rows returned by ``fetchall_data_from_sql_text`` for the query,
        or an empty list when the query fails (best-effort lookup).
    """
    fetch_missing_hashes_sql = s.sql.text("""
    SELECT DISTINCT cmt_commit_hash FROM commits
    WHERE repo_id=:repo_id
    AND cmt_commit_hash NOT IN
    (SELECT DISTINCT cmt_hash FROM commit_messages WHERE repo_id=:repo_id);
    """).bindparams(repo_id=repo_id)

    try:
        return fetchall_data_from_sql_text(fetch_missing_hashes_sql)
    except Exception:  # deliberately broad: the lookup stays best-effort
        # Unlike a bare ``except:``, this lets KeyboardInterrupt/SystemExit
        # propagate, and the failure is logged instead of silently hidden.
        logger.exception(
            "Failed to fetch missing commit message hashes for repo_id %s",
            repo_id,
        )
        return []

def get_worker_oauth_keys(platform: str):

with get_session() as session:
Expand All @@ -197,7 +213,7 @@
return session.query(CollectionStatus).filter(getattr(CollectionStatus,f"{collection_type}_status" ) == CollectionState.COLLECTING.value).count()


def facade_bulk_insert_commits(logger, records):

Check warning on line 216 in augur/application/db/lib.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0621: Redefining name 'logger' from outer scope (line 18) (redefined-outer-name) Raw Output: augur/application/db/lib.py:216:31: W0621: Redefining name 'logger' from outer scope (line 18) (redefined-outer-name)

with get_session() as session:

Expand Down Expand Up @@ -239,7 +255,7 @@
raise e


def bulk_insert_dicts(logger, data: Union[List[dict], dict], table, natural_keys: List[str], return_columns: Optional[List[str]] = None, string_fields: Optional[List[str]] = None, on_conflict_update:bool = True) -> Optional[List[dict]]:

Check warning on line 258 in augur/application/db/lib.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0621: Redefining name 'logger' from outer scope (line 18) (redefined-outer-name) Raw Output: augur/application/db/lib.py:258:22: W0621: Redefining name 'logger' from outer scope (line 18) (redefined-outer-name)

if isinstance(data, list) is False:

Expand Down
46 changes: 45 additions & 1 deletion augur/tasks/git/facade_tasks.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
#SPDX-License-Identifier: MIT

import logging
import datetime
from celery import group, chain

from augur.application.db.lib import get_session, get_repo_by_repo_git, get_repo_by_repo_id, remove_working_commits_by_repo_id_and_hashes, get_working_commits_by_repo_id, facade_bulk_insert_commits, bulk_insert_dicts
from subprocess import check_output
from augur.application.db.lib import get_session, get_repo_by_repo_git, get_repo_by_repo_id, remove_working_commits_by_repo_id_and_hashes, get_working_commits_by_repo_id, facade_bulk_insert_commits, bulk_insert_dicts, get_missing_commit_message_hashes

from augur.tasks.git.util.facade_worker.facade_worker.utilitymethods import trim_commits
from augur.tasks.git.util.facade_worker.facade_worker.utilitymethods import get_absolute_repo_path, get_parent_commits_set, get_existing_commits_set
Expand Down Expand Up @@ -157,6 +159,47 @@
facade_helper.update_status('Updating Contributors')
facade_helper.log_activity('Info', 'Updating Contributors with commits')

def _commit_hash_from_row(row):
    """Return the commit hash carried by one missing-hash query row.

    NOTE(review): the row shape produced by
    ``get_missing_commit_message_hashes`` is not visible from this module, so
    plain strings, mappings keyed by ``cmt_commit_hash`` and positional rows
    are all accepted -- confirm against the DB helper.
    """
    if isinstance(row, str):
        return row
    try:
        return row["cmt_commit_hash"]
    except (TypeError, KeyError, IndexError):
        # Positional (tuple-like) row: the query selects a single column.
        return row[0]


@celery.task(base=AugurFacadeRepoCollectionTask)
def facade_fetch_missing_commit_messages(repo_git):
    """Backfill commit message records for commits that do not have one.

    Looks up the repo by its git URL, asks the database which commit hashes
    have no commit message record, reads each message from the local clone
    with ``git log`` and bulk-inserts the records in batches.

    Args:
        repo_git: Git URL identifying the repository.

    Returns:
        None. Commits whose message cannot be read are logged and skipped so
        that one bad hash does not discard the rest of the batch.
    """
    # Local import: the file-level import only brings in ``check_output``.
    from subprocess import CalledProcessError

    batch_size = 1000
    natural_keys = ["repo_id", "cmt_hash"]

    logger = logging.getLogger(facade_fetch_missing_commit_messages.__name__)
    facade_helper = FacadeHelper(logger)

    repo = get_repo_by_repo_git(repo_git)

    logger.debug(f"Fetching missing commit message records for repo {repo_git}")

    missing_message_hashes = get_missing_commit_message_hashes(repo.repo_id)
    if not missing_message_hashes:
        return

    # The clone location is the same for every commit, so resolve it once.
    absolute_path = get_absolute_repo_path(
        facade_helper.repo_base_directory,
        repo.repo_id,
        repo.repo_path,
        repo.repo_name,
    )
    repo_loc = f"{absolute_path}/.git"

    to_insert = []

    for row in missing_message_hashes:
        commit_hash = _commit_hash_from_row(row)

        try:
            # Pass argv as a list: splitting a formatted string on whitespace
            # would break for repository paths that contain spaces.
            raw_message = check_output(
                ["git", "--git-dir", repo_loc, "log", "--format=%B", "-n", "1", commit_hash]
            )
        except (CalledProcessError, OSError) as err:
            logger.warning(
                "Could not read commit message for %s in repo %s: %s",
                commit_hash,
                repo_git,
                err,
            )
            continue

        # Commit messages are not guaranteed to be valid UTF-8; replace
        # undecodable bytes rather than aborting the whole task.
        commit_message = raw_message.decode('utf-8', errors='replace').strip()

        to_insert.append({
            'repo_id' : repo.repo_id,
            'cmt_msg' : commit_message,
            'cmt_hash' : commit_hash,
            'tool_source' : 'Facade',
            'tool_version' : '0.78?',
            'data_source' : 'git',
            'data_collection_date' : datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        })

        # Flush in fixed-size batches to bound memory on large backfills.
        if len(to_insert) >= batch_size:
            bulk_insert_dicts(logger, to_insert, CommitMessage, natural_keys)
            to_insert = []

    if to_insert:
        bulk_insert_dicts(logger, to_insert, CommitMessage, natural_keys)


#enable celery multithreading
@celery.task(base=AugurFacadeRepoCollectionTask)
Expand Down Expand Up @@ -193,7 +236,7 @@
facade_helper.log_activity('Debug',f"Commits missing from repo {repo_id}: {len(missing_commits)}")


if not len(missing_commits) or repo_id is None:

Check warning on line 239 in augur/tasks/git/facade_tasks.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 C1802: Do not use `len(SEQUENCE)` without comparison to determine if a sequence is empty (use-implicit-booleaness-not-len) Raw Output: augur/tasks/git/facade_tasks.py:239:7: C1802: Do not use `len(SEQUENCE)` without comparison to determine if a sequence is empty (use-implicit-booleaness-not-len)
#session.log_activity('Info','Type of missing_commits: %s' % type(missing_commits))
return

Expand Down Expand Up @@ -354,6 +397,7 @@

analysis_sequence.append(trim_commits_post_analysis_facade_task.si(repo_git))

analysis_sequence.append(facade_fetch_missing_commit_messages.si(repo_git))

analysis_sequence.append(facade_analysis_end_facade_task.si())

Expand Down
Loading