Skip to content

Commit

Permalink
Bump version to v0.4.4 ; Fixes to TMMLUplus (#2280)
Browse files (browse the repository at this point in the history)
  • Loading branch information
haileyschoelkopf authored Sep 5, 2024
1 parent: 7a1614e · commit: 543617f
Show file tree
Hide file tree
Showing 77 changed files with 195 additions and 220 deletions.
7 changes: 0 additions & 7 deletions lm_eval/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,13 +299,6 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
"When `fewshot_as_multiturn` is selected, `apply_chat_template` must be set (either to `True` or to the chosen template name)."
)

if (
args.num_fewshot is None or args.num_fewshot == 0
) and args.fewshot_as_multiturn:
raise ValueError(
"If fewshot_as_multiturn is set, num_fewshot must be greater than 0."
)

if args.include_path is not None:
eval_logger.info(f"Including path: {args.include_path}")
task_manager = TaskManager(args.verbosity, include_path=args.include_path)
Expand Down
10 changes: 6 additions & 4 deletions lm_eval/tasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,10 +489,12 @@ def _get_task_and_group(self, task_dir: str):
if attr in config:
if attr == "group" and print_info:
self.logger.info(
"`group` and `group_alias` keys in tasks' configs will no longer be used in the next release of lm-eval. "
"`tag` will be used to allow to call a collection of tasks just like `group`. "
"`group` will be removed in order to not cause confusion with the new ConfigurableGroup "
"which will be the official way to create groups with addition of group-wide configurations."
"`group` and `group_alias` keys in TaskConfigs are deprecated and will be removed in v0.4.5 of lm_eval. "
"The new `tag` field will be used to allow for a shortcut to a group of tasks one does not wish to aggregate metrics across. "
"`group`s which aggregate across subtasks must be only defined in a separate group config file, "
"which will be the official way to create groups that support cross-task aggregation as in `mmlu`. "
"Please see the v0.4.4 patch notes and our documentation: https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/new_task_guide.md#advanced-group-configs "
"for more information."
)
print_info = False
# attr = "tag"
Expand Down
13 changes: 13 additions & 0 deletions lm_eval/tasks/tmmluplus/default/_tmmluplus.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
group: tmmluplus
task:
- tmmluplus_other
- tmmluplus_social_sciences
- tmmluplus_humanities
- tmmluplus_STEM
aggregate_metric_list:
- metric: acc
weight_by_size: True
- metric: acc_norm
weight_by_size: True
metadata:
version: 2.0
10 changes: 10 additions & 0 deletions lm_eval/tasks/tmmluplus/default/_tmmluplus_STEM.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
group: tmmluplus_STEM
task:
- tmmluplus_STEM_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: True
- metric: acc_norm
weight_by_size: True
metadata:
version: 2.0
10 changes: 10 additions & 0 deletions lm_eval/tasks/tmmluplus/default/_tmmluplus_humanities.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
group: tmmluplus_humanities
task:
- tmmluplus_humanities_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: True
- metric: acc_norm
weight_by_size: True
metadata:
version: 2.0
10 changes: 10 additions & 0 deletions lm_eval/tasks/tmmluplus/default/_tmmluplus_other.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
group: tmmluplus_other
task:
- tmmluplus_other_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: True
- metric: acc_norm
weight_by_size: True
metadata:
version: 2.0
10 changes: 10 additions & 0 deletions lm_eval/tasks/tmmluplus/default/_tmmluplus_social_sciences.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
group: tmmluplus_social_sciences
task:
- tmmluplus_social_sciences_tasks
aggregate_metric_list:
- metric: acc
weight_by_size: True
- metric: acc_norm
weight_by_size: True
metadata:
version: 2.0
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ metric_list:
aggregation: mean
higher_is_better: true
metadata:
version: 1.0
version: 2.0
6 changes: 0 additions & 6 deletions lm_eval/tasks/tmmluplus/default/tmmluplus.yaml

This file was deleted.

5 changes: 2 additions & 3 deletions lm_eval/tasks/tmmluplus/default/tmmluplus_accounting.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "accounting"
"description": "以下為會計學的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_other"
"group_alias": "other"
"include": "_default_template_yaml"
"tag": "tmmluplus_other_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_accounting"
"task_alias": "accounting"
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "administrative_law"
"description": "以下為行政法的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_humanities"
"group_alias": "humanities"
"include": "_default_template_yaml"
"tag": "tmmluplus_humanities_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_administrative_law"
"task_alias": "administrative law"
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "advance_chemistry"
"description": "以下為化學的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_STEM"
"group_alias": "STEM"
"include": "_default_template_yaml"
"tag": "tmmluplus_STEM_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_advance_chemistry"
"task_alias": "advance chemistry"
5 changes: 2 additions & 3 deletions lm_eval/tasks/tmmluplus/default/tmmluplus_agriculture.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "agriculture"
"description": "以下為農業的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_other"
"group_alias": "other"
"include": "_default_template_yaml"
"tag": "tmmluplus_other_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_agriculture"
"task_alias": "agriculture"
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "anti_money_laundering"
"description": "以下為洗錢防制的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_humanities"
"group_alias": "humanities"
"include": "_default_template_yaml"
"tag": "tmmluplus_humanities_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_anti_money_laundering"
"task_alias": "anti money laundering"
5 changes: 2 additions & 3 deletions lm_eval/tasks/tmmluplus/default/tmmluplus_auditing.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "auditing"
"description": "以下為審計學的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_other"
"group_alias": "other"
"include": "_default_template_yaml"
"tag": "tmmluplus_other_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_auditing"
"task_alias": "auditing"
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "basic_medical_science"
"description": "以下為基礎醫學的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_STEM"
"group_alias": "STEM"
"include": "_default_template_yaml"
"tag": "tmmluplus_STEM_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_basic_medical_science"
"task_alias": "basic medical science"
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "business_management"
"description": "以下為企業管理的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_other"
"group_alias": "other"
"include": "_default_template_yaml"
"tag": "tmmluplus_other_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_business_management"
"task_alias": "business management"
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "chinese_language_and_literature"
"description": "以下為國文的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_social_sciences"
"group_alias": "social sciences"
"include": "_default_template_yaml"
"tag": "tmmluplus_social_sciences_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_chinese_language_and_literature"
"task_alias": "chinese language and literature"
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "clinical_psychology"
"description": "以下為臨床心理學的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_social_sciences"
"group_alias": "social sciences"
"include": "_default_template_yaml"
"tag": "tmmluplus_social_sciences_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_clinical_psychology"
"task_alias": "clinical psychology"
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "computer_science"
"description": "以下為資訊工程的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_STEM"
"group_alias": "STEM"
"include": "_default_template_yaml"
"tag": "tmmluplus_STEM_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_computer_science"
"task_alias": "computer science"
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "culinary_skills"
"description": "以下為餐旅的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_other"
"group_alias": "other"
"include": "_default_template_yaml"
"tag": "tmmluplus_other_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_culinary_skills"
"task_alias": "culinary skills"
5 changes: 2 additions & 3 deletions lm_eval/tasks/tmmluplus/default/tmmluplus_dentistry.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "dentistry"
"description": "以下為牙醫學的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_other"
"group_alias": "other"
"include": "_default_template_yaml"
"tag": "tmmluplus_other_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_dentistry"
"task_alias": "dentistry"
5 changes: 2 additions & 3 deletions lm_eval/tasks/tmmluplus/default/tmmluplus_economics.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "economics"
"description": "以下為經濟學的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_social_sciences"
"group_alias": "social sciences"
"include": "_default_template_yaml"
"tag": "tmmluplus_social_sciences_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_economics"
"task_alias": "economics"
5 changes: 2 additions & 3 deletions lm_eval/tasks/tmmluplus/default/tmmluplus_education.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "education"
"description": "以下為教育常識的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_social_sciences"
"group_alias": "social sciences"
"include": "_default_template_yaml"
"tag": "tmmluplus_social_sciences_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_education"
"task_alias": "education"
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "education_(profession_level)"
"description": "以下為教育專業的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_social_sciences"
"group_alias": "social sciences"
"include": "_default_template_yaml"
"tag": "tmmluplus_social_sciences_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_education_(profession_level)"
"task_alias": "education (profession level)"
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "educational_psychology"
"description": "以下為教育心理的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_social_sciences"
"group_alias": "social sciences"
"include": "_default_template_yaml"
"tag": "tmmluplus_social_sciences_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_educational_psychology"
"task_alias": "educational psychology"
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "engineering_math"
"description": "以下為工程數學的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_STEM"
"group_alias": "STEM"
"include": "_default_template_yaml"
"tag": "tmmluplus_STEM_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_engineering_math"
"task_alias": "engineering math"
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "finance_banking"
"description": "以下為金融與法規的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_other"
"group_alias": "other"
"include": "_default_template_yaml"
"tag": "tmmluplus_other_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_finance_banking"
"task_alias": "finance banking"
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "financial_analysis"
"description": "以下為財務分析的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_other"
"group_alias": "other"
"include": "_default_template_yaml"
"tag": "tmmluplus_other_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_financial_analysis"
"task_alias": "financial analysis"
5 changes: 2 additions & 3 deletions lm_eval/tasks/tmmluplus/default/tmmluplus_fire_science.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "fire_science"
"description": "以下為火災學的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_other"
"group_alias": "other"
"include": "_default_template_yaml"
"tag": "tmmluplus_other_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_fire_science"
"task_alias": "fire science"
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "general_principles_of_law"
"description": "以下為法學大意的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_humanities"
"group_alias": "humanities"
"include": "_default_template_yaml"
"tag": "tmmluplus_humanities_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_general_principles_of_law"
"task_alias": "general principles of law"
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "geography_of_taiwan"
"description": "以下為台灣地理的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_social_sciences"
"group_alias": "social sciences"
"include": "_default_template_yaml"
"tag": "tmmluplus_social_sciences_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_geography_of_taiwan"
"task_alias": "geography of taiwan"
5 changes: 2 additions & 3 deletions lm_eval/tasks/tmmluplus/default/tmmluplus_human_behavior.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "human_behavior"
"description": "以下為人類行為與社會的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_social_sciences"
"group_alias": "social sciences"
"include": "_default_template_yaml"
"tag": "tmmluplus_social_sciences_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_human_behavior"
"task_alias": "human behavior"
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "insurance_studies"
"description": "以下為保險學的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_other"
"group_alias": "other"
"include": "_default_template_yaml"
"tag": "tmmluplus_other_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_insurance_studies"
"task_alias": "insurance studies"
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "introduction_to_law"
"description": "以下為法律概論的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_humanities"
"group_alias": "humanities"
"include": "_default_template_yaml"
"tag": "tmmluplus_humanities_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_introduction_to_law"
"task_alias": "introduction to law"
5 changes: 2 additions & 3 deletions lm_eval/tasks/tmmluplus/default/tmmluplus_jce_humanities.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "jce_humanities"
"description": "以下為指考人文科目的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_humanities"
"group_alias": "humanities"
"include": "_default_template_yaml"
"tag": "tmmluplus_humanities_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_jce_humanities"
"task_alias": "jce humanities"
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "junior_chemistry"
"description": "以下為國中理化的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_STEM"
"group_alias": "STEM"
"include": "_default_template_yaml"
"tag": "tmmluplus_STEM_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_junior_chemistry"
"task_alias": "junior chemistry"
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"dataset_name": "junior_chinese_exam"
"description": "以下為國中會考基測國文的單選題,請提供正確答案的選項。\n\n"
"group": "tmmluplus_social_sciences"
"group_alias": "social sciences"
"include": "_default_template_yaml"
"tag": "tmmluplus_social_sciences_tasks"
"include": "_tmmluplus_template_yaml"
"task": "tmmluplus_junior_chinese_exam"
"task_alias": "junior chinese exam"
Loading

0 comments on commit 543617f

Please sign in to comment.