
Strange memory footprint #2589

Open
zxgx opened this issue Dec 22, 2024 · 0 comments

zxgx commented Dec 22, 2024

Hi, I'm using lm_eval.simple_evaluate with HFLM to test deepseek-ai/DeepSeek-V2-Lite on 8 H100 GPUs, and I see a skewed memory footprint across the GPUs with a fixed batch size of 16.

[0] NVIDIA H100 80GB HBM3 | 50°C,  18 % | 36147 / 81559 MB | e1154485(36138M)
[1] NVIDIA H100 80GB HBM3 | 43°C,  28 % | 62013 / 81559 MB | e1154485(62002M)
[2] NVIDIA H100 80GB HBM3 | 41°C,   0 % | 36159 / 81559 MB | e1154485(36150M)
[3] NVIDIA H100 80GB HBM3 | 50°C,  22 % | 36161 / 81559 MB | e1154485(36152M)
[4] NVIDIA H100 80GB HBM3 | 59°C,  26 % | 36149 / 81559 MB | e1154485(36140M)
[5] NVIDIA H100 80GB HBM3 | 56°C,  54 % | 70093 / 81559 MB | e1154485(70084M)
[6] NVIDIA H100 80GB HBM3 | 50°C,   0 % | 36147 / 81559 MB | e1154485(36138M)
[7] NVIDIA H100 80GB HBM3 | 42°C,   0 % | 36149 / 81559 MB | e1154485(36140M)

It seems that both computation and memory are under-utilized.
I also tried setting batch_size="auto", but the profiling runs with the maximum sequence length (16k) and returns a batch size of 1, which is clearly not optimal.
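
For reference, this is roughly how I imagine constraining the auto-batch probe; I am assuming HFLM's max_length and max_batch_size arguments affect the probe, and the values below are just guesses, not something I have verified:

import torch
from lm_eval.models.huggingface import HFLM

# Hypothetical tweak (untested): cap the profiled sequence length and batch size
# so that batch_size="auto" does not probe with the full 16k context.
lm_obj = HFLM(
    "deepseek-ai/DeepSeek-V2-Lite",
    parallelize=True,
    device_map="auto",
    dtype=torch.bfloat16,
    trust_remote_code=True,
    batch_size="auto:4",   # re-profile the batch size a few times during the run
    max_batch_size=64,     # assumed upper bound for the auto probe
    max_length=4096,       # assumed cap, shorter than the model's 16k max_seq
)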

Do you have any suggestions for improving the evaluation efficiency? Is vLLM a better choice?
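For comparison, this is the kind of vLLM-backend call I have in mind (untested; the model_args values are just my guesses for this model):

# Hypothetical vLLM-backend run via lm_eval (argument values are assumptions).
results = lm_eval.simple_evaluate(
    model="vllm",
    model_args=(
        "pretrained=deepseek-ai/DeepSeek-V2-Lite,"
        "tensor_parallel_size=8,"
        "dtype=bfloat16,"
        "gpu_memory_utilization=0.9,"
        "trust_remote_code=True"
    ),
    tasks=["mmlu"],
    num_fewshot=5,
    batch_size="auto",
)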

Attached is my script:

import os
import logging
import argparse
from pprint import pformat
import json
import numpy as np
import torch
import lm_eval
from lm_eval.models.huggingface import HFLM

logging.basicConfig(
    format="%(asctime)s,%(msecs)03d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s",
    datefmt="%Y-%m-%d:%H:%M:%S",
    level=logging.INFO,
)


def _handle_non_serializable(o):
    """Copied from https://github.com/meta-llama/llama-recipes/blob/b5f64c0b69d7ff85ec186d964c6c557d55025969/tools/benchmarks/llm_eval_harness/eval.py#L18
    """
    if isinstance(o, np.int64) or isinstance(o, np.int32):
        return int(o)
    elif isinstance(o, set):
        return list(o)
    else:
        return str(o)


def get_args():
    parser = argparse.ArgumentParser()
    # model config
    parser.add_argument("--hf_model", type=str, required=True)
    parser.add_argument("--trust_remote_code", action="store_true")
    parser.add_argument("--dtype", type=torch.dtype, default=torch.bfloat16)
    # lm_eval config
    parser.add_argument(
        "--tasks", type=str, nargs='+', #default=['mmlu', 'winogrande'])
        default=[
            # English
            "mmlu", "winogrande", "hellaswag",
            # Math
            "gsm8k", "hendrycks_math", 
            # Chinese
            "ceval", "cmmlu",
            ])
    parser.add_argument(
        "--num_fewshots", type=int, nargs='+', #default=[5, 5])
        default=[
            # English
            # "mmlu", "winogrande", "hellaswag", 
            5, 5, 10,
            # Math
            # "gsm8k", "hendrycks_math", 
            8, 4,
            # Chinese
            # "ceval", "cmmlu",
            5, 5,
            ])
    parser.add_argument("--limit", type=int, default=None)

    parser.add_argument("--output_dir", type=str, default=None)

    return parser.parse_args()


def main():
    logging.info(f"OMP_NUM_THREADS: {os.environ.get('OMP_NUM_THREADS', None)}")
    args = get_args()
    logging.info(f"{pformat(vars(args), indent=2, width=120)}")

    hf_model = args.hf_model
    
    model_kwargs = {
        "trust_remote_code": True if args.trust_remote_code else None,
        # "torch_dtype": args.dtype,  # override by `dtype`` in lm_eval.HFLM
        "dtype": args.dtype,
        "device_map": "auto",
        "batch_size": 16, #"auto:4",
        "backend": "causal"
    }

    if "deepseek-ai/DeepSeek-V2.5-1210" == hf_model:
        # hardcoded according to the model card
        max_memory = {i: "75GB" for i in range(8)}
        model_kwargs = {
            "trust_remote_code": True,
            "device_map": "sequential",
            "dtype": torch.bfloat16,
            "max_memory": max_memory,
            "attn_implementation": "eager",
            "batch_size": "auto",
            "backend": "causal",
        }
    
    lm_eval_kwargs = {
        "limit": args.limit,
        "log_samples": False,
    }
    
    lm_obj = HFLM(hf_model, parallelize=True, **model_kwargs)
    if "deepseek-ai/DeepSeek-V2.5-1210" == hf_model:
        lm_obj.model.generation_config.pad_token_id = lm_obj.model.generation_config.eos_token_id
    
    if args.output_dir:
        os.makedirs(args.output_dir, exist_ok=True)

    for task, num_fewshot in zip(args.tasks, args.num_fewshots):
        logging.info(f"Evaluate task: {task} with fewshot {num_fewshot}")
        results = lm_eval.simple_evaluate(
            model=lm_obj,
            tasks=[task],
            num_fewshot=num_fewshot,
            **lm_eval_kwargs,
        )

        if args.output_dir:
            with open(os.path.join(args.output_dir, f"{hf_model.replace('/', '_')}-{task}.json"), "w") as f:
                json.dump(results, f, default=_handle_non_serializable, indent=2)
        logging.info(pformat(results))


if __name__ == "__main__":
    main()
    main()