From 3b2f04c8cb1a42fe5db8bcbf62d2e41a3a72f52a Mon Sep 17 00:00:00 2001
From: mike dupont
Date: Sat, 22 Jun 2024 22:12:42 -0400
Subject: [PATCH] BUG(KAN-218) tested workaround locally FIXME, help wanted

---
 Dockerfile                    |  7 +++++++
 docker-compose.yml            |  8 ++++++++
 litellm/llms/petals.py        | 20 ++++++++++----------
 litellm/main.py               |  2 ++
 litellm/proxy/proxy_server.py |  2 +-
 litellm/utils.py              |  1 +
 proxy_server_config.yaml      | 11 +++++++++--
 requirements.txt              |  5 ++++-
 8 files changed, 42 insertions(+), 14 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 69c283d91..22c867d2d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -64,8 +64,11 @@ RUN mkdir litellm
 RUN touch litellm/__init__.py
 COPY litellm/py.typed litellm/py.typed
 
+
+
 RUN python -m build
 RUN pip install dist/*.whl
+
 
 # Runtime stage
 FROM $LITELLM_RUNTIME_IMAGE as runtime
@@ -82,6 +85,9 @@ COPY --from=builder /wheels/ /wheels/
 #RUN rm litellm-*.whl
 # Install the built wheel using pip; again using a wildcard if it's the only file
 #RUN ls *.whl /wheels/*
+RUN apt-get update
+RUN apt-get install -y git
+RUN rm /wheels/multiaddr-*.whl #conflicts
 RUN pip install /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels
 
 # now we can add the application code and install it
@@ -91,6 +97,7 @@ COPY requirements.txt requirements.txt
 COPY litellm/py.typed litellm/py.typed
 COPY litellm litellm
 COPY enterprise enterprise
+
 RUN pip install -e .
 # Generate prisma client
 RUN prisma generate
diff --git a/docker-compose.yml b/docker-compose.yml
index 97c1d1bd4..4db59e727 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -11,6 +11,14 @@ services:
     ports:
       - "4000:4000" # Map the container port to the host, change the host port if necessary
     environment:
+      # - MAX_DISK_SPACE=${MAX_DISK_SPACE}
+      # - PUBLIC_NAME=${PUBLIC_NAME}
+      # - MODEL=${MODEL}
+      # - INITIAL_PEERS=${INITIAL_PEERS}
+      # - DEVICE=${DEVICE}
+      # - BLOCKS=${BLOCKS}
+      # - PORT=${PORT}
+      ##
       DATABASE_URL: "postgresql://postgres:example@db:5432/postgres"
       STORE_MODEL_IN_DB: "True" # allows adding models to proxy via UI
     env_file:
diff --git a/litellm/llms/petals.py b/litellm/llms/petals.py
index 89cf2e57f..7dfd34967 100644
--- a/litellm/llms/petals.py
+++ b/litellm/llms/petals.py
@@ -5,6 +5,9 @@
 import litellm
 from litellm.utils import ModelResponse, Usage
 from .prompt_templates.factory import prompt_factory, custom_prompt
+import torch
+from transformers import AutoTokenizer
+from petals import AutoDistributedModelForCausalLM  # type: ignore
 
 
 class PetalsError(Exception):
@@ -99,7 +102,10 @@ def completion(
 ):
     ## Load Config
     config = litellm.PetalsConfig.get_config()
+
     for k, v in config.items():
+        #{'model_name': 'petals-sauerkraut', 'litellm_params': {'model_config': {'extra': 'allow', 'arbitrary_types_allowed': True}, 'model': 'petals/VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct'}, 'model_info': {'id': '1', 'db_model': False, 'model_config': {'extra': 'allow', 'arbitrary_types_allowed': True}}, 'model_config': {'extra': 'allow', 'protected_namespaces': (), 'arbitrary_types_allowed': True}} for model: petals-sauerkraut
+        print("DEBUG CONFIG",k,v)
         if (
             k not in optional_params
         ):  # completion(top_k=3) > petals_config(top_k=3) <- allows for dynamic variables to be passed in
@@ -147,21 +153,15 @@ def completion(
             PetalsError(status_code=response.status_code, message=str(e))
 
     else:
-        try:
-            import torch
-            from transformers import AutoTokenizer
-            from petals import AutoDistributedModelForCausalLM  # type: ignore
-        except:
-            raise Exception(
-                "Importing torch, transformers, petals failed\nTry pip installing petals \npip install git+https://github.com/bigscience-workshop/petals"
-            )
-
         model = model
 
         tokenizer = AutoTokenizer.from_pretrained(
             model, use_fast=False, add_bos_token=False
         )
-        model_obj = AutoDistributedModelForCausalLM.from_pretrained(model)
+        model_obj = AutoDistributedModelForCausalLM.from_pretrained(
+            model,
+            #
+            initial_peers=["/dns/dht1.cillium.dev.compute.agentartificial.com/tcp/8008/p2p/QmYUro5QJx3YvgC4A9UBXL3ESdb3wSHXZzqUL19Fmy5Gsp"]) # FIXME, KAN-218
 
         ## LOGGING
         logging_obj.pre_call(
diff --git a/litellm/main.py b/litellm/main.py
index 4c347ef2e..074a407d4 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -2477,6 +2477,8 @@ def completion(
             custom_llm_provider = "petals"
             stream = optional_params.pop("stream", False)
 
+#            print("DEBUG optional_params",optional_params)
+#            print("DEBUG litellm_params",litellm_params)
             model_response = petals.completion(
                 model=model,
                 messages=messages,
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 8009b1ad4..cfd787d93 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1350,7 +1350,7 @@ async def get_config(self, config_file_path: Optional[str] = None) -> dict:
             else:
                 # if it's not in the config - then add it
                 config[param_name] = param_value
-
+        #print("DEBUG config", config)
         return config
 
     async def save_config(self, new_config: dict):
diff --git a/litellm/utils.py b/litellm/utils.py
index 4bd4f3049..6ac72ba6d 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -6,6 +6,7 @@
 # +-----------------------------------------------+
 #
 
+
 import ast
 import asyncio
 import base64
diff --git a/proxy_server_config.yaml b/proxy_server_config.yaml
index 7702461ab..b9b8f4754 100644
--- a/proxy_server_config.yaml
+++ b/proxy_server_config.yaml
@@ -1,7 +1,14 @@
 model_list:
-  - model_name: petals-stable-beluga
+  - model_name: petals-llama-3
     litellm_params:
-      model: "petals/petals-team/StableBeluga2"
+      model: "petals/meta-llama/Meta-Llama-3-70B"
+      optional_params:
+        initial_peers: "/dns/bootstrap1.petals.dev/tcp/31337/p2p/QmedTaZXmULqwspJXz44SsPZyTNKxhnnFvYRajfH7MGhCY"
+  - model_name: petals-sauerkraut
+    litellm_params:
+      model: "petals/VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct"
+      optional_params:
+        initial_peers: "/dns/dht1.cillium.dev.compute.agentartificial.com/tcp/8008/p2p/QmYUro5QJx3YvgC4A9UBXL3ESdb3wSHXZzqUL19Fmy5Gsp"
     model_info:
       id: "1"
diff --git a/requirements.txt b/requirements.txt
index e467c7fc5..01dc88f81 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 # LITELLM PROXY DEPENDENCIES #
 #petals==-e git+https://github.com/bigscience-workshop/petals
--e git+https://github.com/Agent-Artificial/cellium-miner/#egg=petals
+
 
 anyio==4.2.0 # openai + http req.
 openai==1.27.0 # openai req.
@@ -48,4 +48,7 @@
 aioboto3==12.3.0 # for async sagemaker calls
 tenacity==8.2.3 # for retrying requests, when litellm.num_retries set
 pydantic # ==2.7.1 # proxy + openai req.
 ijson==3.2.3 # for google ai studio streaming
+torch==2.2.2 ####
+-e git+https://github.com/Agent-Artificial/hivemind@feature/KAN-217#egg=hivemind
+-e git+https://github.com/Agent-Artificial/cellium-miner@feature/KAN-217#egg=petals
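Note on the FIXME (KAN-218) above, not part of the patch: the workaround hardcodes initial_peers in litellm/llms/petals.py, while proxy_server_config.yaml already declares initial_peers per model under optional_params. A minimal Python sketch of one possible follow-up is below; the helper name load_petals_model, the dict argument, and the fallback peer list are illustrative assumptions, not existing litellm API.

# Illustrative sketch only -- not applied by this patch.
# Assumes the per-model optional_params from proxy_server_config.yaml
# (including "initial_peers") are forwarded into the petals handler.
from transformers import AutoTokenizer
from petals import AutoDistributedModelForCausalLM  # type: ignore

# Assumed fallback: the DHT multiaddr hardcoded in the patch above.
DEFAULT_INITIAL_PEERS = [
    "/dns/dht1.cillium.dev.compute.agentartificial.com/tcp/8008/p2p/QmYUro5QJx3YvgC4A9UBXL3ESdb3wSHXZzqUL19Fmy5Gsp"
]


def load_petals_model(model: str, optional_params: dict):
    """Load tokenizer and distributed model, honoring configured initial_peers."""
    initial_peers = optional_params.pop("initial_peers", DEFAULT_INITIAL_PEERS)
    if isinstance(initial_peers, str):
        # proxy_server_config.yaml passes a single multiaddr string
        initial_peers = [initial_peers]
    tokenizer = AutoTokenizer.from_pretrained(
        model, use_fast=False, add_bos_token=False
    )
    model_obj = AutoDistributedModelForCausalLM.from_pretrained(
        model, initial_peers=initial_peers
    )
    return tokenizer, model_obj

Reading the peer list from optional_params would keep the private-DHT address in the YAML config per model, instead of baking it into the code as this workaround does.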