Commit 3b2f04c

BUG(KAN-218) tested workaround locally FIXME, help wanted
mike dupont committed Jun 23, 2024
1 parent 864db23 commit 3b2f04c
Showing 8 changed files with 42 additions and 14 deletions.
7 changes: 7 additions & 0 deletions Dockerfile
@@ -64,8 +64,11 @@ RUN mkdir litellm
RUN touch litellm/__init__.py
COPY litellm/py.typed litellm/py.typed



RUN python -m build
RUN pip install dist/*.whl

# Runtime stage
FROM $LITELLM_RUNTIME_IMAGE as runtime

@@ -82,6 +85,9 @@ COPY --from=builder /wheels/ /wheels/
#RUN rm litellm-*.whl
# Install the built wheel using pip; again using a wildcard if it's the only file
#RUN ls *.whl /wheels/*
RUN apt-get update
RUN apt-get install -y git
RUN rm /wheels/multiaddr-*.whl #conflicts
RUN pip install /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels

# now we can add the application code and install it
@@ -91,6 +97,7 @@ COPY requirements.txt requirements.txt
COPY litellm/py.typed litellm/py.typed
COPY litellm litellm
COPY enterprise enterprise

RUN pip install -e .
# Generate prisma client
RUN prisma generate
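
The build now runs in two stages: the builder produces a wheel with python -m build, and the runtime stage installs strictly offline from /wheels after dropping the bundled multiaddr wheel (the diff comment says only "conflicts", presumably with the copy the petals fork pins). A minimal check, an assumption rather than part of this commit, to confirm which multiaddr pip actually kept inside the image:

# Hypothetical post-build check, run inside the finished container,
# to see which multiaddr installation survived the offline /wheels install.
import importlib.metadata
import multiaddr

print(importlib.metadata.version("multiaddr"), multiaddr.__file__)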
8 changes: 8 additions & 0 deletions docker-compose.yml
@@ -11,6 +11,14 @@ services:
ports:
- "4000:4000" # Map the container port to the host, change the host port if necessary
environment:
# - MAX_DISK_SPACE=${MAX_DISK_SPACE}
# - PUBLIC_NAME=${PUBLIC_NAME}
# - MODEL=${MODEL}
# - INITIAL_PEERS=${INITIAL_PEERS}
# - DEVICE=${DEVICE}
# - BLOCKS=${BLOCKS}
# - PORT=${PORT}
##
DATABASE_URL: "postgresql://postgres:example@db:5432/postgres"
STORE_MODEL_IN_DB: "True" # allows adding models to proxy via UI
env_file:
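The commented-out variables are Petals node settings (swarm address, model, device, blocks to serve) left in place for a later pass; they would let the swarm address come from the environment instead of the value hardcoded in litellm/llms/petals.py below. A minimal sketch of how the code could read them once enabled; the variable names come from the compose file, and the fallback is the peer this commit hardcodes:

import os

# INITIAL_PEERS as a comma-separated list of multiaddrs, falling back to
# the swarm address hardcoded in litellm/llms/petals.py by this commit.
initial_peers = os.environ.get(
    "INITIAL_PEERS",
    "/dns/dht1.cillium.dev.compute.agentartificial.com/tcp/8008/p2p/QmYUro5QJx3YvgC4A9UBXL3ESdb3wSHXZzqUL19Fmy5Gsp",
).split(",")
device = os.environ.get("DEVICE", "cpu")
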
20 changes: 10 additions & 10 deletions litellm/llms/petals.py
@@ -5,6 +5,9 @@
import litellm
from litellm.utils import ModelResponse, Usage
from .prompt_templates.factory import prompt_factory, custom_prompt
import torch
from transformers import AutoTokenizer
from petals import AutoDistributedModelForCausalLM # type: ignore


class PetalsError(Exception):
@@ -99,7 +102,10 @@ def completion(
):
## Load Config
config = litellm.PetalsConfig.get_config()

for k, v in config.items():
#{'model_name': 'petals-sauerkraut', 'litellm_params': {'model_config': {'extra': 'allow', 'arbitrary_types_allowed': True}, 'model': 'petals/VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct'}, 'model_info': {'id': '1', 'db_model': False, 'model_config': {'extra': 'allow', 'arbitrary_types_allowed': True}}, 'model_config': {'extra': 'allow', 'protected_namespaces': (), 'arbitrary_types_allowed': True}} for model: petals-sauerkraut
print("DEBUG CONFIG",k,v)
if (
k not in optional_params
): # completion(top_k=3) > petals_config(top_k=3) <- allows for dynamic variables to be passed in
@@ -147,21 +153,15 @@ def completion(
PetalsError(status_code=response.status_code, message=str(e))

else:
try:
import torch
from transformers import AutoTokenizer
from petals import AutoDistributedModelForCausalLM # type: ignore
except:
raise Exception(
"Importing torch, transformers, petals failed\nTry pip installing petals \npip install git+https://github.com/bigscience-workshop/petals"
)

model = model

tokenizer = AutoTokenizer.from_pretrained(
model, use_fast=False, add_bos_token=False
)
model_obj = AutoDistributedModelForCausalLM.from_pretrained(model)
model_obj = AutoDistributedModelForCausalLM.from_pretrained(
model,
#
initial_peers=["/dns/dht1.cillium.dev.compute.agentartificial.com/tcp/8008/p2p/QmYUro5QJx3YvgC4A9UBXL3ESdb3wSHXZzqUL19Fmy5Gsp"]) # FIXME, KAN-218

## LOGGING
logging_obj.pre_call(
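The substantive change here: the torch/transformers/petals imports move from a lazy try/except to module top level, and AutoDistributedModelForCausalLM.from_pretrained now joins a private swarm through initial_peers instead of the public Petals DHT; the hardcoded address is the workaround the commit title flags under KAN-218. A self-contained sketch of the same connection, where the model name and prompt are illustrative and the peer multiaddr is the one from the diff:

import torch
from transformers import AutoTokenizer
from petals import AutoDistributedModelForCausalLM

INITIAL_PEERS = [
    "/dns/dht1.cillium.dev.compute.agentartificial.com/tcp/8008/p2p/QmYUro5QJx3YvgC4A9UBXL3ESdb3wSHXZzqUL19Fmy5Gsp"
]
model_name = "VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, add_bos_token=False)
# initial_peers bypasses the public DHT and dials the private swarm directly.
model = AutoDistributedModelForCausalLM.from_pretrained(model_name, initial_peers=INITIAL_PEERS)

inputs = tokenizer("Hello, swarm", return_tensors="pt")["input_ids"]
outputs = model.generate(inputs, max_new_tokens=16)
print(tokenizer.decode(outputs[0]))
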
2 changes: 2 additions & 0 deletions litellm/main.py
@@ -2477,6 +2477,8 @@ def completion(

custom_llm_provider = "petals"
stream = optional_params.pop("stream", False)
# print("DEBUG optional_params",optional_params)
# print("DEBUG litellm_params",litellm_params)
model_response = petals.completion(
model=model,
messages=messages,
2 changes: 1 addition & 1 deletion litellm/proxy/proxy_server.py
@@ -1350,7 +1350,7 @@ async def get_config(self, config_file_path: Optional[str] = None) -> dict:
else:
# if it's not in the config - then add it
config[param_name] = param_value

#print("DEBUG config", config)
return config

async def save_config(self, new_config: dict):
1 change: 1 addition & 0 deletions litellm/utils.py
@@ -6,6 +6,7 @@
# +-----------------------------------------------+
#


import ast
import asyncio
import base64
11 changes: 9 additions & 2 deletions proxy_server_config.yaml
@@ -1,7 +1,14 @@
model_list:
- model_name: petals-stable-beluga
- model_name: petals-llama-3
litellm_params:
model: "petals/petals-team/StableBeluga2"
model: "petals/meta-llama/Meta-Llama-3-70B"
optional_params:
initial_peers: "/dns/bootstrap1.petals.dev/tcp/31337/p2p/QmedTaZXmULqwspJXz44SsPZyTNKxhnnFvYRajfH7MGhCY"
- model_name: petals-sauerkraut
litellm_params:
model: "petals/VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct"
optional_params:
initial_peers: "/dns/dht1.cillium.dev.compute.agentartificial.com/tcp/8008/p2p/QmYUro5QJx3YvgC4A9UBXL3ESdb3wSHXZzqUL19Fmy5Gsp"
model_info:
id: "1"

5 changes: 4 additions & 1 deletion requirements.txt
@@ -1,6 +1,6 @@
# LITELLM PROXY DEPENDENCIES #
#petals==-e git+https://github.com/bigscience-workshop/petals
-e git+https://github.com/Agent-Artificial/cellium-miner/#egg=petals


anyio==4.2.0 # openai + http req.
openai==1.27.0 # openai req.
@@ -48,4 +48,7 @@ aioboto3==12.3.0 # for async sagemaker calls
tenacity==8.2.3 # for retrying requests, when litellm.num_retries set
pydantic # ==2.7.1 # proxy + openai req.
ijson==3.2.3 # for google ai studio streaming
torch==2.2.2
####
-e git+https://github.com/Agent-Artificial/hivemind@feature/KAN-217#egg=hivemind
-e git+https://github.com/Agent-Artificial/cellium-miner@feature/KAN-217#egg=petals
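
The dependency changes pin torch 2.2.2 and replace upstream petals with editable git installs of the Agent-Artificial hivemind and cellium-miner (petals) forks on their feature/KAN-217 branches. A quick check, again an assumption rather than part of the commit, that the editable installs resolved:

import importlib.metadata

# Both forks keep the upstream distribution names, so version strings are
# the quickest hint that the git checkouts were picked up over PyPI wheels.
for dist in ("torch", "hivemind", "petals"):
    print(dist, importlib.metadata.version(dist))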
