Commit 3b2f04c

BUG(KAN-218) tested workaround locally FIXME, help wanted
mike dupont committed Jun 23, 2024
1 parent 864db23 commit 3b2f04c
Showing 8 changed files with 42 additions and 14 deletions.
7 changes: 7 additions & 0 deletions Dockerfile
@@ -64,8 +64,11 @@ RUN mkdir litellm
RUN touch litellm/__init__.py
COPY litellm/py.typed litellm/py.typed



RUN python -m build
RUN pip install dist/*.whl

# Runtime stage
FROM $LITELLM_RUNTIME_IMAGE as runtime

@@ -82,6 +85,9 @@ COPY --from=builder /wheels/ /wheels/
#RUN rm litellm-*.whl
# Install the built wheel using pip; again using a wildcard if it's the only file
#RUN ls *.whl /wheels/*
RUN apt-get update
RUN apt-get install -y git
RUN rm /wheels/multiaddr-*.whl #conflicts
RUN pip install /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels

# now we can add the application code and install it
@@ -91,6 +97,7 @@ COPY requirements.txt requirements.txt
COPY litellm/py.typed litellm/py.typed
COPY litellm litellm
COPY enterprise enterprise

RUN pip install -e .
# Generate prisma client
RUN prisma generate
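
The build now runs in two stages: the builder produces a wheel with python -m build, and the runtime stage installs strictly offline from /wheels after dropping the bundled multiaddr wheel (the diff comment says only "conflicts", presumably with the copy the petals fork pins). A minimal check, an assumption rather than part of this commit, to confirm which multiaddr pip actually kept inside the image:

# Hypothetical post-build check, run inside the finished container,
# to see which multiaddr installation survived the offline /wheels install.
import importlib.metadata
import multiaddr

print(importlib.metadata.version("multiaddr"), multiaddr.__file__)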
8 changes: 8 additions & 0 deletions docker-compose.yml
@@ -11,6 +11,14 @@ services:
ports:
- "4000:4000" # Map the container port to the host, change the host port if necessary
environment:
# - MAX_DISK_SPACE=${MAX_DISK_SPACE}
# - PUBLIC_NAME=${PUBLIC_NAME}
# - MODEL=${MODEL}
# - INITIAL_PEERS=${INITIAL_PEERS}
# - DEVICE=${DEVICE}
# - BLOCKS=${BLOCKS}
# - PORT=${PORT}
##
DATABASE_URL: "postgresql://postgres:example@db:5432/postgres"
STORE_MODEL_IN_DB: "True" # allows adding models to proxy via UI
env_file:
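The commented-out variables are Petals node settings (swarm address, model, device, blocks to serve) left in place for a later pass; they would let the swarm address come from the environment instead of the value hardcoded in litellm/llms/petals.py below. A minimal sketch of how the code could read them once enabled; the variable names come from the compose file, and the fallback is the peer this commit hardcodes:

import os

# INITIAL_PEERS as a comma-separated list of multiaddrs, falling back to
# the swarm address hardcoded in litellm/llms/petals.py by this commit.
initial_peers = os.environ.get(
    "INITIAL_PEERS",
    "/dns/dht1.cillium.dev.compute.agentartificial.com/tcp/8008/p2p/QmYUro5QJx3YvgC4A9UBXL3ESdb3wSHXZzqUL19Fmy5Gsp",
).split(",")
device = os.environ.get("DEVICE", "cpu")
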
20 changes: 10 additions & 10 deletions litellm/llms/petals.py
@@ -5,6 +5,9 @@
import litellm
from litellm.utils import ModelResponse, Usage
from .prompt_templates.factory import prompt_factory, custom_prompt
import torch
from transformers import AutoTokenizer
from petals import AutoDistributedModelForCausalLM # type: ignore


class PetalsError(Exception):
@@ -99,7 +102,10 @@ def completion(
):
## Load Config
config = litellm.PetalsConfig.get_config()

for k, v in config.items():
#{'model_name': 'petals-sauerkraut', 'litellm_params': {'model_config': {'extra': 'allow', 'arbitrary_types_allowed': True}, 'model': 'petals/VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct'}, 'model_info': {'id': '1', 'db_model': False, 'model_config': {'extra': 'allow', 'arbitrary_types_allowed': True}}, 'model_config': {'extra': 'allow', 'protected_namespaces': (), 'arbitrary_types_allowed': True}} for model: petals-sauerkraut
print("DEBUG CONFIG",k,v)
if (
k not in optional_params
): # completion(top_k=3) > petals_config(top_k=3) <- allows for dynamic variables to be passed in
@@ -147,21 +153,15 @@ def completion(
PetalsError(status_code=response.status_code, message=str(e))

else:
try:
import torch
from transformers import AutoTokenizer
from petals import AutoDistributedModelForCausalLM # type: ignore
except:
raise Exception(
"Importing torch, transformers, petals failed\nTry pip installing petals \npip install git+https://github.com/bigscience-workshop/petals"
)

model = model

tokenizer = AutoTokenizer.from_pretrained(
model, use_fast=False, add_bos_token=False
)
model_obj = AutoDistributedModelForCausalLM.from_pretrained(model)
model_obj = AutoDistributedModelForCausalLM.from_pretrained(
model,
#
initial_peers=["/dns/dht1.cillium.dev.compute.agentartificial.com/tcp/8008/p2p/QmYUro5QJx3YvgC4A9UBXL3ESdb3wSHXZzqUL19Fmy5Gsp"]) # FIXME, KAN-218

## LOGGING
logging_obj.pre_call(
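The substantive change here: the torch/transformers/petals imports move from a lazy try/except to module top level, and AutoDistributedModelForCausalLM.from_pretrained now joins a private swarm through initial_peers instead of the public Petals DHT; the hardcoded address is the workaround the commit title flags under KAN-218. A self-contained sketch of the same connection, where the model name and prompt are illustrative and the peer multiaddr is the one from the diff:

import torch
from transformers import AutoTokenizer
from petals import AutoDistributedModelForCausalLM

INITIAL_PEERS = [
    "/dns/dht1.cillium.dev.compute.agentartificial.com/tcp/8008/p2p/QmYUro5QJx3YvgC4A9UBXL3ESdb3wSHXZzqUL19Fmy5Gsp"
]
model_name = "VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, add_bos_token=False)
# initial_peers bypasses the public DHT and dials the private swarm directly.
model = AutoDistributedModelForCausalLM.from_pretrained(model_name, initial_peers=INITIAL_PEERS)

inputs = tokenizer("Hello, swarm", return_tensors="pt")["input_ids"]
outputs = model.generate(inputs, max_new_tokens=16)
print(tokenizer.decode(outputs[0]))
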
2 changes: 2 additions & 0 deletions litellm/main.py
@@ -2477,6 +2477,8 @@ def completion(

custom_llm_provider = "petals"
stream = optional_params.pop("stream", False)
# print("DEBUG optional_params",optional_params)
# print("DEBUG litellm_params",litellm_params)
model_response = petals.completion(
model=model,
messages=messages,
2 changes: 1 addition & 1 deletion litellm/proxy/proxy_server.py
@@ -1350,7 +1350,7 @@ async def get_config(self, config_file_path: Optional[str] = None) -> dict:
else:
# if it's not in the config - then add it
config[param_name] = param_value

#print("DEBUG config", config)
return config

async def save_config(self, new_config: dict):
1 change: 1 addition & 0 deletions litellm/utils.py
@@ -6,6 +6,7 @@
# +-----------------------------------------------+
#


import ast
import asyncio
import base64
11 changes: 9 additions & 2 deletions proxy_server_config.yaml
@@ -1,7 +1,14 @@
model_list:
- model_name: petals-stable-beluga
- model_name: petals-llama-3
litellm_params:
model: "petals/petals-team/StableBeluga2"
model: "petals/meta-llama/Meta-Llama-3-70B"
optional_params:
initial_peers: "/dns/bootstrap1.petals.dev/tcp/31337/p2p/QmedTaZXmULqwspJXz44SsPZyTNKxhnnFvYRajfH7MGhCY"
- model_name: petals-sauerkraut
litellm_params:
model: "petals/VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct"
optional_params:
initial_peers: "/dns/dht1.cillium.dev.compute.agentartificial.com/tcp/8008/p2p/QmYUro5QJx3YvgC4A9UBXL3ESdb3wSHXZzqUL19Fmy5Gsp"
model_info:
id: "1"

5 changes: 4 additions & 1 deletion requirements.txt
@@ -1,6 +1,6 @@
# LITELLM PROXY DEPENDENCIES #
#petals==-e git+https://github.com/bigscience-workshop/petals
-e git+https://github.com/Agent-Artificial/cellium-miner/#egg=petals


anyio==4.2.0 # openai + http req.
openai==1.27.0 # openai req.
@@ -48,4 +48,7 @@ aioboto3==12.3.0 # for async sagemaker calls
tenacity==8.2.3 # for retrying requests, when litellm.num_retries set
pydantic # ==2.7.1 # proxy + openai req.
ijson==3.2.3 # for google ai studio streaming
torch==2.2.2
####
-e git+https://github.com/Agent-Artificial/hivemind@feature/KAN-217#egg=hivemind
-e git+https://github.com/Agent-Artificial/cellium-miner@feature/KAN-217#egg=petals
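
The dependency changes pin torch 2.2.2 and replace upstream petals with editable git installs of the Agent-Artificial hivemind and cellium-miner (petals) forks on their feature/KAN-217 branches. A quick check, again an assumption rather than part of the commit, that the editable installs resolved:

import importlib.metadata

# Both forks keep the upstream distribution names, so version strings are
# the quickest hint that the git checkouts were picked up over PyPI wheels.
for dist in ("torch", "hivemind", "petals"):
    print(dist, importlib.metadata.version(dist))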
