diff --git a/.circleci/config.yml b/.circleci/config.yml index 1525385d..5b636bee 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -74,7 +74,7 @@ jobs: resource_class: small parallelism: 1 steps: - - add_ssh_keys: # gives write access to CircleCI worker + - add_ssh_keys: # gives write access to CircleCI worker fingerprints: - "76:0c:1b:9e:e3:6a:c3:5c:6f:24:91:ef:7c:54:d2:7a" - checkout # checkout source code to working directory @@ -157,10 +157,10 @@ jobs: DOCKER_BUILDKIT=1 docker build -f model-engine/model_engine_server/inference/pytorch_or_tf.user.Dockerfile \ --build-arg BASE_IMAGE=temp:1.7.1-cuda11.0-cudnn8-runtime-$CIRCLE_SHA1 \ --build-arg REQUIREMENTS_FILE="$CIRCLE_SHA1-requirements.txt" \ - -t $CIRCLECI_AWS_ACCOUNT_ID.dkr.ecr.us-west-2.amazonaws.com/hosted-model-inference/async-pytorch:1.7.1-cuda11.0-cudnn8-runtime-$CIRCLE_SHA1-021694 . + -t $CIRCLECI_AWS_ACCOUNT_ID.dkr.ecr.us-west-2.amazonaws.com/hosted-model-inference/async-pytorch:1.7.1-cuda11.0-cudnn8-runtime-$CIRCLE_SHA1-b8c25b . rm $CIRCLE_SHA1-requirements.txt - minikube --logtostderr -v 1 image load $CIRCLECI_AWS_ACCOUNT_ID.dkr.ecr.us-west-2.amazonaws.com/hosted-model-inference/async-pytorch:1.7.1-cuda11.0-cudnn8-runtime-$CIRCLE_SHA1-021694 + minikube --logtostderr -v 1 image load $CIRCLECI_AWS_ACCOUNT_ID.dkr.ecr.us-west-2.amazonaws.com/hosted-model-inference/async-pytorch:1.7.1-cuda11.0-cudnn8-runtime-$CIRCLE_SHA1-b8c25b - run: name: Install helm chart command: | @@ -207,23 +207,23 @@ commands: install_server: description: Installs LLM Engine server steps: - - python/install-packages: - pkg-manager: pip - app-dir: model-engine - - python/install-packages: - pkg-manager: pip - app-dir: model-engine - pip-dependency-file: requirements-test.txt - - python/install-packages: - pkg-manager: pip - app-dir: model-engine - pip-dependency-file: requirements_override.txt - - run: - name: Install Server - command: | - pushd model-engine - pip install -e . - popd + - python/install-packages: + pkg-manager: pip + app-dir: model-engine + - python/install-packages: + pkg-manager: pip + app-dir: model-engine + pip-dependency-file: requirements-test.txt + - python/install-packages: + pkg-manager: pip + app-dir: model-engine + pip-dependency-file: requirements_override.txt + - run: + name: Install Server + command: | + pushd model-engine + pip install -e . 
+ popd install_client: description: Install LLM Engine client steps: diff --git a/clients/python/llmengine/data_types.py b/clients/python/llmengine/data_types.py index 263d16cb..7f775e5a 100644 --- a/clients/python/llmengine/data_types.py +++ b/clients/python/llmengine/data_types.py @@ -6,9 +6,10 @@ from enum import Enum from typing import Any, Dict, List, Literal, Optional, Union -import pydantic +from pydantic.version import VERSION as PYDANTIC_VERSION -if int(pydantic.__version__.split(".")[0]) > 1: +PYDANTIC_V2 = PYDANTIC_VERSION.startswith("2.") +if PYDANTIC_V2: from pydantic.v1 import BaseModel, Field, HttpUrl else: from pydantic import BaseModel, Field, HttpUrl # type: ignore diff --git a/integration_tests/rest_api_utils.py b/integration_tests/rest_api_utils.py index f77d96ea..db087992 100644 --- a/integration_tests/rest_api_utils.py +++ b/integration_tests/rest_api_utils.py @@ -59,7 +59,12 @@ def my_model(**keyword_args): "framework_type": "pytorch", "pytorch_image_tag": "1.7.1-cuda11.0-cudnn8-runtime", }, - "requirements": ["cloudpickle==2.1.0", "pyyaml==6.0"], + "requirements": [ + "cloudpickle==2.1.0", + "pyyaml==6.0", + "pydantic==2.8.2", + "fastapi==0.110.0", + ], "location": "s3://model-engine-integration-tests/model_bundles/echo_bundle", }, } diff --git a/integration_tests/test_endpoints.py b/integration_tests/test_endpoints.py index 5b0a6404..5d7eae2a 100644 --- a/integration_tests/test_endpoints.py +++ b/integration_tests/test_endpoints.py @@ -232,7 +232,7 @@ def test_sync_streaming_model_endpoint(capsys): for response in task_responses: assert ( response.strip() - == 'data: {"status": "SUCCESS", "result": {"result": {"y": 1}}, "traceback": null}' + == 'data: {"status":"SUCCESS","result":{"result":{"y":1}},"traceback":null}' ) finally: delete_model_endpoint(create_endpoint_request["name"], user) diff --git a/model-engine/model_engine_server/common/dtos/batch_jobs.py b/model-engine/model_engine_server/common/dtos/batch_jobs.py index ce1af0c8..0600df22 100644 --- a/model-engine/model_engine_server/common/dtos/batch_jobs.py +++ b/model-engine/model_engine_server/common/dtos/batch_jobs.py @@ -13,20 +13,21 @@ GpuType, StorageSpecificationType, ) -from pydantic import BaseModel, root_validator +from pydantic import BaseModel, ConfigDict, model_validator class CreateBatchJobResourceRequests(BaseModel): - cpus: Optional[CpuSpecificationType] - memory: Optional[StorageSpecificationType] - gpus: Optional[int] - gpu_type: Optional[GpuType] - storage: Optional[StorageSpecificationType] - max_workers: Optional[int] - per_worker: Optional[int] + cpus: Optional[CpuSpecificationType] = None + memory: Optional[StorageSpecificationType] = None + gpus: Optional[int] = None + gpu_type: Optional[GpuType] = None + storage: Optional[StorageSpecificationType] = None + max_workers: Optional[int] = None + per_worker: Optional[int] = None class CreateBatchJobV1Request(BaseModel): + model_config = ConfigDict(protected_namespaces=()) model_bundle_id: str input_path: str serialization_format: BatchJobSerializationFormat @@ -41,10 +42,10 @@ class CreateBatchJobV1Response(BaseModel): class GetBatchJobV1Response(BaseModel): status: BatchJobStatus - result: Optional[str] + result: Optional[str] = None duration: timedelta - num_tasks_pending: Optional[int] - num_tasks_completed: Optional[int] + num_tasks_pending: Optional[int] = None + num_tasks_completed: Optional[int] = None class UpdateBatchJobV1Request(BaseModel): @@ -64,9 +65,7 @@ class CreateDockerImageBatchJobResourceRequests(BaseModel): gpus: 
Optional[int] = None gpu_type: Optional[GpuType] = None storage: Optional[StorageSpecificationType] = None - - class Config: - orm_mode = True + model_config = ConfigDict(from_attributes=True) @classmethod def merge_requests( @@ -93,7 +92,7 @@ def common_requests( class CreateDockerImageBatchJobV1Request(BaseModel): docker_image_batch_job_bundle_name: Optional[str] = None docker_image_batch_job_bundle_id: Optional[str] = None - job_config: Optional[Dict[str, Any]] + job_config: Optional[Dict[str, Any]] = None # TODO also expose a separate argument to pass an s3file to the job, as opposed to job_config labels: Dict[str, str] # TODO this probably should go in the bundle @@ -103,7 +102,7 @@ class CreateDockerImageBatchJobV1Request(BaseModel): override_job_max_runtime_s: Optional[int] = None - @root_validator + @model_validator(mode="before") def exactly_one_name_or_id(cls, values): bundle_name = values.get("docker_image_batch_job_bundle_name") bundle_id = values.get("docker_image_batch_job_bundle_id") @@ -166,16 +165,14 @@ class DockerImageBatchJobBundleV1Response(BaseModel): image_tag: str command: List[str] env: Dict[str, str] - mount_location: Optional[str] - cpus: Optional[str] - memory: Optional[str] - storage: Optional[str] - gpus: Optional[int] - gpu_type: Optional[str] - public: Optional[bool] - - class Config: - orm_mode = True + mount_location: Optional[str] = None + cpus: Optional[str] = None + memory: Optional[str] = None + storage: Optional[str] = None + gpus: Optional[int] = None + gpu_type: Optional[str] = None + public: Optional[bool] = None + model_config = ConfigDict(from_attributes=True) class ListDockerImageBatchJobBundleV1Response(BaseModel): diff --git a/model-engine/model_engine_server/common/dtos/core.py b/model-engine/model_engine_server/common/dtos/core.py new file mode 100644 index 00000000..ad709658 --- /dev/null +++ b/model-engine/model_engine_server/common/dtos/core.py @@ -0,0 +1,17 @@ +from pydantic import BaseModel, BeforeValidator, ConfigDict, HttpUrl, TypeAdapter +from typing_extensions import Annotated + +# See: https://github.com/pydantic/pydantic/issues/7186 +# pydantic v2 doesn't treat HttpUrl the same way as in v1 which causes various issue +# This is an attempt to make it behave as similar as possible +HttpUrlTypeAdapter = TypeAdapter(HttpUrl) +HttpUrlStr = Annotated[ + str, + BeforeValidator(lambda value: HttpUrlTypeAdapter.validate_python(value) and value), +] + + +class LLMEngineModel(BaseModel): + """Common pydantic configurations for model engine""" + + model_config = ConfigDict(protected_namespaces=()) diff --git a/model-engine/model_engine_server/common/dtos/docker_repository.py b/model-engine/model_engine_server/common/dtos/docker_repository.py index 6e4651d9..694c4098 100644 --- a/model-engine/model_engine_server/common/dtos/docker_repository.py +++ b/model-engine/model_engine_server/common/dtos/docker_repository.py @@ -10,8 +10,8 @@ class BuildImageRequest(BaseModel): base_path: str dockerfile: str base_image: str - requirements_folder: Optional[str] - substitution_args: Optional[Dict[str, str]] + requirements_folder: Optional[str] = None + substitution_args: Optional[Dict[str, str]] = None class BuildImageResponse(BaseModel): diff --git a/model-engine/model_engine_server/common/dtos/endpoint_builder.py b/model-engine/model_engine_server/common/dtos/endpoint_builder.py index 0edbeaaf..8ec2d2f9 100644 --- a/model-engine/model_engine_server/common/dtos/endpoint_builder.py +++ b/model-engine/model_engine_server/common/dtos/endpoint_builder.py @@ 
-20,19 +20,19 @@ class BuildEndpointRequest(BaseModel): cpus: CpuSpecificationType gpus: int memory: StorageSpecificationType - gpu_type: Optional[GpuType] - storage: Optional[StorageSpecificationType] + gpu_type: Optional[GpuType] = None + storage: Optional[StorageSpecificationType] = None optimize_costs: bool aws_role: str results_s3_bucket: str - child_fn_info: Optional[Dict[str, Any]] # TODO: remove this if we don't need it. - post_inference_hooks: Optional[List[str]] + child_fn_info: Optional[Dict[str, Any]] = None # TODO: remove this if we don't need it. + post_inference_hooks: Optional[List[str]] = None labels: Dict[str, str] - billing_tags: Optional[Dict[str, Any]] + billing_tags: Optional[Dict[str, Any]] = None prewarm: bool = True - high_priority: Optional[bool] - default_callback_url: Optional[str] - default_callback_auth: Optional[CallbackAuth] + high_priority: Optional[bool] = None + default_callback_url: Optional[str] = None + default_callback_auth: Optional[CallbackAuth] = None class BuildEndpointStatus(str, Enum): diff --git a/model-engine/model_engine_server/common/dtos/llms.py b/model-engine/model_engine_server/common/dtos/llms.py index 40d6f2ca..b35bff36 100644 --- a/model-engine/model_engine_server/common/dtos/llms.py +++ b/model-engine/model_engine_server/common/dtos/llms.py @@ -6,6 +6,7 @@ from typing import Any, Dict, List, Optional +from model_engine_server.common.dtos.core import HttpUrlStr from model_engine_server.common.dtos.model_endpoints import ( CpuSpecificationType, GetModelEndpointV1Response, @@ -23,7 +24,7 @@ ModelEndpointStatus, Quantization, ) -from pydantic import BaseModel, Field, HttpUrl +from pydantic import BaseModel, ConfigDict, Field class CreateLLMModelEndpointV1Request(BaseModel): @@ -51,23 +52,23 @@ class CreateLLMModelEndpointV1Request(BaseModel): # General endpoint fields metadata: Dict[str, Any] # TODO: JSON type - post_inference_hooks: Optional[List[str]] + post_inference_hooks: Optional[List[str]] = None endpoint_type: ModelEndpointType = ModelEndpointType.SYNC - cpus: Optional[CpuSpecificationType] - gpus: Optional[int] - memory: Optional[StorageSpecificationType] - gpu_type: Optional[GpuType] - storage: Optional[StorageSpecificationType] - optimize_costs: Optional[bool] + cpus: Optional[CpuSpecificationType] = None + gpus: Optional[int] = None + memory: Optional[StorageSpecificationType] = None + gpu_type: Optional[GpuType] = None + storage: Optional[StorageSpecificationType] = None + optimize_costs: Optional[bool] = None min_workers: int max_workers: int per_worker: int labels: Dict[str, str] - prewarm: Optional[bool] - high_priority: Optional[bool] - billing_tags: Optional[Dict[str, Any]] - default_callback_url: Optional[HttpUrl] - default_callback_auth: Optional[CallbackAuth] + prewarm: Optional[bool] = None + high_priority: Optional[bool] = None + billing_tags: Optional[Dict[str, Any]] = None + default_callback_url: Optional[HttpUrlStr] = None + default_callback_auth: Optional[CallbackAuth] = None public_inference: Optional[bool] = True # LLM endpoints are public by default. 
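Most of the field-level churn in these DTOs adds an explicit `= None` because pydantic v2 drops v1's special-casing of `Optional[X]`: without a default the field is still required, it merely also accepts None. A minimal sketch of the difference, using an illustrative class name rather than one of the DTOs in this patch:

from typing import Optional

from pydantic import BaseModel, ValidationError


class ExampleRequest(BaseModel):
    # Under pydantic v1 this was "optional, defaults to None"; under v2 it is required.
    storage: Optional[str]
    # The migration pattern applied throughout this diff: keep Optional, add a default.
    gpu_type: Optional[str] = None


try:
    ExampleRequest()
except ValidationError as err:
    # Only `storage` is reported missing; `gpu_type` now quietly defaults to None.
    print(err.errors()[0]["loc"])  # ('storage',)
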
@@ -99,43 +100,43 @@ class ListLLMModelEndpointsV1Response(BaseModel): class UpdateLLMModelEndpointV1Request(BaseModel): # LLM specific fields - model_name: Optional[str] - source: Optional[LLMSource] - inference_framework_image_tag: Optional[str] - num_shards: Optional[int] + model_name: Optional[str] = None + source: Optional[LLMSource] = None + inference_framework_image_tag: Optional[str] = None + num_shards: Optional[int] = None """ Number of shards to distribute the model onto GPUs. """ - quantize: Optional[Quantization] + quantize: Optional[Quantization] = None """ Whether to quantize the model. """ - checkpoint_path: Optional[str] + checkpoint_path: Optional[str] = None """ Path to the checkpoint to load the model from. """ # General endpoint fields - metadata: Optional[Dict[str, Any]] - post_inference_hooks: Optional[List[str]] - cpus: Optional[CpuSpecificationType] - gpus: Optional[int] - memory: Optional[StorageSpecificationType] - gpu_type: Optional[GpuType] - storage: Optional[StorageSpecificationType] - optimize_costs: Optional[bool] - min_workers: Optional[int] - max_workers: Optional[int] - per_worker: Optional[int] - labels: Optional[Dict[str, str]] - prewarm: Optional[bool] - high_priority: Optional[bool] - billing_tags: Optional[Dict[str, Any]] - default_callback_url: Optional[HttpUrl] - default_callback_auth: Optional[CallbackAuth] - public_inference: Optional[bool] + metadata: Optional[Dict[str, Any]] = None + post_inference_hooks: Optional[List[str]] = None + cpus: Optional[CpuSpecificationType] = None + gpus: Optional[int] = None + memory: Optional[StorageSpecificationType] = None + gpu_type: Optional[GpuType] = None + storage: Optional[StorageSpecificationType] = None + optimize_costs: Optional[bool] = None + min_workers: Optional[int] = None + max_workers: Optional[int] = None + per_worker: Optional[int] = None + labels: Optional[Dict[str, str]] = None + prewarm: Optional[bool] = None + high_priority: Optional[bool] = None + billing_tags: Optional[Dict[str, Any]] = None + default_callback_url: Optional[HttpUrlStr] = None + default_callback_auth: Optional[CallbackAuth] = None + public_inference: Optional[bool] = None class UpdateLLMModelEndpointV1Response(BaseModel): @@ -225,7 +226,7 @@ class CompletionSyncV1Response(BaseModel): Response object for a synchronous prompt completion task. """ - request_id: Optional[str] + request_id: Optional[str] = None output: Optional[CompletionOutput] = None @@ -323,7 +324,7 @@ class CompletionStreamV1Response(BaseModel): Response object for a stream prompt completion task. """ - request_id: Optional[str] + request_id: Optional[str] = None output: Optional[CompletionStreamOutput] = None error: Optional[StreamError] = None """Error of the response (if any).""" @@ -520,7 +521,9 @@ class CreateBatchCompletionsRequest(BaseModel): Request object for batch completions. """ - input_data_path: Optional[str] + model_config = ConfigDict(protected_namespaces=()) + + input_data_path: Optional[str] = None output_data_path: str """ Path to the output file. The output file will be a JSON file of type List[CompletionOutput]. @@ -530,10 +533,14 @@ class CreateBatchCompletionsRequest(BaseModel): Either `input_data_path` or `content` needs to be provided. When input_data_path is provided, the input file should be a JSON file of type BatchCompletionsRequestContent. """ - model_config: CreateBatchCompletionsModelConfig + model_cfg: CreateBatchCompletionsModelConfig = Field(alias="model_config") """ Model configuration for the batch inference. 
Hardware configurations are inferred. + + We rename model_config from api to model_cfg in engine since engine uses pydantic v2 which + reserves model_config as a keyword. """ + data_parallelism: Optional[int] = Field(default=1, ge=1, le=64) """ Number of replicas to run the batch inference. More replicas are slower to schedule but faster to inference. @@ -555,14 +562,6 @@ class CreateBatchCompletionsEngineRequest(CreateBatchCompletionsRequest): hidden from the DTO exposed to the client. """ - model_cfg: CreateBatchCompletionsModelConfig - """ - Model configuration for the batch inference. Hardware configurations are inferred. - - We rename model_config from api to model_cfg in engine since engine uses pydantic v2 which - reserves model_config as a keyword. - """ - max_gpu_memory_utilization: Optional[float] = Field(default=0.9, le=1.0) """ Maximum GPU memory utilization for the batch inference. Default to 90%. @@ -574,8 +573,8 @@ def from_api(request: CreateBatchCompletionsRequest) -> "CreateBatchCompletionsE input_data_path=request.input_data_path, output_data_path=request.output_data_path, content=request.content, - model_config=request.model_config, - model_cfg=request.model_config, + model_config=request.model_cfg, + model_cfg=request.model_cfg, data_parallelism=request.data_parallelism, max_runtime_sec=request.max_runtime_sec, tool_config=request.tool_config, diff --git a/model-engine/model_engine_server/common/dtos/model_bundles.py b/model-engine/model_engine_server/common/dtos/model_bundles.py index 778b2942..d49537c4 100644 --- a/model-engine/model_engine_server/common/dtos/model_bundles.py +++ b/model-engine/model_engine_server/common/dtos/model_bundles.py @@ -10,7 +10,7 @@ ModelBundleFlavors, ModelBundlePackagingType, ) -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field class CreateModelBundleV1Request(BaseModel): @@ -23,9 +23,9 @@ class CreateModelBundleV1Request(BaseModel): requirements: List[str] env_params: ModelBundleEnvironmentParams packaging_type: ModelBundlePackagingType - metadata: Optional[Dict[str, Any]] - app_config: Optional[Dict[str, Any]] - schema_location: Optional[str] + metadata: Optional[Dict[str, Any]] = None + app_config: Optional[Dict[str, Any]] = None + schema_location: Optional[str] = None class CloneModelBundleV1Request(BaseModel): @@ -38,7 +38,7 @@ class CloneModelBundleV1Request(BaseModel): The ID of the ModelBundle to copy from. """ - new_app_config: Optional[Dict[str, Any]] + new_app_config: Optional[Dict[str, Any]] = None """ The app_config of the new ModelBundle. If not specified, then the new ModelBundle will use the same app_config as the original. @@ -50,6 +50,8 @@ class CreateModelBundleV1Response(BaseModel): Response object for creating a Model Bundle. """ + model_config = ConfigDict(protected_namespaces=()) + model_bundle_id: str @@ -58,6 +60,8 @@ class ModelBundleV1Response(BaseModel): Response object for a single Model Bundle. """ + model_config = ConfigDict(from_attributes=True, protected_namespaces=()) + id: str name: str location: str @@ -65,17 +69,10 @@ class ModelBundleV1Response(BaseModel): env_params: ModelBundleEnvironmentParams packaging_type: ModelBundlePackagingType metadata: Dict[str, Any] - app_config: Optional[Dict[str, Any]] + app_config: Optional[Dict[str, Any]] = None created_at: datetime.datetime model_artifact_ids: List[str] - schema_location: Optional[str] - - class Config: - """ - ModelBundleResponse Config class. 
- """ - - orm_mode = True + schema_location: Optional[str] = None class ListModelBundlesV1Response(BaseModel): @@ -83,6 +80,8 @@ class ListModelBundlesV1Response(BaseModel): Response object for listing Model Bundles. """ + model_config = ConfigDict(protected_namespaces=()) + model_bundles: List[ModelBundleV1Response] @@ -92,7 +91,7 @@ class CreateModelBundleV2Request(BaseModel): """ name: str - metadata: Optional[Dict[str, Any]] + metadata: Optional[Dict[str, Any]] = None schema_location: str flavor: ModelBundleFlavors = Field(..., discriminator="flavor") @@ -107,7 +106,7 @@ class CloneModelBundleV2Request(BaseModel): The ID of the ModelBundle to copy from. """ - new_app_config: Optional[Dict[str, Any]] + new_app_config: Optional[Dict[str, Any]] = None """ The app_config of the new ModelBundle. If not specified, then the new ModelBundle will use the same app_config as the original. @@ -119,6 +118,8 @@ class CreateModelBundleV2Response(BaseModel): Response object for creating a Model Bundle. """ + model_config = ConfigDict(protected_namespaces=()) + model_bundle_id: str @@ -127,27 +128,24 @@ class ModelBundleV2Response(BaseModel): Response object for a single Model Bundle. """ + model_config = ConfigDict(from_attributes=True, protected_namespaces=()) + id: str name: str metadata: Dict[str, Any] created_at: datetime.datetime model_artifact_ids: List[str] - schema_location: Optional[str] + schema_location: Optional[str] = None flavor: ModelBundleFlavors = Field(..., discriminator="flavor") - class Config: - """ - ModelBundleResponse Config class. - """ - - orm_mode = True - class ListModelBundlesV2Response(BaseModel): """ Response object for listing Model Bundles. """ + model_config = ConfigDict(protected_namespaces=()) + model_bundles: List[ModelBundleV2Response] diff --git a/model-engine/model_engine_server/common/dtos/model_endpoints.py b/model-engine/model_engine_server/common/dtos/model_endpoints.py index 06073ada..cfeb44bf 100644 --- a/model-engine/model_engine_server/common/dtos/model_endpoints.py +++ b/model-engine/model_engine_server/common/dtos/model_endpoints.py @@ -10,6 +10,7 @@ from enum import Enum from typing import Any, Dict, List, Optional +from model_engine_server.common.dtos.core import HttpUrlStr from model_engine_server.domain.entities import ( CallbackAuth, CpuSpecificationType, @@ -21,7 +22,7 @@ ModelEndpointType, StorageSpecificationType, ) -from pydantic import BaseModel, Field, HttpUrl +from pydantic import BaseModel, ConfigDict, Field class BrokerType(str, Enum): @@ -51,22 +52,22 @@ class CreateModelEndpointV1Request(BaseModel): model_bundle_id: str endpoint_type: ModelEndpointType metadata: Dict[str, Any] # TODO: JSON type - post_inference_hooks: Optional[List[str]] + post_inference_hooks: Optional[List[str]] = None cpus: CpuSpecificationType gpus: int = Field(..., ge=0) memory: StorageSpecificationType - gpu_type: Optional[GpuType] - storage: Optional[StorageSpecificationType] - optimize_costs: Optional[bool] + gpu_type: Optional[GpuType] = None + storage: Optional[StorageSpecificationType] = None + optimize_costs: Optional[bool] = None min_workers: int = Field(..., ge=0) max_workers: int = Field(..., ge=0) per_worker: int = Field(..., gt=0) labels: Dict[str, str] - prewarm: Optional[bool] - high_priority: Optional[bool] - billing_tags: Optional[Dict[str, Any]] - default_callback_url: Optional[HttpUrl] - default_callback_auth: Optional[CallbackAuth] + prewarm: Optional[bool] = None + high_priority: Optional[bool] = None + billing_tags: Optional[Dict[str, Any]] = 
None + default_callback_url: Optional[HttpUrlStr] = None + default_callback_auth: Optional[CallbackAuth] = None public_inference: Optional[bool] = Field(default=False) @@ -75,25 +76,25 @@ class CreateModelEndpointV1Response(BaseModel): class UpdateModelEndpointV1Request(BaseModel): - model_bundle_id: Optional[str] - metadata: Optional[Dict[str, Any]] # TODO: JSON type - post_inference_hooks: Optional[List[str]] - cpus: Optional[CpuSpecificationType] + model_bundle_id: Optional[str] = None + metadata: Optional[Dict[str, Any]] = None # TODO: JSON type + post_inference_hooks: Optional[List[str]] = None + cpus: Optional[CpuSpecificationType] = None gpus: Optional[int] = Field(default=None, ge=0) - memory: Optional[StorageSpecificationType] - gpu_type: Optional[GpuType] - storage: Optional[StorageSpecificationType] - optimize_costs: Optional[bool] + memory: Optional[StorageSpecificationType] = None + gpu_type: Optional[GpuType] = None + storage: Optional[StorageSpecificationType] = None + optimize_costs: Optional[bool] = None min_workers: Optional[int] = Field(default=None, ge=0) max_workers: Optional[int] = Field(default=None, ge=0) per_worker: Optional[int] = Field(default=None, gt=0) - labels: Optional[Dict[str, str]] - prewarm: Optional[bool] - high_priority: Optional[bool] - billing_tags: Optional[Dict[str, Any]] - default_callback_url: Optional[HttpUrl] - default_callback_auth: Optional[CallbackAuth] - public_inference: Optional[bool] + labels: Optional[Dict[str, str]] = None + prewarm: Optional[bool] = None + high_priority: Optional[bool] = None + billing_tags: Optional[Dict[str, Any]] = None + default_callback_url: Optional[HttpUrlStr] = None + default_callback_auth: Optional[CallbackAuth] = None + public_inference: Optional[bool] = None class UpdateModelEndpointV1Response(BaseModel): @@ -110,7 +111,7 @@ class GetModelEndpointV1Response(BaseModel): bundle_name: str status: ModelEndpointStatus post_inference_hooks: Optional[List[str]] = Field(default=None) - default_callback_url: Optional[HttpUrl] = Field(default=None) + default_callback_url: Optional[HttpUrlStr] = Field(default=None) default_callback_auth: Optional[CallbackAuth] = Field(default=None) labels: Optional[Dict[str, str]] = Field(default=None) aws_role: Optional[str] = Field(default=None) @@ -143,6 +144,7 @@ class ModelEndpointOrderBy(str, Enum): class GetModelEndpointsSchemaV1Response(BaseModel): + model_config = ConfigDict(protected_namespaces=()) model_endpoints_schema: ModelEndpointsSchema diff --git a/model-engine/model_engine_server/common/dtos/tasks.py b/model-engine/model_engine_server/common/dtos/tasks.py index 36c20903..b9919f68 100644 --- a/model-engine/model_engine_server/common/dtos/tasks.py +++ b/model-engine/model_engine_server/common/dtos/tasks.py @@ -6,15 +6,15 @@ from typing import Any, Optional from model_engine_server.domain.entities import CallbackAuth -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, RootModel -class ResponseSchema(BaseModel): - __root__: Any +class ResponseSchema(RootModel): + root: Any -class RequestSchema(BaseModel): - __root__: Any +class RequestSchema(RootModel): + root: Any class TaskStatus(str, Enum): diff --git a/model-engine/model_engine_server/common/dtos/triggers.py b/model-engine/model_engine_server/common/dtos/triggers.py index ee4d2121..3d75376e 100644 --- a/model-engine/model_engine_server/common/dtos/triggers.py +++ b/model-engine/model_engine_server/common/dtos/triggers.py @@ -4,15 +4,15 @@ import datetime from typing import Any, Dict, List, 
Optional -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field class CreateTriggerV1Request(BaseModel): name: str cron_schedule: str bundle_id: str - default_job_config: Optional[Dict[str, Any]] - default_job_metadata: Optional[Dict[str, str]] + default_job_config: Optional[Dict[str, Any]] = None + default_job_metadata: Optional[Dict[str, str]] = None class CreateTriggerV1Response(BaseModel): @@ -29,9 +29,7 @@ class GetTriggerV1Response(BaseModel): docker_image_batch_job_bundle_id: str default_job_config: Optional[Dict[str, Any]] = Field(default=None) default_job_metadata: Optional[Dict[str, str]] = Field(default=None) - - class Config: - orm_mode = True + model_config = ConfigDict(from_attributes=True) class ListTriggersV1Response(BaseModel): @@ -39,8 +37,8 @@ class ListTriggersV1Response(BaseModel): class UpdateTriggerV1Request(BaseModel): - cron_schedule: Optional[str] - suspend: Optional[bool] + cron_schedule: Optional[str] = None + suspend: Optional[bool] = None class UpdateTriggerV1Response(BaseModel): diff --git a/model-engine/model_engine_server/domain/entities/batch_job_entity.py b/model-engine/model_engine_server/domain/entities/batch_job_entity.py index 6bf51b0d..62238d66 100644 --- a/model-engine/model_engine_server/domain/entities/batch_job_entity.py +++ b/model-engine/model_engine_server/domain/entities/batch_job_entity.py @@ -26,24 +26,24 @@ class BatchJobSerializationFormat(str, Enum): class BatchJobRecord(OwnedEntity): id: str created_at: datetime - completed_at: Optional[datetime] + completed_at: Optional[datetime] = None status: BatchJobStatus created_by: str owner: str model_bundle: ModelBundle - model_endpoint_id: Optional[str] - task_ids_location: Optional[str] - result_location: Optional[str] + model_endpoint_id: Optional[str] = None + task_ids_location: Optional[str] = None + result_location: Optional[str] = None class BatchJobProgress(BaseModel): - num_tasks_pending: Optional[int] - num_tasks_completed: Optional[int] + num_tasks_pending: Optional[int] = None + num_tasks_completed: Optional[int] = None class BatchJob(BaseModel): record: BatchJobRecord - model_endpoint: Optional[ModelEndpoint] + model_endpoint: Optional[ModelEndpoint] = None progress: BatchJobProgress @@ -57,7 +57,7 @@ class DockerImageBatchJob(BaseModel): created_by: str owner: str created_at: datetime - completed_at: Optional[datetime] + completed_at: Optional[datetime] = None status: BatchJobStatus # the status map relatively nicely onto BatchJobStatus annotations: Optional[Dict[str, str]] = None override_job_max_runtime_s: Optional[int] = None diff --git a/model-engine/model_engine_server/domain/entities/docker_image_batch_job_bundle_entity.py b/model-engine/model_engine_server/domain/entities/docker_image_batch_job_bundle_entity.py index 1ed2838d..9213af13 100644 --- a/model-engine/model_engine_server/domain/entities/docker_image_batch_job_bundle_entity.py +++ b/model-engine/model_engine_server/domain/entities/docker_image_batch_job_bundle_entity.py @@ -3,6 +3,7 @@ from model_engine_server.domain.entities import GpuType from model_engine_server.domain.entities.owned_entity import OwnedEntity +from pydantic import ConfigDict class DockerImageBatchJobBundle(OwnedEntity): @@ -15,13 +16,11 @@ class DockerImageBatchJobBundle(OwnedEntity): image_tag: str command: List[str] env: Dict[str, str] - mount_location: Optional[str] - cpus: Optional[str] - memory: Optional[str] - storage: Optional[str] - gpus: Optional[int] - gpu_type: Optional[GpuType] - public: 
Optional[bool] - - class Config: - orm_mode = True + mount_location: Optional[str] = None + cpus: Optional[str] = None + memory: Optional[str] = None + storage: Optional[str] = None + gpus: Optional[int] = None + gpu_type: Optional[GpuType] = None + public: Optional[bool] = None + model_config = ConfigDict(from_attributes=True) diff --git a/model-engine/model_engine_server/domain/entities/llm_fine_tune_entity.py b/model-engine/model_engine_server/domain/entities/llm_fine_tune_entity.py index 13188c06..b18bbdd2 100644 --- a/model-engine/model_engine_server/domain/entities/llm_fine_tune_entity.py +++ b/model-engine/model_engine_server/domain/entities/llm_fine_tune_entity.py @@ -1,6 +1,6 @@ from typing import Any, Dict, List, Optional -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict class LLMFineTuneTemplate(BaseModel): @@ -8,9 +8,7 @@ class LLMFineTuneTemplate(BaseModel): launch_endpoint_config: Dict[str, Any] default_hparams: Dict[str, Any] required_params: List[str] - - class Config: - orm_mode = True + model_config = ConfigDict(from_attributes=True) class LLMFineTuneEvent(BaseModel): diff --git a/model-engine/model_engine_server/domain/entities/model_bundle_entity.py b/model-engine/model_engine_server/domain/entities/model_bundle_entity.py index 247539d0..e3ceb836 100644 --- a/model-engine/model_engine_server/domain/entities/model_bundle_entity.py +++ b/model-engine/model_engine_server/domain/entities/model_bundle_entity.py @@ -5,7 +5,7 @@ from model_engine_server.common.constants import DEFAULT_CELERY_TASK_NAME, LIRA_CELERY_TASK_NAME from model_engine_server.domain.entities.owned_entity import OwnedEntity -from pydantic import BaseModel, Field, root_validator +from pydantic import BaseModel, ConfigDict, Field, model_validator from typing_extensions import Literal @@ -38,12 +38,12 @@ class ModelBundleEnvironmentParams(BaseModel): """ framework_type: ModelBundleFrameworkType - pytorch_image_tag: Optional[str] # for pytorch - tensorflow_version: Optional[str] # for tensorflow - ecr_repo: Optional[str] # for custom base image - image_tag: Optional[str] # for custom base image + pytorch_image_tag: Optional[str] = None # for pytorch + tensorflow_version: Optional[str] = None # for tensorflow + ecr_repo: Optional[str] = None # for custom base image + image_tag: Optional[str] = None # for custom base image - @root_validator + @model_validator(mode="before") @classmethod def validate_fields_present_for_framework_type(cls, field_values): """ @@ -72,12 +72,7 @@ def validate_fields_present_for_framework_type(cls, field_values): ) return field_values - class Config: - """ - Model Bundle Environment Params Config class. - """ - - orm_mode = True + model_config = ConfigDict(from_attributes=True) class PytorchFramework(BaseModel): @@ -127,7 +122,7 @@ class ArtifactLike(BaseModel, ABC): framework: Union[PytorchFramework, TensorflowFramework, CustomFramework] = Field( ..., discriminator="framework_type" ) - app_config: Optional[Dict[str, Any]] + app_config: Optional[Dict[str, Any]] = None location: str @@ -159,7 +154,7 @@ class RunnableImageLike(BaseModel, ABC): command: List[str] predict_route: str = "/predict" healthcheck_route: str = "/readyz" - env: Optional[Dict[str, str]] + env: Optional[Dict[str, str]] = None protocol: Literal["http"] # TODO: add support for other protocols (e.g. 
grpc) readiness_initial_delay_seconds: int = 120 @@ -177,11 +172,11 @@ class TritonEnhancedRunnableImageFlavor(RunnableImageLike): flavor: Literal[ModelBundleFlavorType.TRITON_ENHANCED_RUNNABLE_IMAGE] triton_model_repository: str - triton_model_replicas: Optional[Dict[str, str]] + triton_model_replicas: Optional[Dict[str, str]] = None triton_num_cpu: float triton_commit_tag: str - triton_storage: Optional[str] - triton_memory: Optional[str] + triton_storage: Optional[str] = None + triton_memory: Optional[str] = None triton_readiness_initial_delay_seconds: int = 300 # will default to 300 seconds @@ -217,23 +212,17 @@ class ModelBundle(OwnedEntity): created_at: datetime.datetime metadata: Dict[str, Any] model_artifact_ids: List[str] - schema_location: Optional[str] + schema_location: Optional[str] = None owner: str flavor: ModelBundleFlavors = Field(..., discriminator="flavor") # LEGACY FIELDS - requirements: Optional[List[str]] # FIXME: Delete - location: Optional[str] # FIXME: Delete - env_params: Optional[ModelBundleEnvironmentParams] # FIXME: Delete - packaging_type: Optional[ModelBundlePackagingType] # FIXME: Delete - app_config: Optional[Dict[str, Any]] # FIXME: Delete - - class Config: - """ - Model Bundle Config class. - """ - - orm_mode = True + requirements: Optional[List[str]] = None # FIXME: Delete + location: Optional[str] = None # FIXME: Delete + env_params: Optional[ModelBundleEnvironmentParams] = None # FIXME: Delete + packaging_type: Optional[ModelBundlePackagingType] = None # FIXME: Delete + app_config: Optional[Dict[str, Any]] = None # FIXME: Delete + model_config = ConfigDict(from_attributes=True) def is_runnable(self) -> bool: """True iff the model bundle calls for it. diff --git a/model-engine/model_engine_server/domain/entities/model_endpoint_entity.py b/model-engine/model_engine_server/domain/entities/model_endpoint_entity.py index cb6277f6..a0f84c4e 100644 --- a/model-engine/model_engine_server/domain/entities/model_endpoint_entity.py +++ b/model-engine/model_engine_server/domain/entities/model_endpoint_entity.py @@ -12,7 +12,7 @@ from model_engine_server.domain.entities.gpu_type import GpuType from model_engine_server.domain.entities.model_bundle_entity import ModelBundle from model_engine_server.domain.entities.owned_entity import OwnedEntity -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, RootModel from typing_extensions import Literal ModelEndpointsSchema = OpenAPI @@ -42,9 +42,9 @@ class ModelEndpointResourceState(BaseModel): cpus: CpuSpecificationType # TODO(phil): try to use decimal.Decimal gpus: int = Field(..., ge=0) memory: StorageSpecificationType - gpu_type: Optional[GpuType] - storage: Optional[StorageSpecificationType] - optimize_costs: Optional[bool] + gpu_type: Optional[GpuType] = None + storage: Optional[StorageSpecificationType] = None + optimize_costs: Optional[bool] = None class ModelEndpointDeploymentState(BaseModel): @@ -71,8 +71,8 @@ class CallbackmTLSAuth(BaseModel): key: str -class CallbackAuth(BaseModel): - __root__: Union[CallbackBasicAuth, CallbackmTLSAuth] = Field(..., discriminator="kind") +class CallbackAuth(RootModel): + root: Union[CallbackBasicAuth, CallbackmTLSAuth] = Field(..., discriminator="kind") class ModelEndpointConfig(BaseModel): @@ -82,14 +82,14 @@ class ModelEndpointConfig(BaseModel): endpoint_name: str bundle_name: str - post_inference_hooks: Optional[List[str]] + post_inference_hooks: Optional[List[str]] = None user_id: Optional[str] = None billing_queue: Optional[str] = None billing_tags: 
Optional[Dict[str, Any]] = None default_callback_url: Optional[str] = None - default_callback_auth: Optional[CallbackAuth] + default_callback_auth: Optional[CallbackAuth] = None endpoint_id: Optional[str] = None - endpoint_type: Optional[ModelEndpointType] + endpoint_type: Optional[ModelEndpointType] = None bundle_id: Optional[str] = None labels: Optional[Dict[str, str]] = None @@ -102,8 +102,8 @@ def deserialize(serialized_config: str) -> "ModelEndpointConfig": class ModelEndpointUserConfigState(BaseModel): - app_config: Optional[Dict[str, Any]] - endpoint_config: Optional[ModelEndpointConfig] + app_config: Optional[Dict[str, Any]] = None + endpoint_config: Optional[ModelEndpointConfig] = None class ModelEndpointRecord(OwnedEntity): @@ -117,15 +117,15 @@ class ModelEndpointRecord(OwnedEntity): name: str created_by: str created_at: datetime.datetime - last_updated_at: Optional[datetime.datetime] - metadata: Optional[Dict[str, Any]] + last_updated_at: Optional[datetime.datetime] = None + metadata: Optional[Dict[str, Any]] = None creation_task_id: Optional[str] = Field(default=None) endpoint_type: ModelEndpointType destination: str status: ModelEndpointStatus current_model_bundle: ModelBundle owner: str - public_inference: Optional[bool] + public_inference: Optional[bool] = None class ModelEndpointInfraState(BaseModel): @@ -136,14 +136,14 @@ class ModelEndpointInfraState(BaseModel): deployment_name: str aws_role: str results_s3_bucket: str - child_fn_info: Optional[Dict[str, Any]] + child_fn_info: Optional[Dict[str, Any]] = None labels: Dict[str, str] deployment_state: ModelEndpointDeploymentState resource_state: ModelEndpointResourceState user_config_state: ModelEndpointUserConfigState prewarm: Optional[bool] = None - high_priority: Optional[bool] - num_queued_items: Optional[int] + high_priority: Optional[bool] = None + num_queued_items: Optional[int] = None image: str @@ -153,4 +153,4 @@ class ModelEndpoint(BaseModel): """ record: ModelEndpointRecord - infra_state: Optional[ModelEndpointInfraState] + infra_state: Optional[ModelEndpointInfraState] = None diff --git a/model-engine/model_engine_server/domain/entities/trigger_entity.py b/model-engine/model_engine_server/domain/entities/trigger_entity.py index ac515865..0d68ec92 100644 --- a/model-engine/model_engine_server/domain/entities/trigger_entity.py +++ b/model-engine/model_engine_server/domain/entities/trigger_entity.py @@ -2,6 +2,7 @@ from typing import Any, Dict, Optional from model_engine_server.domain.entities.owned_entity import OwnedEntity +from pydantic import ConfigDict class Trigger(OwnedEntity): @@ -13,8 +14,6 @@ class Trigger(OwnedEntity): cron_schedule: str docker_image_batch_job_bundle_id: str - default_job_config: Optional[Dict[str, Any]] - default_job_metadata: Optional[Dict[str, str]] - - class Config: - orm_mode = True + default_job_config: Optional[Dict[str, Any]] = None + default_job_metadata: Optional[Dict[str, str]] = None + model_config = ConfigDict(from_attributes=True) diff --git a/model-engine/model_engine_server/domain/gateways/monitoring_metrics_gateway.py b/model-engine/model_engine_server/domain/gateways/monitoring_metrics_gateway.py index 38861ade..23759911 100644 --- a/model-engine/model_engine_server/domain/gateways/monitoring_metrics_gateway.py +++ b/model-engine/model_engine_server/domain/gateways/monitoring_metrics_gateway.py @@ -16,7 +16,7 @@ class MetricMetadata(BaseModel): user: User - model_name: Optional[str] + model_name: Optional[str] = None class MonitoringMetricsGateway(ABC): diff --git 
a/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py b/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py index f19e5ab5..a46b04bb 100644 --- a/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py +++ b/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py @@ -2408,7 +2408,7 @@ async def create_batch_job_bundle( hardware: CreateDockerImageBatchJobResourceRequests, ) -> DockerImageBatchJobBundle: bundle_name = ( - f"{request.model_config.model}_{datetime.datetime.utcnow().strftime('%y%m%d-%H%M%S')}" + f"{request.model_cfg.model}_{datetime.datetime.utcnow().strftime('%y%m%d-%H%M%S')}" ) image_tag = self.docker_repository.get_latest_image_tag( @@ -2448,22 +2448,22 @@ async def create_batch_job_bundle( async def execute( self, user: User, request: CreateBatchCompletionsRequest ) -> CreateBatchCompletionsResponse: - request.model_config.checkpoint_path = get_checkpoint_path( - request.model_config.model, request.model_config.checkpoint_path + request.model_cfg.checkpoint_path = get_checkpoint_path( + request.model_cfg.model, request.model_cfg.checkpoint_path ) hardware = await _infer_hardware( self.llm_artifact_gateway, - request.model_config.model, - request.model_config.checkpoint_path, + request.model_cfg.model, + request.model_cfg.checkpoint_path, is_batch_job=True, ) # Reconcile gpus count with num_shards from request assert hardware.gpus is not None - if request.model_config.num_shards: - hardware.gpus = max(hardware.gpus, request.model_config.num_shards) + if request.model_cfg.num_shards: + hardware.gpus = max(hardware.gpus, request.model_cfg.num_shards) engine_request = CreateBatchCompletionsEngineRequest.from_api(request) - engine_request.model_config.num_shards = hardware.gpus + engine_request.model_cfg.num_shards = hardware.gpus if engine_request.tool_config and engine_request.tool_config.name != "code_evaluator": raise ObjectHasInvalidValueException( @@ -2471,7 +2471,7 @@ async def execute( ) additional_engine_args = infer_addition_engine_args_from_model_name( - engine_request.model_config.model + engine_request.model_cfg.model ) if additional_engine_args.gpu_memory_utilization is not None: @@ -2502,7 +2502,7 @@ async def execute( repo=batch_bundle.image_repository, tag=batch_bundle.image_tag, resource_requests=hardware, - labels=engine_request.model_config.labels, + labels=engine_request.model_cfg.labels, mount_location=batch_bundle.mount_location, override_job_max_runtime_s=engine_request.max_runtime_sec, num_workers=engine_request.data_parallelism, diff --git a/model-engine/model_engine_server/inference/batch_inference/dto.py b/model-engine/model_engine_server/inference/batch_inference/dto.py index da63c545..109050c2 100644 --- a/model-engine/model_engine_server/inference/batch_inference/dto.py +++ b/model-engine/model_engine_server/inference/batch_inference/dto.py @@ -117,7 +117,7 @@ class CreateBatchCompletionsRequest(BaseModel): Request object for batch completions. """ - input_data_path: Optional[str] + input_data_path: Optional[str] = None output_data_path: str """ Path to the output file. The output file will be a JSON file of type List[CompletionOutput]. 
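Several of the following files switch attribute access from `__root__` to `root` because pydantic v2 replaces v1's custom-root models with `RootModel`, as done for `CallbackAuth`, `RequestSchema`, and `ResponseSchema` above. A hedged sketch of the pattern, using simplified stand-ins for those classes:

from pydantic import BaseModel, RootModel


class BasicAuth(BaseModel):
    kind: str = "basic"
    username: str
    password: str


# pydantic v1 spelling, for comparison:
# class CallbackAuth(BaseModel):
#     __root__: BasicAuth        # accessed as auth.__root__.username


class CallbackAuth(RootModel):
    root: BasicAuth              # accessed as auth.root.username


auth = CallbackAuth(root=BasicAuth(username="u", password="p"))
print(auth.root.username)      # u
print(auth.model_dump_json())  # {"kind":"basic","username":"u","password":"p"}

The compact JSON output is also why the expected strings in test_endpoints.py and test_tasks.py change in this diff: pydantic v2 serializes without spaces after ':' and ','.
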
diff --git a/model-engine/model_engine_server/inference/common.py b/model-engine/model_engine_server/inference/common.py index 2655eb12..b8ddfea0 100644 --- a/model-engine/model_engine_server/inference/common.py +++ b/model-engine/model_engine_server/inference/common.py @@ -198,7 +198,7 @@ def predict_on_url(predict_fn: Callable, request_url: str, return_pickled: bool) def predict_on_args( predict_fn: Callable, inputs: RequestSchema, return_pickled: bool ) -> Dict[str, str]: - inputs_kwargs = inputs.__root__ + inputs_kwargs = inputs.root output = predict_fn(**inputs_kwargs) if return_pickled: diff --git a/model-engine/model_engine_server/inference/post_inference_hooks.py b/model-engine/model_engine_server/inference/post_inference_hooks.py index 3295c3b4..5d45b5cb 100644 --- a/model-engine/model_engine_server/inference/post_inference_hooks.py +++ b/model-engine/model_engine_server/inference/post_inference_hooks.py @@ -76,8 +76,8 @@ def handle( response["task_id"] = task_id auth = request_payload.callback_auth or self._default_callback_auth - if auth and isinstance(auth.__root__, CallbackBasicAuth): - auth_tuple = (auth.__root__.username, auth.__root__.password) + if auth and isinstance(auth.root, CallbackBasicAuth): + auth_tuple = (auth.root.username, auth.root.password) else: auth_tuple = (self._user_id, "") diff --git a/model-engine/model_engine_server/inference/requirements_base.txt b/model-engine/model_engine_server/inference/requirements_base.txt index 4561bd06..aeeb5efd 100644 --- a/model-engine/model_engine_server/inference/requirements_base.txt +++ b/model-engine/model_engine_server/inference/requirements_base.txt @@ -4,7 +4,7 @@ boto3~=1.34.33 celery[redis,sqs,tblib]==5.3.1 datadog-api-client==2.11.0 datadog~=0.47.0 -fastapi==0.78.0 +fastapi~=0.110.0 # Incompatibility between celery 5 and python 3.7 because of importlib-metadata 5, so we pin it importlib-metadata<5.0;python_version<"3.8" scale-launch>=0.1.0 @@ -21,3 +21,5 @@ json-log-formatter~=0.3 # model_engine_server/core/loggers.py tenacity>=6.0.0,<=6.2.0 # model_engine_server/core/loggers.py tqdm~=4.64 # model_engine_server/common/service_requests.py gunicorn~=20.0 +pydantic==2.8.2 + diff --git a/model-engine/model_engine_server/infra/gateways/live_async_model_endpoint_inference_gateway.py b/model-engine/model_engine_server/infra/gateways/live_async_model_endpoint_inference_gateway.py index 3c0408c8..f1c8b4f9 100644 --- a/model-engine/model_engine_server/infra/gateways/live_async_model_endpoint_inference_gateway.py +++ b/model-engine/model_engine_server/infra/gateways/live_async_model_endpoint_inference_gateway.py @@ -31,8 +31,8 @@ def create_task( *, task_name: str = DEFAULT_CELERY_TASK_NAME, ) -> CreateAsyncTaskV1Response: - # Use json.loads instead of predict_request.dict() because we have overridden the '__root__' - # key in some fields, and __root__ overriding only reflects in the json() output. + # Use json.loads instead of predict_request.dict() because we have overridden the 'root' + # key in some fields, and root overriding only reflects in the json() output. 
predict_args = json.loads(predict_request.json()) send_task_response = self.task_queue_gateway.send_task( diff --git a/model-engine/model_engine_server/infra/gateways/live_model_endpoints_schema_gateway.py b/model-engine/model_engine_server/infra/gateways/live_model_endpoints_schema_gateway.py index 5fac2841..f6f51d9c 100644 --- a/model-engine/model_engine_server/infra/gateways/live_model_endpoints_schema_gateway.py +++ b/model-engine/model_engine_server/infra/gateways/live_model_endpoints_schema_gateway.py @@ -1,11 +1,11 @@ import json from enum import Enum -from typing import Any, Callable, Dict, Sequence, Set, Type, Union +from typing import Any, Callable, Dict, List, Sequence, Set, Type, Union from fastapi import routing -from fastapi._compat import GenerateJsonSchema, get_model_definitions +from fastapi._compat import GenerateJsonSchema, get_definitions from fastapi.openapi.constants import REF_TEMPLATE -from fastapi.openapi.utils import get_openapi_path +from fastapi.openapi.utils import get_fields_from_routes, get_openapi_path from model_engine_server.common.dtos.tasks import ( EndpointPredictV1Request, GetAsyncTaskV1Response, @@ -72,6 +72,7 @@ def get_model_endpoints_schema( methods=["POST"], ) routes.append(route) + definitions = self.get_schemas_from_model_endpoint_record(record) definitions = LiveModelEndpointsSchemaGateway.update_model_definitions_with_prefix( prefix=record.name, model_definitions=definitions @@ -121,12 +122,19 @@ def get_openapi( prefix = model_endpoint_name model_name_map = LiveModelEndpointsSchemaGateway.get_model_name_map(prefix) schema_generator = GenerateJsonSchema(ref_template=REF_TEMPLATE) + all_fields = get_fields_from_routes([route]) + field_mapping, _ = get_definitions( + fields=all_fields, + schema_generator=schema_generator, + model_name_map=model_name_map, + ) + result = get_openapi_path( route=route, - model_name_map=model_name_map, operation_ids=operation_ids, schema_generator=schema_generator, - field_mapping={}, + model_name_map=model_name_map, + field_mapping=field_mapping, ) if result: path, security_schemes, path_definitions = result @@ -156,19 +164,17 @@ def update_model_definitions_with_prefix( Returns: Dict[str, Any]: The updated model definitions. 
""" - models: Set[Union[Type[BaseModel], Type[Enum]]] = { - CallbackAuth, - CallbackBasicAuth, - CallbackmTLSAuth, - TaskStatus, + models: List[Type[BaseModel]] = [ EndpointPredictV1Request, GetAsyncTaskV1Response, SyncEndpointPredictV1Response, - } - definitions = get_model_definitions( - flat_models=models, - model_name_map=LiveModelEndpointsSchemaGateway.get_model_name_map(prefix), + ] + + model_name_map = LiveModelEndpointsSchemaGateway.get_model_name_map(prefix) + definitions: Dict[str, Any] = LiveModelEndpointsSchemaGateway.get_model_definitions( + models=models, model_name_map=model_name_map ) + user_definitions = {} for k, v in model_definitions.items(): LiveModelEndpointsSchemaGateway.update_schema_refs_with_prefix(v, prefix) @@ -236,8 +242,8 @@ def get_default_model_definitions() -> Dict[str, Any]: global _default_model_definitions if _default_model_definitions is None: - _default_model_definitions = get_model_definitions( - flat_models={RequestSchema, ResponseSchema}, + _default_model_definitions = LiveModelEndpointsSchemaGateway.get_model_definitions( + models=[RequestSchema, ResponseSchema], model_name_map={ RequestSchema: "RequestSchema", ResponseSchema: "ResponseSchema", @@ -245,3 +251,21 @@ def get_default_model_definitions() -> Dict[str, Any]: ) return _default_model_definitions + + @staticmethod + def get_model_definitions( + models: Sequence[Type[BaseModel]], + model_name_map: Dict[Union[Type[BaseModel], Type[Enum]], str], + ) -> Dict[str, Any]: + """Get OpenAPI definitions for provided models using the name provided in model_name_map""" + + definitions = {} + for model in models: + schema = model.model_json_schema( + schema_generator=GenerateJsonSchema, ref_template=REF_TEMPLATE + ) + m_defs = schema.pop("$defs", {}) + definitions.update(m_defs) + model_name = model_name_map.get(model, model.__name__) + definitions[model_name] = schema + return definitions diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py index 024ca99e..af054dba 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py @@ -50,7 +50,7 @@ get_endpoint_resource_arguments_from_request, ) from packaging import version -from pydantic.utils import deep_update +from pydantic.v1.utils import deep_update logger = make_logger(logger_name()) diff --git a/model-engine/model_engine_server/infra/repositories/db_docker_image_batch_job_bundle_repository.py b/model-engine/model_engine_server/infra/repositories/db_docker_image_batch_job_bundle_repository.py index 4fa1948c..9e3cd17d 100644 --- a/model-engine/model_engine_server/infra/repositories/db_docker_image_batch_job_bundle_repository.py +++ b/model-engine/model_engine_server/infra/repositories/db_docker_image_batch_job_bundle_repository.py @@ -15,7 +15,7 @@ DbRepositoryMixin, raise_if_read_only, ) -from pydantic.error_wrappers import ValidationError +from pydantic import ValidationError class DbDockerImageBatchJobBundleRepository(DockerImageBatchJobBundleRepository, DbRepositoryMixin): diff --git a/model-engine/model_engine_server/infra/repositories/db_trigger_repository.py b/model-engine/model_engine_server/infra/repositories/db_trigger_repository.py index bb9cb5a3..b4114358 100644 --- a/model-engine/model_engine_server/infra/repositories/db_trigger_repository.py +++ 
b/model-engine/model_engine_server/infra/repositories/db_trigger_repository.py @@ -12,7 +12,7 @@ DbRepositoryMixin, raise_if_read_only, ) -from pydantic.error_wrappers import ValidationError +from pydantic import ValidationError from sqlalchemy.exc import IntegrityError diff --git a/model-engine/model_engine_server/infra/services/live_endpoint_builder_service.py b/model-engine/model_engine_server/infra/services/live_endpoint_builder_service.py index aecef2c7..9f9f257d 100644 --- a/model-engine/model_engine_server/infra/services/live_endpoint_builder_service.py +++ b/model-engine/model_engine_server/infra/services/live_endpoint_builder_service.py @@ -307,6 +307,7 @@ async def build_endpoint( user_config_state=ModelEndpointUserConfigState( app_config=build_endpoint_request.model_endpoint_record.current_model_bundle.app_config, endpoint_config=ModelEndpointConfig( + endpoint_type=build_endpoint_request.model_endpoint_record.endpoint_type, endpoint_name=build_endpoint_request.model_endpoint_record.name, bundle_name=build_endpoint_request.model_endpoint_record.current_model_bundle.name, post_inference_hooks=build_endpoint_request.post_inference_hooks, diff --git a/model-engine/requirements.in b/model-engine/requirements.in index 2ef63150..f70d4503 100644 --- a/model-engine/requirements.in +++ b/model-engine/requirements.in @@ -37,7 +37,7 @@ protobuf~=3.20 psycopg2-binary==2.9.3 py-xid==0.3.0 pycurl~=7.44 # For celery[sqs] -pydantic==1.10.14 +pydantic==2.8.2 python-multipart~=0.0.7 quart==0.18.3 requests-auth-aws-sigv4~=0.7 diff --git a/model-engine/requirements.txt b/model-engine/requirements.txt index 71e7440d..fb0d4d24 100644 --- a/model-engine/requirements.txt +++ b/model-engine/requirements.txt @@ -18,6 +18,8 @@ alembic==1.8.1 # via -r model-engine/requirements.in amqp==5.1.1 # via kombu +annotated-types==0.7.0 + # via pydantic anyio==3.7.1 # via # azure-core @@ -362,10 +364,12 @@ pycurl==7.45.2 # -r model-engine/requirements.in # celery # kombu -pydantic==1.10.14 +pydantic==2.8.2 # via # -r model-engine/requirements.in # fastapi +pydantic-core==2.20.1 + # via pydantic pygments==2.15.1 # via # readme-renderer @@ -530,6 +534,7 @@ types-s3transfer==0.6.1 typing-extensions==4.10.0 # via # aioredis + # annotated-types # asgiref # azure-core # azure-keyvault-secrets @@ -552,6 +557,7 @@ typing-extensions==4.10.0 # mypy-boto3-s3 # mypy-boto3-sqs # pydantic + # pydantic-core # rich # sqlalchemy # starlette diff --git a/model-engine/tests/integration/inference/conftest.py b/model-engine/tests/integration/inference/conftest.py index fcd63dfc..07e900b0 100644 --- a/model-engine/tests/integration/inference/conftest.py +++ b/model-engine/tests/integration/inference/conftest.py @@ -47,7 +47,7 @@ def test_user_id() -> str: @pytest.fixture(scope="session") def test_default_callback_auth() -> CallbackAuth: return CallbackAuth( - __root__=CallbackBasicAuth(kind="basic", username="test_user", password="test_password") + root=CallbackBasicAuth(kind="basic", username="test_user", password="test_password") ) @@ -100,7 +100,7 @@ def launch_celery_app( f"--loglevel=INFO --concurrency=1 --queues={queue}" ) # Wait up to 10 seconds for process to start and be ready. 
-    with subprocess.Popen(
+    with subprocess.Popen(  # nosemgrep
         command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
     ) as process:
         for attempt in Retrying(
diff --git a/model-engine/tests/integration/inference/test_async_inference.py b/model-engine/tests/integration/inference/test_async_inference.py
index db9bc9a7..d1d7f7c5 100644
--- a/model-engine/tests/integration/inference/test_async_inference.py
+++ b/model-engine/tests/integration/inference/test_async_inference.py
@@ -42,7 +42,7 @@ def redis_available() -> bool:
 @pytest.mark.parametrize(
     "task_args,cloudpickle,expected_status,expected_result",
     [
-        ({"y": 1}, False, TaskStatus.SUCCESS, ResponseSchema(__root__={"result": "1"})),
+        ({"y": 1}, False, TaskStatus.SUCCESS, ResponseSchema(root={"result": "1"})),
         ({"x": False, "y": 1}, False, TaskStatus.FAILURE, None),
     ],
 )
diff --git a/model-engine/tests/unit/api/conftest.py b/model-engine/tests/unit/api/conftest.py
index b312f7eb..2ca38500 100644
--- a/model-engine/tests/unit/api/conftest.py
+++ b/model-engine/tests/unit/api/conftest.py
@@ -789,7 +789,7 @@ def model_endpoint_1(
         post_inference_hooks=["callback"],
         default_callback_url="http://www.example.com",
         default_callback_auth=CallbackAuth(
-            __root__=CallbackBasicAuth(
+            root=CallbackBasicAuth(
                 kind="basic",
                 username="test_username",
                 password="test_password",
@@ -831,7 +831,7 @@ def model_endpoint_1(
                 "unavailable_workers": 1,
             },
             "resource_state": {
-                "cpus": "1",
+                "cpus": 1,
                 "gpus": 1,
                 "memory": "1G",
                 "gpu_type": "nvidia-tesla-t4",
@@ -1363,7 +1363,7 @@ def create_batch_completions_request() -> Dict[str, Any]:
         "model_config": {
             "model": "mpt-7b",
             "checkpoint_path": "s3://test_checkpoint_path",
-            "labels": [],
+            "labels": {},
             "num_shards": 2,
         },
         "data_parallelism": 1,
diff --git a/model-engine/tests/unit/api/test_tasks.py b/model-engine/tests/unit/api/test_tasks.py
index 80f21734..f2cee1bb 100644
--- a/model-engine/tests/unit/api/test_tasks.py
+++ b/model-engine/tests/unit/api/test_tasks.py
@@ -410,8 +410,7 @@ async def test_create_streaming_task_success(
     count = 0
     async for message in response.aiter_bytes():
         assert (
-            message
-            == b'data: {"status": "SUCCESS", "result": null, "traceback": null}\r\n\r\n'
+            message == b'data: {"status":"SUCCESS","result":null,"traceback":null}\r\n\r\n'
         )
         count += 1
     assert count == 1
diff --git a/model-engine/tests/unit/conftest.py b/model-engine/tests/unit/conftest.py
index 2366019a..ec3850af 100644
--- a/model-engine/tests/unit/conftest.py
+++ b/model-engine/tests/unit/conftest.py
@@ -3197,7 +3197,7 @@ def build_endpoint_request_async_runnable_image(
         broker_type=BrokerType.SQS,
         default_callback_url="https://example.com",
         default_callback_auth=CallbackAuth(
-            __root__=CallbackBasicAuth(kind="basic", username="username", password="password")
+            root=CallbackBasicAuth(kind="basic", username="username", password="password")
         ),
     )
     return build_endpoint_request
@@ -3240,7 +3240,7 @@ def build_endpoint_request_streaming_runnable_image(
         broker_type=BrokerType.SQS,
         default_callback_url="https://example.com",
         default_callback_auth=CallbackAuth(
-            __root__=CallbackBasicAuth(kind="basic", username="username", password="password")
+            root=CallbackBasicAuth(kind="basic", username="username", password="password")
         ),
     )
     return build_endpoint_request
@@ -3283,7 +3283,7 @@ def build_endpoint_request_sync_runnable_image(
         broker_type=BrokerType.SQS,
         default_callback_url="https://example.com",
         default_callback_auth=CallbackAuth(
-            __root__=CallbackBasicAuth(kind="basic", username="username", password="password")
+            root=CallbackBasicAuth(kind="basic", username="username", password="password")
         ),
     )
     return build_endpoint_request
@@ -3326,7 +3326,7 @@ def build_endpoint_request_sync_pytorch(
         broker_type=BrokerType.SQS,
         default_callback_url="https://example.com",
         default_callback_auth=CallbackAuth(
-            __root__=CallbackBasicAuth(kind="basic", username="username", password="password")
+            root=CallbackBasicAuth(kind="basic", username="username", password="password")
        ),
     )
     return build_endpoint_request
@@ -3368,7 +3368,7 @@ def build_endpoint_request_async_tensorflow(
         optimize_costs=False,
         default_callback_url="https://example.com/path",
         default_callback_auth=CallbackAuth(
-            __root__=CallbackBasicAuth(kind="basic", username="username", password="password")
+            root=CallbackBasicAuth(kind="basic", username="username", password="password")
         ),
     )
     return build_endpoint_request
@@ -3513,9 +3513,7 @@ def endpoint_predict_request_2() -> Tuple[EndpointPredictV1Request, Dict[str, An
         args=["test_arg_1", "test_arg_2"],
         callback_url="http://test_callback_url.xyz",
         callback_auth=CallbackAuth(
-            __root__=CallbackBasicAuth(
-                kind="basic", username="test_username", password="test_password"
-            )
+            root=CallbackBasicAuth(kind="basic", username="test_username", password="test_password")
         ),
         return_pickled=True,
     )
@@ -3594,7 +3592,7 @@ def llm_model_endpoint_async(
         post_inference_hooks=["callback"],
         default_callback_url="http://www.example.com",
         default_callback_auth=CallbackAuth(
-            __root__=CallbackBasicAuth(
+            root=CallbackBasicAuth(
                 kind="basic",
                 username="test_username",
                 password="test_password",
@@ -3653,7 +3651,7 @@ def llm_model_endpoint_async(
                 "unavailable_workers": 1,
             },
             "resource_state": {
-                "cpus": "1",
+                "cpus": 1,
                 "gpus": 1,
                 "memory": "1G",
                 "gpu_type": "nvidia-tesla-t4",
@@ -3726,7 +3724,7 @@ def llm_model_endpoint_sync(
         post_inference_hooks=["callback"],
         default_callback_url="http://www.example.com",
         default_callback_auth=CallbackAuth(
-            __root__=CallbackBasicAuth(
+            root=CallbackBasicAuth(
                 kind="basic",
                 username="test_username",
                 password="test_password",
@@ -3785,7 +3783,7 @@ def llm_model_endpoint_sync(
                 "unavailable_workers": 1,
             },
             "resource_state": {
-                "cpus": "1",
+                "cpus": 1,
                 "gpus": 1,
                 "memory": "1G",
                 "gpu_type": "nvidia-tesla-t4",
@@ -3858,7 +3856,7 @@ def llm_model_endpoint_stream(
         post_inference_hooks=["callback"],
         default_callback_url="http://www.example.com",
         default_callback_auth=CallbackAuth(
-            __root__=CallbackBasicAuth(
+            root=CallbackBasicAuth(
                 kind="basic",
                 username="test_username",
                 password="test_password",
@@ -3917,7 +3915,7 @@ def llm_model_endpoint_stream(
                 "unavailable_workers": 1,
             },
             "resource_state": {
-                "cpus": "1",
+                "cpus": 1,
                 "gpus": 1,
                 "memory": "1G",
                 "gpu_type": "nvidia-tesla-t4",
@@ -3990,7 +3988,7 @@ def llm_model_endpoint_sync_tgi(
         post_inference_hooks=["callback"],
         default_callback_url="http://www.example.com",
         default_callback_auth=CallbackAuth(
-            __root__=CallbackBasicAuth(
+            root=CallbackBasicAuth(
                 kind="basic",
                 username="test_username",
                 password="test_password",
@@ -4049,7 +4047,7 @@ def llm_model_endpoint_sync_tgi(
                 "unavailable_workers": 1,
             },
             "resource_state": {
-                "cpus": "1",
+                "cpus": 1,
                 "gpus": 1,
                 "memory": "1G",
                 "gpu_type": "nvidia-tesla-t4",
@@ -4122,7 +4120,7 @@ def llm_model_endpoint_sync_lightllm(
         post_inference_hooks=["callback"],
         default_callback_url="http://www.example.com",
         default_callback_auth=CallbackAuth(
-            __root__=CallbackBasicAuth(
+            root=CallbackBasicAuth(
                 kind="basic",
                 username="test_username",
                 password="test_password",
@@ -4181,7 +4179,7 @@ def llm_model_endpoint_sync_lightllm(
                 "unavailable_workers": 1,
             },
             "resource_state": {
-                "cpus": "1",
+                "cpus": 1,
                 "gpus": 1,
                 "memory": "1G",
                 "gpu_type": "nvidia-tesla-t4",
@@ -4254,7 +4252,7 @@ def llm_model_endpoint_sync_trt_llm(
         post_inference_hooks=["callback"],
         default_callback_url="http://www.example.com",
         default_callback_auth=CallbackAuth(
-            __root__=CallbackBasicAuth(
+            root=CallbackBasicAuth(
                 kind="basic",
                 username="test_username",
                 password="test_password",
@@ -4313,7 +4311,7 @@ def llm_model_endpoint_sync_trt_llm(
                 "unavailable_workers": 1,
             },
             "resource_state": {
-                "cpus": "1",
+                "cpus": 1,
                 "gpus": 1,
                 "memory": "1G",
                 "gpu_type": "nvidia-tesla-t4",
@@ -4451,7 +4449,7 @@ def llm_model_endpoint_text_generation_inference(
         post_inference_hooks=["callback"],
         default_callback_url="http://www.example.com",
         default_callback_auth=CallbackAuth(
-            __root__=CallbackBasicAuth(
+            root=CallbackBasicAuth(
                 kind="basic",
                 username="test_username",
                 password="test_password",
@@ -4524,7 +4522,7 @@ def llm_model_endpoint_trt_llm(
         post_inference_hooks=["callback"],
         default_callback_url="http://www.example.com",
         default_callback_auth=CallbackAuth(
-            __root__=CallbackBasicAuth(
+            root=CallbackBasicAuth(
                 kind="basic",
                 username="test_username",
                 password="test_password",
diff --git a/model-engine/tests/unit/domain/conftest.py b/model-engine/tests/unit/domain/conftest.py
index 937f3cfc..0a10cb77 100644
--- a/model-engine/tests/unit/domain/conftest.py
+++ b/model-engine/tests/unit/domain/conftest.py
@@ -533,7 +533,7 @@ def create_batch_completions_request() -> CreateBatchCompletionsRequest:
         model_config=CreateBatchCompletionsModelConfig(
             model="mpt-7b",
             checkpoint_path="s3://test_checkpoint_path",
-            labels=[],
+            labels={},
             num_shards=2,
         ),
         data_parallelism=2,
diff --git a/model-engine/tests/unit/domain/test_entities.py b/model-engine/tests/unit/domain/test_entities.py
index 41533afc..cd0ab507 100644
--- a/model-engine/tests/unit/domain/test_entities.py
+++ b/model-engine/tests/unit/domain/test_entities.py
@@ -25,9 +25,7 @@
             user_id="test_user",
             billing_queue="test_queue",
             default_callback_auth=CallbackAuth(
-                __root__=CallbackBasicAuth(
-                    kind="basic", username="test_user", password="test_password"
-                )
+                root=CallbackBasicAuth(kind="basic", username="test_user", password="test_password")
             ),
         ),
     ],
diff --git a/model-engine/tests/unit/domain/test_llm_use_cases.py b/model-engine/tests/unit/domain/test_llm_use_cases.py
index 8e310211..125aeab9 100644
--- a/model-engine/tests/unit/domain/test_llm_use_cases.py
+++ b/model-engine/tests/unit/domain/test_llm_use_cases.py
@@ -1961,7 +1961,7 @@ async def test_infer_hardware(fake_llm_artifact_gateway):
         "vocab_size": 32000,
     }
     hardware = await _infer_hardware(fake_llm_artifact_gateway, "mixtral-8x7b", "")
-    assert hardware.cpus == "40"
+    assert hardware.cpus == 40
     assert hardware.gpus == 2
     assert hardware.memory == "160Gi"
     assert hardware.storage == "160Gi"
@@ -1970,7 +1970,7 @@ async def test_infer_hardware(fake_llm_artifact_gateway):
     hardware = await _infer_hardware(
         fake_llm_artifact_gateway, "mixtral-8x7b", "", is_batch_job=True
     )
-    assert hardware.cpus == "40"
+    assert hardware.cpus == 40
     assert hardware.gpus == 2
     assert hardware.memory == "160Gi"
     assert hardware.storage == "160Gi"
@@ -2001,7 +2001,7 @@ async def test_infer_hardware(fake_llm_artifact_gateway):
         "vocab_size": 32000,
     }
     hardware = await _infer_hardware(fake_llm_artifact_gateway, "mixtral-8x22b", "")
-    assert hardware.cpus == "160"
+    assert hardware.cpus == 160
     assert hardware.gpus == 8
     assert hardware.memory == "800Gi"
     assert hardware.storage == "640Gi"
@@ -2010,7 +2010,7 @@ async def test_infer_hardware(fake_llm_artifact_gateway):
     hardware = await _infer_hardware(
         fake_llm_artifact_gateway, "mixtral-8x22b", "", is_batch_job=True
     )
-    assert hardware.cpus == "160"
+    assert hardware.cpus == 160
     assert hardware.gpus == 8
     assert hardware.memory == "800Gi"
     assert hardware.storage == "640Gi"
@@ -2037,14 +2037,14 @@ async def test_infer_hardware(fake_llm_artifact_gateway):
         "vocab_size": 32000,
     }
     hardware = await _infer_hardware(fake_llm_artifact_gateway, "llama-2-7b", "")
-    assert hardware.cpus == "5"
+    assert hardware.cpus == 5
     assert hardware.gpus == 1
     assert hardware.memory == "20Gi"
     assert hardware.storage == "40Gi"
     assert hardware.gpu_type == GpuType.NVIDIA_HOPPER_H100_1G_20GB
     hardware = await _infer_hardware(fake_llm_artifact_gateway, "llama-2-7b", "", is_batch_job=True)
-    assert hardware.cpus == "10"
+    assert hardware.cpus == 10
     assert hardware.gpus == 1
     assert hardware.memory == "40Gi"
     assert hardware.storage == "80Gi"
@@ -2072,14 +2072,14 @@ async def test_infer_hardware(fake_llm_artifact_gateway):
         "vocab_size": 128256,
     }
     hardware = await _infer_hardware(fake_llm_artifact_gateway, "llama-3-8b", "")
-    assert hardware.cpus == "5"
+    assert hardware.cpus == 5
     assert hardware.gpus == 1
     assert hardware.memory == "20Gi"
     assert hardware.storage == "40Gi"
     assert hardware.gpu_type == GpuType.NVIDIA_HOPPER_H100_1G_20GB
     hardware = await _infer_hardware(fake_llm_artifact_gateway, "llama-3-8b", "", is_batch_job=True)
-    assert hardware.cpus == "10"
+    assert hardware.cpus == 10
     assert hardware.gpus == 1
     assert hardware.memory == "40Gi"
     assert hardware.storage == "80Gi"
@@ -2106,7 +2106,7 @@ async def test_infer_hardware(fake_llm_artifact_gateway):
         "vocab_size": 32000,
     }
     hardware = await _infer_hardware(fake_llm_artifact_gateway, "llama-2-13b", "")
-    assert hardware.cpus == "10"
+    assert hardware.cpus == 10
     assert hardware.gpus == 1
     assert hardware.memory == "40Gi"
     assert hardware.storage == "80Gi"
@@ -2115,7 +2115,7 @@ async def test_infer_hardware(fake_llm_artifact_gateway):
     hardware = await _infer_hardware(
         fake_llm_artifact_gateway, "llama-2-13b", "", is_batch_job=True
     )
-    assert hardware.cpus == "20"
+    assert hardware.cpus == 20
     assert hardware.gpus == 1
     assert hardware.memory == "80Gi"
     assert hardware.storage == "96Gi"
@@ -2142,7 +2142,7 @@ async def test_infer_hardware(fake_llm_artifact_gateway):
         "vocab_size": 32000,
     }
     hardware = await _infer_hardware(fake_llm_artifact_gateway, "codellama-34b", "")
-    assert hardware.cpus == "20"
+    assert hardware.cpus == 20
     assert hardware.gpus == 1
     assert hardware.memory == "80Gi"
     assert hardware.storage == "96Gi"
@@ -2151,7 +2151,7 @@ async def test_infer_hardware(fake_llm_artifact_gateway):
     hardware = await _infer_hardware(
         fake_llm_artifact_gateway, "codellama-34b", "", is_batch_job=True
     )
-    assert hardware.cpus == "40"
+    assert hardware.cpus == 40
     assert hardware.gpus == 2
     assert hardware.memory == "160Gi"
     assert hardware.storage == "160Gi"
@@ -2178,7 +2178,7 @@ async def test_infer_hardware(fake_llm_artifact_gateway):
         "vocab_size": 32000,
     }
     hardware = await _infer_hardware(fake_llm_artifact_gateway, "llama-2-70b", "")
-    assert hardware.cpus == "40"
+    assert hardware.cpus == 40
     assert hardware.gpus == 2
     assert hardware.memory == "160Gi"
     assert hardware.storage == "160Gi"
@@ -2187,7 +2187,7 @@ async def test_infer_hardware(fake_llm_artifact_gateway):
     hardware = await _infer_hardware(
         fake_llm_artifact_gateway, "llama-2-70b", "", is_batch_job=True
     )
-    assert hardware.cpus == "80"
+    assert hardware.cpus == 80
     assert hardware.gpus == 4
     assert hardware.memory == "320Gi"
     assert hardware.storage == "320Gi"
@@ -2215,7 +2215,7 @@ async def test_infer_hardware(fake_llm_artifact_gateway):
         "vocab_size": 128256,
     }
     hardware = await _infer_hardware(fake_llm_artifact_gateway, "llama-3-70b", "")
-    assert hardware.cpus == "40"
+    assert hardware.cpus == 40
     assert hardware.gpus == 2
     assert hardware.memory == "160Gi"
     assert hardware.storage == "160Gi"
@@ -2224,7 +2224,7 @@ async def test_infer_hardware(fake_llm_artifact_gateway):
     hardware = await _infer_hardware(
         fake_llm_artifact_gateway, "llama-3-70b", "", is_batch_job=True
     )
-    assert hardware.cpus == "80"
+    assert hardware.cpus == 80
     assert hardware.gpus == 4
     assert hardware.memory == "320Gi"
     assert hardware.storage == "320Gi"
@@ -2253,7 +2253,7 @@ async def test_infer_hardware(fake_llm_artifact_gateway):
         "vocab_size": 128256,
     }
     hardware = await _infer_hardware(fake_llm_artifact_gateway, "llama-3-8b-instruct-262k", "")
-    assert hardware.cpus == "40"
+    assert hardware.cpus == 40
     assert hardware.gpus == 2
     assert hardware.memory == "160Gi"
     assert hardware.storage == "160Gi"
@@ -2283,7 +2283,7 @@ async def test_fill_hardware_info(fake_llm_artifact_gateway):
         labels={},
     )
     await _fill_hardware_info(fake_llm_artifact_gateway, request)
-    assert request.cpus == "40"
+    assert request.cpus == 40
     assert request.gpus == 2
     assert request.memory == "160Gi"
     assert request.storage == "160Gi"
diff --git a/model-engine/tests/unit/infra/gateways/test_live_async_model_inference_gateway.py b/model-engine/tests/unit/infra/gateways/test_live_async_model_inference_gateway.py
index e86f0f1f..1d38c223 100644
--- a/model-engine/tests/unit/infra/gateways/test_live_async_model_inference_gateway.py
+++ b/model-engine/tests/unit/infra/gateways/test_live_async_model_inference_gateway.py
@@ -52,7 +52,7 @@ def test_task_create_get_args_callback(
     task_queue_gateway: Any = fake_live_async_model_inference_gateway.task_queue_gateway
     assert len(task_queue_gateway.queue) == 1
     assert task_queue_gateway.queue[task_id]["args"][0] == {
-        "args": endpoint_predict_request_2[0].args.__root__,
+        "args": endpoint_predict_request_2[0].args.root,
         "url": None,
         "cloudpickle": None,
         "callback_auth": json.loads(endpoint_predict_request_2[0].callback_auth.json()),
diff --git a/model-engine/tests/unit/infra/gateways/test_live_batch_job_progress_gateway.py b/model-engine/tests/unit/infra/gateways/test_live_batch_job_progress_gateway.py
index 2a3fe197..4112ac8b 100644
--- a/model-engine/tests/unit/infra/gateways/test_live_batch_job_progress_gateway.py
+++ b/model-engine/tests/unit/infra/gateways/test_live_batch_job_progress_gateway.py
@@ -34,4 +34,4 @@ def test_update_progress(test_api_key: str, fake_filesystem_gateway):
         progress=BatchJobProgress(num_tasks_pending=4, num_tasks_completed=5),
     )
     handle = fake_filesystem_gateway.mock_open()
-    handle.write.assert_called_once_with('{"num_tasks_pending": 4, "num_tasks_completed": 5}')
+    handle.write.assert_called_once_with('{"num_tasks_pending":4,"num_tasks_completed":5}')
diff --git a/requirements-docs.txt b/requirements-docs.txt
index 51d81c23..fdc1a843 100644
--- a/requirements-docs.txt
+++ b/requirements-docs.txt
@@ -6,7 +6,7 @@ mkdocs-render-swagger-plugin~=0.0.4
 mkdocs-simple-hooks~=0.1.5
 mkdocs-video~=1.5.0
 mkdocstrings[python]~=0.20.0
-pydantic~=1.10.0
+pydantic==2.8.2
 neoteroi-mkdocs~=1.0.0
 tabulate~=0.9.0
 scale-llm-engine
\ No newline at end of file