From b19c89451c8ea833527323ca91016db21192b73a Mon Sep 17 00:00:00 2001 From: Somasundaram Date: Mon, 25 Nov 2024 12:18:51 -0800 Subject: [PATCH 1/2] change: Updates for DJL 0.31.0 lmi and 0.30.0 neuronx release --- src/sagemaker/image_uri_config/djl-lmi.json | 37 +++++- .../image_uri_config/djl-neuronx.json | 114 ++---------------- .../image_uri_config/djl-tensorrtllm.json | 68 ----------- 3 files changed, 45 insertions(+), 174 deletions(-) diff --git a/src/sagemaker/image_uri_config/djl-lmi.json b/src/sagemaker/image_uri_config/djl-lmi.json index 0a741036c1..7415944945 100644 --- a/src/sagemaker/image_uri_config/djl-lmi.json +++ b/src/sagemaker/image_uri_config/djl-lmi.json @@ -3,9 +3,44 @@ "inference" ], "version_aliases": { - "latest": "0.30.0" + "latest": "0.31.0" }, "versions": { + "0.31.0": { + "registries": { + "af-south-1": "626614931356", + "il-central-1": "780543022126", + "ap-east-1": "871362719292", + "ap-northeast-1": "763104351884", + "ap-northeast-2": "763104351884", + "ap-northeast-3": "364406365360", + "ap-south-1": "763104351884", + "ap-southeast-1": "763104351884", + "ap-southeast-2": "763104351884", + "ap-southeast-3": "907027046896", + "ca-central-1": "763104351884", + "cn-north-1": "727897471807", + "cn-northwest-1": "727897471807", + "eu-central-1": "763104351884", + "eu-north-1": "763104351884", + "eu-west-1": "763104351884", + "eu-west-2": "763104351884", + "eu-west-3": "763104351884", + "eu-south-1": "692866216735", + "me-south-1": "217643126080", + "me-central-1": "914824155844", + "sa-east-1": "763104351884", + "us-east-1": "763104351884", + "us-east-2": "763104351884", + "us-gov-east-1": "446045086412", + "us-gov-west-1": "442386744353", + "us-west-1": "763104351884", + "us-west-2": "763104351884", + "ca-west-1": "204538143572" + }, + "repository": "djl-inference", + "tag_prefix": "0.31.0-lmi13.0.0-cu124" + }, "0.30.0": { "registries": { "af-south-1": "626614931356", diff --git a/src/sagemaker/image_uri_config/djl-neuronx.json b/src/sagemaker/image_uri_config/djl-neuronx.json index 3fd3c7619f..1ed2060d61 100644 --- a/src/sagemaker/image_uri_config/djl-neuronx.json +++ b/src/sagemaker/image_uri_config/djl-neuronx.json @@ -3,106 +3,10 @@ "inference" ], "version_aliases": { - "latest": "0.29.0" + "latest": "0.30.0" }, "versions": { - "0.29.0": { - "registries": { - "ap-northeast-1": "763104351884", - "ap-south-1": "763104351884", - "ap-southeast-1": "763104351884", - "ap-southeast-2": "763104351884", - "ap-southeast-5": "550225433462", - "cn-north-1": "727897471807", - "cn-northwest-1": "727897471807", - "eu-central-1": "763104351884", - "eu-central-2": "380420809688", - "eu-west-1": "763104351884", - "eu-west-3": "763104351884", - "sa-east-1": "763104351884", - "us-east-1": "763104351884", - "us-east-2": "763104351884", - "us-gov-east-1": "446045086412", - "us-gov-west-1": "442386744353", - "us-west-2": "763104351884", - "ca-west-1": "204538143572" - }, - "repository": "djl-inference", - "tag_prefix": "0.29.0-neuronx-sdk2.19.1" - }, - "0.28.0": { - "registries": { - "ap-northeast-1": "763104351884", - "ap-south-1": "763104351884", - "ap-southeast-1": "763104351884", - "ap-southeast-2": "763104351884", - "ap-southeast-5": "550225433462", - "cn-north-1": "727897471807", - "cn-northwest-1": "727897471807", - "eu-central-1": "763104351884", - "eu-central-2": "380420809688", - "eu-west-1": "763104351884", - "eu-west-3": "763104351884", - "sa-east-1": "763104351884", - "us-east-1": "763104351884", - "us-east-2": "763104351884", - "us-gov-east-1": "446045086412", - "us-gov-west-1": "442386744353", - "us-west-2": "763104351884", - "ca-west-1": "204538143572" - }, - "repository": "djl-inference", - "tag_prefix": "0.28.0-neuronx-sdk2.18.2" - }, - "0.27.0": { - "registries": { - "ap-northeast-1": "763104351884", - "ap-south-1": "763104351884", - "ap-southeast-1": "763104351884", - "ap-southeast-2": "763104351884", - "ap-southeast-5": "550225433462", - "cn-north-1": "727897471807", - "cn-northwest-1": "727897471807", - "eu-central-1": "763104351884", - "eu-central-2": "380420809688", - "eu-west-1": "763104351884", - "eu-west-3": "763104351884", - "sa-east-1": "763104351884", - "us-east-1": "763104351884", - "us-east-2": "763104351884", - "us-gov-east-1": "446045086412", - "us-gov-west-1": "442386744353", - "us-west-2": "763104351884", - "ca-west-1": "204538143572" - }, - "repository": "djl-inference", - "tag_prefix": "0.27.0-neuronx-sdk2.18.1" - }, - "0.26.0": { - "registries": { - "ap-northeast-1": "763104351884", - "ap-south-1": "763104351884", - "ap-southeast-1": "763104351884", - "ap-southeast-2": "763104351884", - "ap-southeast-5": "550225433462", - "cn-north-1": "727897471807", - "cn-northwest-1": "727897471807", - "eu-central-1": "763104351884", - "eu-central-2": "380420809688", - "eu-west-1": "763104351884", - "eu-west-3": "763104351884", - "sa-east-1": "763104351884", - "us-east-1": "763104351884", - "us-east-2": "763104351884", - "us-gov-east-1": "446045086412", - "us-gov-west-1": "442386744353", - "us-west-2": "763104351884", - "ca-west-1": "204538143572" - }, - "repository": "djl-inference", - "tag_prefix": "0.26.0-neuronx-sdk2.16.0" - }, - "0.25.0": { + "0.30.0": { "registries": { "ap-northeast-1": "763104351884", "ap-south-1": "763104351884", @@ -124,9 +28,9 @@ "ca-west-1": "204538143572" }, "repository": "djl-inference", - "tag_prefix": "0.25.0-neuronx-sdk2.15.0" + "tag_prefix": "0.30.0-neuronx-sdk2.20.1" }, - "0.24.0": { + "0.29.0": { "registries": { "ap-northeast-1": "763104351884", "ap-south-1": "763104351884", @@ -148,9 +52,9 @@ "ca-west-1": "204538143572" }, "repository": "djl-inference", - "tag_prefix": "0.24.0-neuronx-sdk2.14.1" + "tag_prefix": "0.29.0-neuronx-sdk2.19.1" }, - "0.23.0": { + "0.28.0": { "registries": { "ap-northeast-1": "763104351884", "ap-south-1": "763104351884", @@ -172,9 +76,9 @@ "ca-west-1": "204538143572" }, "repository": "djl-inference", - "tag_prefix": "0.23.0-neuronx-sdk2.12.0" + "tag_prefix": "0.28.0-neuronx-sdk2.18.2" }, - "0.22.1": { + "0.27.0": { "registries": { "ap-northeast-1": "763104351884", "ap-south-1": "763104351884", @@ -196,7 +100,7 @@ "ca-west-1": "204538143572" }, "repository": "djl-inference", - "tag_prefix": "0.22.1-neuronx-sdk2.10.0" + "tag_prefix": "0.27.0-neuronx-sdk2.18.1" } } } \ No newline at end of file diff --git a/src/sagemaker/image_uri_config/djl-tensorrtllm.json b/src/sagemaker/image_uri_config/djl-tensorrtllm.json index cd1e59bad8..c1606fa5ec 100644 --- a/src/sagemaker/image_uri_config/djl-tensorrtllm.json +++ b/src/sagemaker/image_uri_config/djl-tensorrtllm.json @@ -141,74 +141,6 @@ }, "repository": "djl-inference", "tag_prefix": "0.27.0-tensorrtllm0.8.0-cu122" - }, - "0.26.0": { - "registries": { - "af-south-1": "626614931356", - "il-central-1": "780543022126", - "ap-east-1": "871362719292", - "ap-northeast-1": "763104351884", - "ap-northeast-2": "763104351884", - "ap-northeast-3": "364406365360", - "ap-south-1": "763104351884", - "ap-southeast-1": "763104351884", - "ap-southeast-2": "763104351884", - "ap-southeast-3": "907027046896", - "ca-central-1": "763104351884", - "cn-north-1": "727897471807", - "cn-northwest-1": "727897471807", - "eu-central-1": "763104351884", - "eu-north-1": "763104351884", - "eu-west-1": "763104351884", - "eu-west-2": "763104351884", - "eu-west-3": "763104351884", - "eu-south-1": "692866216735", - "me-south-1": "217643126080", - "sa-east-1": "763104351884", - "us-east-1": "763104351884", - "us-east-2": "763104351884", - "us-gov-east-1": "446045086412", - "us-gov-west-1": "442386744353", - "us-west-1": "763104351884", - "us-west-2": "763104351884", - "ca-west-1": "204538143572" - }, - "repository": "djl-inference", - "tag_prefix": "0.26.0-tensorrtllm0.7.1-cu122" - }, - "0.25.0": { - "registries": { - "af-south-1": "626614931356", - "il-central-1": "780543022126", - "ap-east-1": "871362719292", - "ap-northeast-1": "763104351884", - "ap-northeast-2": "763104351884", - "ap-northeast-3": "364406365360", - "ap-south-1": "763104351884", - "ap-southeast-1": "763104351884", - "ap-southeast-2": "763104351884", - "ap-southeast-3": "907027046896", - "ca-central-1": "763104351884", - "cn-north-1": "727897471807", - "cn-northwest-1": "727897471807", - "eu-central-1": "763104351884", - "eu-north-1": "763104351884", - "eu-west-1": "763104351884", - "eu-west-2": "763104351884", - "eu-west-3": "763104351884", - "eu-south-1": "692866216735", - "me-south-1": "217643126080", - "sa-east-1": "763104351884", - "us-east-1": "763104351884", - "us-east-2": "763104351884", - "us-gov-east-1": "446045086412", - "us-gov-west-1": "442386744353", - "us-west-1": "763104351884", - "us-west-2": "763104351884", - "ca-west-1": "204538143572" - }, - "repository": "djl-inference", - "tag_prefix": "0.25.0-tensorrtllm0.5.0-cu122" } } } \ No newline at end of file From e536429f42a7c136bf823207c63fa8f0f324b50b Mon Sep 17 00:00:00 2001 From: Somasundaram Date: Mon, 2 Dec 2024 16:07:56 -0800 Subject: [PATCH 2/2] change OnnxRuntime engine to Rust for text embedding --- src/sagemaker/djl_inference/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sagemaker/djl_inference/model.py b/src/sagemaker/djl_inference/model.py index 8c724a6502..2a23cba9ec 100644 --- a/src/sagemaker/djl_inference/model.py +++ b/src/sagemaker/djl_inference/model.py @@ -148,7 +148,7 @@ def _infer_engine(self) -> Optional[str]: return self.engine if self.task == "text-embedding": - return "OnnxRuntime" + return "Rust" return "Python" def _infer_image_uri(self):