From 1a3cca68cab89b5907fad03fff15e32198c0779f Mon Sep 17 00:00:00 2001
From: merk0ff
Date: Mon, 18 Nov 2024 13:04:07 +0300
Subject: [PATCH] Add local_files_only for SentenceTransformer

---
 config.py     | 5 +++++
 vectorizer.py | 7 +++----
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/config.py b/config.py
index 467f504..8cf6189 100644
--- a/config.py
+++ b/config.py
@@ -2,6 +2,11 @@
 from typing import List
 
 TRUST_REMOTE_CODE = os.getenv("TRUST_REMOTE_CODE", False)
+ST_LOCAL_FILES_ONLY = os.getenv("ST_LOCAL_FILES_ONLY", "False").lower() in (
+    "true",
+    "1",
+    "t",
+)
 
 
 def get_allowed_tokens() -> List[str] | None:
diff --git a/vectorizer.py b/vectorizer.py
index 50a52ef..ae3cfce 100644
--- a/vectorizer.py
+++ b/vectorizer.py
@@ -20,6 +20,8 @@
     T5Tokenizer,
 )
 
+from config import ST_LOCAL_FILES_ONLY
+
 # limit transformer batch size to limit parallel inference, otherwise we run
 # into memory problems
 
@@ -96,6 +98,7 @@ def __init__(
             cache_folder=model_path,
             device=self.get_device(),
             trust_remote_code=trust_remote_code,
+            local_files_only=ST_LOCAL_FILES_ONLY,
         )
         self.model.eval()  # make sure we're in inference mode, not training
 
@@ -258,7 +261,6 @@ def vectorize(self, text: str, config: VectorInputConfig):
 
 
 class HFModel:
-
     def __init__(self, cuda_support: bool, cuda_core: str, trust_remote_code: bool):
         super().__init__()
         self.model = None
@@ -331,7 +333,6 @@ def pool_sum(self, embeddings, attention_mask):
 
 
 class DPRModel(HFModel):
-
     def __init__(
         self,
         architecture: str,
@@ -364,7 +365,6 @@ def pool_embedding(self, batch_results, tokens, config: VectorInputConfig):
 
 
 class T5Model(HFModel):
-
     def __init__(self, cuda_support: bool, cuda_core: str, trust_remote_code: bool):
         super().__init__(cuda_support, cuda_core)
         self.model = None
@@ -406,7 +406,6 @@ def get_batch_results(self, tokens, text):
 
 
 class ModelFactory:
-
     @staticmethod
     def model(
         model_type,