From 5985b8f09b67f60a595d1b4226e44acbf0cbb124 Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Wed, 4 Dec 2024 17:44:05 +0800 Subject: [PATCH 01/14] feat: support alias for semantics generation --- .../generation/semantics_description.py | 29 +++++++++++-------- .../web/v1/routers/semantics_description.py | 8 ++++- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/wren-ai-service/src/pipelines/generation/semantics_description.py b/wren-ai-service/src/pipelines/generation/semantics_description.py index d85826f98..0d860330f 100644 --- a/wren-ai-service/src/pipelines/generation/semantics_description.py +++ b/wren-ai-service/src/pipelines/generation/semantics_description.py @@ -46,7 +46,7 @@ def extract(model: dict) -> dict: return [ extract(model) for model in mdl.get("models", []) - if model.get("name", "") in selected_models + if model.get("name", "") in selected_models or "*" in selected_models ] @@ -90,6 +90,7 @@ def wrapper(text: str) -> str: ## End of Pipeline class ModelProperties(BaseModel): + alias: str description: str @@ -135,12 +136,12 @@ class SemanticResult(BaseModel): ] ``` -Your task is to update this JSON structure by adding a `description` field inside both the `properties` attribute of each `column` and the `model` itself. -Each `description` should be derived from a user-provided input that explains the purpose or context of the `model` and its respective columns. +Your task is to update this JSON structure by adding `description`, `alias` fields inside both the `properties` attribute of each `column` and the `model` itself. +Each `description`, `alias` should be derived from a user-provided input that explains the purpose or context of the `model` and its respective columns. Follow these steps: -1. **For the `model`**: Prompt the user to provide a brief description of the model's overall purpose or its context. Insert this description in the `properties` field of the `model`. -2. 
**For each `column`**: Ask the user to describe each column's role or significance. Each column's description should be added under its respective `properties` field in the format: `'description': 'user-provided text'`. -3. Ensure that the output is a well-formatted JSON structure, preserving the input's original format and adding the appropriate `description` fields. +1. **For the `model`**: Prompt the user to provide a brief description and alias of the model's overall purpose or its context. Insert this description and alias in the `properties` field of the `model`. +2. **For each `column`**: Ask the user to describe each column's role or significance. Each column's description and alias should be added under its respective `properties` field in the format: `'description': 'user-provided text'`, `'alias': 'user-provided text'`. +3. Ensure that the output is a well-formatted JSON structure, preserving the input's original format and adding the appropriate `description`, `alias` fields. ### Output Format: @@ -153,25 +154,29 @@ class SemanticResult(BaseModel): { "name": "column_1", "properties": { + "alias": "", "description": "" } }, { "name": "column_2", "properties": { - "description": "" + "alias": "", + "description": "" } }, { "name": "column_3", "properties": { - "description": "" + "alias": "", + "description": "" } } ], "properties": { - "description": "" - } + "alias": "", + "description": "" + } } ] } @@ -186,7 +191,7 @@ class SemanticResult(BaseModel): Picked models: {{ picked_models }} Localization Language: {{ language }} -Please provide a brief description for the model and each column based on the user's prompt. +Please provide a brief description and alias for the model and each column based on the user's prompt. 
""" @@ -233,7 +238,7 @@ async def run( SemanticsDescription, "semantics_description", user_prompt="Track student enrollments, grades, and GPA calculations to monitor academic performance and identify areas for student support", - selected_models=[], mdl={}, + selected_models=["*"], language="en", ) diff --git a/wren-ai-service/src/web/v1/routers/semantics_description.py b/wren-ai-service/src/web/v1/routers/semantics_description.py index b8d47bcbf..4aec029c4 100644 --- a/wren-ai-service/src/web/v1/routers/semantics_description.py +++ b/wren-ai-service/src/web/v1/routers/semantics_description.py @@ -31,7 +31,7 @@ "mdl": "{ ... }", # JSON string of the MDL (Model Definition Language) "project_id": "project-id", # Optional project ID "configuration": { # Optional configuration settings - "language": "English" # Optional language, defaults to "English" + "language": "en" # Optional language, defaults to "en" } } - Response: PostResponse @@ -52,9 +52,11 @@ "columns": [ { "name": "col1", + "alias": "col1_alias", "description": "Unique identifier for each record in the example model." } ], + "alias": "model1_alias", "description": "This model is used for analysis purposes, capturing key attributes of records." }, { @@ -62,9 +64,11 @@ "columns": [ { "name": "col1", + "alias": "col1_alias", "description": "Unique identifier for each record in the example model." } ], + "alias": "model2_alias", "description": "This model is used for analysis purposes, capturing key attributes of records." 
} ], @@ -154,10 +158,12 @@ def _formatter(response: Optional[dict]) -> Optional[list[dict]]: "columns": [ { "name": column["name"], + "alias": column["properties"].get("alias", ""), "description": column["properties"].get("description", ""), } for column in model_data["columns"] ], + "alias": model_data["properties"].get("alias", ""), "description": model_data["properties"].get("description", ""), } for model_name, model_data in response.items() From a455dd4779e97fad8fd66b7d215ca9c6f6286cd8 Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Wed, 4 Dec 2024 17:51:20 +0800 Subject: [PATCH 02/14] feat: asterisk for all models --- .../src/pipelines/generation/semantics_description.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/wren-ai-service/src/pipelines/generation/semantics_description.py b/wren-ai-service/src/pipelines/generation/semantics_description.py index 0d860330f..1cde86c83 100644 --- a/wren-ai-service/src/pipelines/generation/semantics_description.py +++ b/wren-ai-service/src/pipelines/generation/semantics_description.py @@ -43,11 +43,10 @@ def extract(model: dict) -> dict: }, } - return [ - extract(model) - for model in mdl.get("models", []) - if model.get("name", "") in selected_models or "*" in selected_models - ] + def model_picker(model: dict) -> bool: + return model.get("name", "") in selected_models or "*" in selected_models + + return [extract(model) for model in mdl.get("models", []) if model_picker(model)] @observe(capture_input=False) From aa535bd0260e7a4c644f83922e4d6391d6497c28 Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Thu, 5 Dec 2024 14:47:11 +0800 Subject: [PATCH 03/14] feat: optimize the prompt --- .../generation/semantics_description.py | 111 ++++++++---------- 1 file changed, 47 insertions(+), 64 deletions(-) diff --git a/wren-ai-service/src/pipelines/generation/semantics_description.py b/wren-ai-service/src/pipelines/generation/semantics_description.py index 1cde86c83..561eeb46e 100644 --- 
a/wren-ai-service/src/pipelines/generation/semantics_description.py +++ b/wren-ai-service/src/pipelines/generation/semantics_description.py @@ -17,7 +17,7 @@ ## Start of Pipeline @observe(capture_input=False) -def picked_models(mdl: dict, selected_models: list[str]) -> list[dict]: +def picked_models(mdl: dict) -> list[dict]: def relation_filter(column: dict) -> bool: return "relationship" not in column @@ -27,6 +27,7 @@ def column_formatter(columns: list[dict]) -> list[dict]: "name": column["name"], "type": column["type"], "properties": { + "alias": column["properties"].get("displayName", ""), "description": column["properties"].get("description", ""), }, } @@ -35,18 +36,17 @@ def column_formatter(columns: list[dict]) -> list[dict]: ] def extract(model: dict) -> dict: + prop = model["properties"] return { "name": model["name"], "columns": column_formatter(model["columns"]), "properties": { - "description": model["properties"].get("description", ""), + "alias": prop.get("displayName", ""), + "description": prop.get("description", ""), }, } - def model_picker(model: dict) -> bool: - return model.get("name", "") in selected_models or "*" in selected_models - - return [extract(model) for model in mdl.get("models", []) if model_picker(model)] + return [extract(model) for model in mdl.get("models", [])] @observe(capture_input=False) @@ -119,69 +119,54 @@ class SemanticResult(BaseModel): } system_prompt = """ -I have a data model represented in JSON format, with the following structure: - -``` -[ - {'name': 'model', 'columns': [ - {'name': 'column_1', 'type': 'type', 'properties': {} - }, - {'name': 'column_2', 'type': 'type', 'properties': {} - }, - {'name': 'column_3', 'type': 'type', 'properties': {} - } - ], 'properties': {} - } -] -``` - -Your task is to update this JSON structure by adding `description`, `alias` fields inside both the `properties` attribute of each `column` and the `model` itself. 
-Each `description`, `alias` should be derived from a user-provided input that explains the purpose or context of the `model` and its respective columns. -Follow these steps: -1. **For the `model`**: Prompt the user to provide a brief description and alias of the model's overall purpose or its context. Insert this description and alias in the `properties` field of the `model`. -2. **For each `column`**: Ask the user to describe each column's role or significance. Each column's description and alias should be added under its respective `properties` field in the format: `'description': 'user-provided text'`, `'alias': 'user-provided text'`. -3. Ensure that the output is a well-formatted JSON structure, preserving the input's original format and adding the appropriate `description`, `alias` fields. - -### Output Format: - -``` +You are a data model expert. Your task is to enrich a JSON data model with descriptive metadata. + +Input Format: +[{ + 'name': 'model', + 'columns': [{'name': 'column', 'type': 'type', 'properties': {'alias': 'alias', 'description': 'description'}}], + 'properties': {'alias': 'alias', 'description': 'description'} +}] + +For each model and column, you will: +1. Add a clear, concise alias that serves as a business-friendly name +2. 
Add a detailed description explaining its purpose and usage + +Guidelines: +- Descriptions should be clear, concise and business-focused +- Aliases should be intuitive and user-friendly +- Use the user's context to inform the descriptions +- Maintain technical accuracy while being accessible to non-technical users + +Output Format: { - "models": [ - { + "models": [{ "name": "model", - "columns": [ - { - "name": "column_1", - "properties": { - "alias": "", - "description": "" - } - }, - { - "name": "column_2", - "properties": { - "alias": "", - "description": "" - } - }, - { - "name": "column_3", - "properties": { - "alias": "", - "description": "" - } + "columns": [{ + "name": "column", + "properties": { + "alias": "User-friendly column name", + "description": "Clear explanation of column purpose" } - ], + }], "properties": { - "alias": "", - "description": "" + "alias": "User-friendly model name", + "description": "Clear explanation of model purpose" } - } - ] + }] +} + +Example: +Input model "orders" with column "created_at" might become: +{ + "name": "created_at", + "properties": { + "alias": "Order Creation Date", + "description": "Timestamp when the order was first created in the system" + } } -``` -Make sure that the descriptions are concise, informative, and contextually appropriate based on the input provided by the user. +Focus on providing business value through clear, accurate descriptions while maintaining JSON structure integrity. 
""" user_prompt_template = """ @@ -213,7 +198,6 @@ def __init__(self, llm_provider: LLMProvider, **_): async def run( self, user_prompt: str, - selected_models: list[str], mdl: dict, language: str = "en", ) -> dict: @@ -222,7 +206,6 @@ async def run( [self._final], inputs={ "user_prompt": user_prompt, - "selected_models": selected_models, "mdl": mdl, "language": language, **self._components, From c987341b2fa35b1f6be82086d5374ec807d662ff Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Thu, 5 Dec 2024 15:07:53 +0800 Subject: [PATCH 04/14] feat: picking model in the service level --- .../src/web/v1/services/semantics_description.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/wren-ai-service/src/web/v1/services/semantics_description.py b/wren-ai-service/src/web/v1/services/semantics_description.py index 040f333dc..aed141aa8 100644 --- a/wren-ai-service/src/web/v1/services/semantics_description.py +++ b/wren-ai-service/src/web/v1/services/semantics_description.py @@ -65,24 +65,20 @@ def _chunking( "language": request.configuration.language, } + def _model_picker(model: dict, selected: list[str]) -> bool: + return model["name"] in selected or "*" in selected + chunks = [ { **model, "columns": model["columns"][i : i + chunk_size], } for model in mdl_dict["models"] - if model["name"] in request.selected_models + if _model_picker(model, request.selected_models) for i in range(0, len(model["columns"]), chunk_size) ] - return [ - { - **template, - "mdl": {"models": [chunk]}, - "selected_models": [chunk["name"]], - } - for chunk in chunks - ] + return [{**template, "mdl": {"models": [chunk]}} for chunk in chunks] async def _generate_task(self, request_id: str, chunk: dict): resp = await self._pipelines["semantics_description"].run(**chunk) From 42207b66d8d1aa7f5b30c918b95ceb31c2a46d81 Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Thu, 5 Dec 2024 15:51:14 +0800 Subject: [PATCH 05/14] chore: rename the semantics description to a 
more suitable name --- deployment/kustomizations/base/cm.yaml | 4 +- docker/config.example.yaml | 4 +- wren-ai-service/src/globals.py | 13 ++-- .../src/pipelines/generation/__init__.py | 3 + ...tics_description.py => model_semantics.py} | 13 ++-- .../src/web/v1/routers/__init__.py | 4 +- ...tics_description.py => model_semantics.py} | 10 +-- ...tics_description.py => model_semantics.py} | 18 +++-- wren-ai-service/tests/data/config.test.yaml | 2 +- ...description.py => test_model_semantics.py} | 74 +++++++++---------- .../tools/config/config.example.yaml | 4 +- wren-ai-service/tools/config/config.full.yaml | 4 +- 12 files changed, 77 insertions(+), 76 deletions(-) rename wren-ai-service/src/pipelines/generation/{semantics_description.py => model_semantics.py} (94%) rename wren-ai-service/src/web/v1/routers/{semantics_description.py => model_semantics.py} (95%) rename wren-ai-service/src/web/v1/services/{semantics_description.py => model_semantics.py} (87%) rename wren-ai-service/tests/pytest/services/{test_semantics_description.py => test_model_semantics.py} (79%) diff --git a/deployment/kustomizations/base/cm.yaml b/deployment/kustomizations/base/cm.yaml index 145fbcd64..205b59369 100644 --- a/deployment/kustomizations/base/cm.yaml +++ b/deployment/kustomizations/base/cm.yaml @@ -141,8 +141,8 @@ data: - name: sql_regeneration llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - - name: semantics_description - llm: litellm_llm.gpt-4o-mini-2024-07-18 + - name: model_semantics + llm: openai_llm.gpt-4o-mini - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui diff --git a/docker/config.example.yaml b/docker/config.example.yaml index 8f2f8a804..efaf08384 100644 --- a/docker/config.example.yaml +++ b/docker/config.example.yaml @@ -91,8 +91,8 @@ pipes: - name: sql_regeneration llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - - name: semantics_description - llm: litellm_llm.gpt-4o-mini-2024-07-18 + - name: model_semantics 
+ llm: openai_llm.gpt-4o-mini - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui diff --git a/wren-ai-service/src/globals.py b/wren-ai-service/src/globals.py index 3c8f5c758..51b4004b3 100644 --- a/wren-ai-service/src/globals.py +++ b/wren-ai-service/src/globals.py @@ -6,7 +6,7 @@ from src.config import Settings from src.core.pipeline import PipelineComponent from src.core.provider import EmbedderProvider, LLMProvider -from src.pipelines import indexing +from src.pipelines import generation, indexing from src.pipelines.generation import ( chart_adjustment, chart_generation, @@ -15,7 +15,6 @@ intent_classification, question_recommendation, relationship_recommendation, - semantics_description, sql_answer, sql_breakdown, sql_correction, @@ -35,9 +34,9 @@ from src.web.v1.services.ask_details import AskDetailsService from src.web.v1.services.chart import ChartService from src.web.v1.services.chart_adjustment import ChartAdjustmentService +from src.web.v1.services.model_semantics import ModelSemantics from src.web.v1.services.question_recommendation import QuestionRecommendation from src.web.v1.services.relationship_recommendation import RelationshipRecommendation -from src.web.v1.services.semantics_description import SemanticsDescription from src.web.v1.services.semantics_preparation import SemanticsPreparationService from src.web.v1.services.sql_answer import SqlAnswerService from src.web.v1.services.sql_expansion import SqlExpansionService @@ -53,7 +52,7 @@ class ServiceContainer: ask_details_service: AskDetailsService question_recommendation: QuestionRecommendation relationship_recommendation: RelationshipRecommendation - semantics_description: SemanticsDescription + model_semantics: ModelSemantics semantics_preparation_service: SemanticsPreparationService chart_service: ChartService chart_adjustment_service: ChartAdjustmentService @@ -78,10 +77,10 @@ def create_service_container( "ttl": settings.query_cache_ttl, } return 
ServiceContainer( - semantics_description=SemanticsDescription( + model_semantics=ModelSemantics( pipelines={ - "semantics_description": semantics_description.SemanticsDescription( - **pipe_components["semantics_description"], + "model_semantics": generation.ModelSemantics( + **pipe_components["model_semantics"], ) }, **query_cache, diff --git a/wren-ai-service/src/pipelines/generation/__init__.py b/wren-ai-service/src/pipelines/generation/__init__.py index e69de29bb..c65240899 100644 --- a/wren-ai-service/src/pipelines/generation/__init__.py +++ b/wren-ai-service/src/pipelines/generation/__init__.py @@ -0,0 +1,3 @@ +from .model_semantics import ModelSemantics + +__all__ = ["ModelSemantics"] diff --git a/wren-ai-service/src/pipelines/generation/semantics_description.py b/wren-ai-service/src/pipelines/generation/model_semantics.py similarity index 94% rename from wren-ai-service/src/pipelines/generation/semantics_description.py rename to wren-ai-service/src/pipelines/generation/model_semantics.py index 561eeb46e..6169d2411 100644 --- a/wren-ai-service/src/pipelines/generation/semantics_description.py +++ b/wren-ai-service/src/pipelines/generation/model_semantics.py @@ -108,11 +108,11 @@ class SemanticResult(BaseModel): models: list[SemanticModel] -SEMANTICS_DESCRIPTION_MODEL_KWARGS = { +MODEL_SEMANTICS_KWARGS = { "response_format": { "type": "json_schema", "json_schema": { - "name": "semantic_description", + "name": "model_semantics", "schema": SemanticResult.model_json_schema(), }, } @@ -179,13 +179,13 @@ class SemanticResult(BaseModel): """ -class SemanticsDescription(BasicPipeline): +class ModelSemantics(BasicPipeline): def __init__(self, llm_provider: LLMProvider, **_): self._components = { "prompt_builder": PromptBuilder(template=user_prompt_template), "generator": llm_provider.get_generator( system_prompt=system_prompt, - generation_kwargs=SEMANTICS_DESCRIPTION_MODEL_KWARGS, + generation_kwargs=MODEL_SEMANTICS_KWARGS, ), } self._final = "normalize" @@ -201,7 
+201,6 @@ async def run( mdl: dict, language: str = "en", ) -> dict: - logger.info("Semantics Description Generation pipeline is running...") return await self._pipe.execute( [self._final], inputs={ @@ -217,8 +216,8 @@ async def run( from src.pipelines.common import dry_run_pipeline dry_run_pipeline( - SemanticsDescription, - "semantics_description", + ModelSemantics, + "model_semantics", user_prompt="Track student enrollments, grades, and GPA calculations to monitor academic performance and identify areas for student support", mdl={}, selected_models=["*"], diff --git a/wren-ai-service/src/web/v1/routers/__init__.py b/wren-ai-service/src/web/v1/routers/__init__.py index d3ebd402e..a2250daee 100644 --- a/wren-ai-service/src/web/v1/routers/__init__.py +++ b/wren-ai-service/src/web/v1/routers/__init__.py @@ -5,9 +5,9 @@ ask_details, chart, chart_adjustment, + model_semantics, question_recommendation, relationship_recommendation, - semantics_description, semantics_preparations, sql_answers, sql_expansions, @@ -20,7 +20,7 @@ router.include_router(ask_details.router) router.include_router(question_recommendation.router) router.include_router(relationship_recommendation.router) -router.include_router(semantics_description.router) +router.include_router(model_semantics.router) router.include_router(semantics_preparations.router) router.include_router(sql_answers.router) router.include_router(sql_expansions.router) diff --git a/wren-ai-service/src/web/v1/routers/semantics_description.py b/wren-ai-service/src/web/v1/routers/model_semantics.py similarity index 95% rename from wren-ai-service/src/web/v1/routers/semantics_description.py rename to wren-ai-service/src/web/v1/routers/model_semantics.py index 4aec029c4..70fe61874 100644 --- a/wren-ai-service/src/web/v1/routers/semantics_description.py +++ b/wren-ai-service/src/web/v1/routers/model_semantics.py @@ -12,7 +12,7 @@ get_service_metadata, ) from src.web.v1.services import Configuration -from 
src.web.v1.services.semantics_description import SemanticsDescription +from src.web.v1.services.model_semantics import ModelSemantics router = APIRouter() @@ -113,10 +113,10 @@ async def generate( service_metadata: ServiceMetadata = Depends(get_service_metadata), ) -> PostResponse: id = str(uuid.uuid4()) - service = service_container.semantics_description + service = service_container.model_semantics - service[id] = SemanticsDescription.Resource(id=id) - input = SemanticsDescription.Input( + service[id] = ModelSemantics.Resource(id=id) + input = ModelSemantics.Input( id=id, selected_models=request.selected_models, user_prompt=request.user_prompt, @@ -146,7 +146,7 @@ async def get( id: str, service_container: ServiceContainer = Depends(get_service_container), ) -> GetResponse: - resource = service_container.semantics_description[id] + resource = service_container.model_semantics[id] def _formatter(response: Optional[dict]) -> Optional[list[dict]]: if response is None: diff --git a/wren-ai-service/src/web/v1/services/semantics_description.py b/wren-ai-service/src/web/v1/services/model_semantics.py similarity index 87% rename from wren-ai-service/src/web/v1/services/semantics_description.py rename to wren-ai-service/src/web/v1/services/model_semantics.py index aed141aa8..64a54da73 100644 --- a/wren-ai-service/src/web/v1/services/semantics_description.py +++ b/wren-ai-service/src/web/v1/services/model_semantics.py @@ -14,7 +14,7 @@ logger = logging.getLogger("wren-ai-service") -class SemanticsDescription: +class ModelSemantics: class Input(BaseModel): id: str selected_models: list[str] @@ -40,7 +40,7 @@ def __init__( ttl: int = 120, ): self._pipelines = pipelines - self._cache: Dict[str, SemanticsDescription.Resource] = TTLCache( + self._cache: Dict[str, ModelSemantics.Resource] = TTLCache( maxsize=maxsize, ttl=ttl ) @@ -55,7 +55,7 @@ def _handle_exception( status="failed", error=self.Resource.Error(code=code, message=error_message), ) - logger.error(error_message) + 
logger.error(f"Project ID: {request.project_id}, {error_message}") def _chunking( self, mdl_dict: dict, request: Input, chunk_size: int = 50 @@ -81,7 +81,7 @@ def _model_picker(model: dict, selected: list[str]) -> bool: return [{**template, "mdl": {"models": [chunk]}} for chunk in chunks] async def _generate_task(self, request_id: str, chunk: dict): - resp = await self._pipelines["semantics_description"].run(**chunk) + resp = await self._pipelines["model_semantics"].run(**chunk) normalize = resp.get("normalize") current = self[request_id] @@ -94,10 +94,12 @@ async def _generate_task(self, request_id: str, chunk: dict): current.response[key]["columns"].extend(normalize[key]["columns"]) - @observe(name="Generate Semantics Description") + @observe(name="Generate Model Semantics") @trace_metadata async def generate(self, request: Input, **kwargs) -> Resource: - logger.info("Generate Semantics Description pipeline is running...") + logger.info( + f"Project ID: {request.project_id}, Generate Model Semantics pipeline is running..." + ) try: mdl_dict = orjson.loads(request.mdl) @@ -117,7 +119,7 @@ async def generate(self, request: Input, **kwargs) -> Resource: except Exception as e: self._handle_exception( request, - f"An error occurred during semantics description generation: {str(e)}", + f"An error occurred during model semantics generation: {str(e)}", ) return self[request.id].with_metadata() @@ -126,7 +128,7 @@ def __getitem__(self, id: str) -> Resource: response = self._cache.get(id) if response is None: - message = f"Semantics Description Resource with ID '{id}' not found." + message = f"Model Semantics Resource with ID '{id}' not found." 
logger.exception(message) return self.Resource( id=id, diff --git a/wren-ai-service/tests/data/config.test.yaml b/wren-ai-service/tests/data/config.test.yaml index 006f4d15c..f65ac3e57 100644 --- a/wren-ai-service/tests/data/config.test.yaml +++ b/wren-ai-service/tests/data/config.test.yaml @@ -70,7 +70,7 @@ pipes: - name: sql_regeneration llm: openai_llm.gpt-4o-mini engine: wren_ui - - name: semantics_description + - name: model_semantics llm: openai_llm.gpt-4o-mini - name: relationship_recommendation llm: openai_llm.gpt-4o-mini diff --git a/wren-ai-service/tests/pytest/services/test_semantics_description.py b/wren-ai-service/tests/pytest/services/test_model_semantics.py similarity index 79% rename from wren-ai-service/tests/pytest/services/test_semantics_description.py rename to wren-ai-service/tests/pytest/services/test_model_semantics.py index dc48e7339..31f82c7e4 100644 --- a/wren-ai-service/tests/pytest/services/test_semantics_description.py +++ b/wren-ai-service/tests/pytest/services/test_model_semantics.py @@ -4,7 +4,7 @@ import orjson import pytest -from src.web.v1.services.semantics_description import SemanticsDescription +from src.web.v1.services.model_semantics import ModelSemantics @pytest.fixture @@ -19,16 +19,16 @@ def service(): } } - pipelines = {"semantics_description": mock_pipeline} - return SemanticsDescription(pipelines=pipelines) + pipelines = {"model_semantics": mock_pipeline} + return ModelSemantics(pipelines=pipelines) @pytest.mark.asyncio -async def test_generate_semantics_description( - service: SemanticsDescription, +async def test_generate_model_semantics( + service: ModelSemantics, ): - service["test_id"] = SemanticsDescription.Resource(id="test_id") - request = SemanticsDescription.Input( + service["test_id"] = ModelSemantics.Resource(id="test_id") + request = ModelSemantics.Input( id="test_id", user_prompt="Describe the model", selected_models=["model1"], @@ -50,11 +50,11 @@ async def test_generate_semantics_description( 
@pytest.mark.asyncio -async def test_generate_semantics_description_with_invalid_mdl( - service: SemanticsDescription, +async def test_generate_model_semantics_with_invalid_mdl( + service: ModelSemantics, ): - service["test_id"] = SemanticsDescription.Resource(id="test_id") - request = SemanticsDescription.Input( + service["test_id"] = ModelSemantics.Resource(id="test_id") + request = ModelSemantics.Input( id="test_id", user_prompt="Describe the model", selected_models=["model1"], @@ -72,20 +72,18 @@ async def test_generate_semantics_description_with_invalid_mdl( @pytest.mark.asyncio -async def test_generate_semantics_description_with_exception( - service: SemanticsDescription, +async def test_generate_model_semantics_with_exception( + service: ModelSemantics, ): - service["test_id"] = SemanticsDescription.Resource(id="test_id") - request = SemanticsDescription.Input( + service["test_id"] = ModelSemantics.Resource(id="test_id") + request = ModelSemantics.Input( id="test_id", user_prompt="Describe the model", selected_models=["model1"], mdl='{"models": [{"name": "model1", "columns": [{"name": "column1", "type": "varchar", "notNull": false}]}]}', ) - service._pipelines["semantics_description"].run.side_effect = Exception( - "Test exception" - ) + service._pipelines["model_semantics"].run.side_effect = Exception("Test exception") await service.generate(request) response = service[request.id] @@ -100,10 +98,10 @@ async def test_generate_semantics_description_with_exception( ) -def test_get_semantics_description_result( - service: SemanticsDescription, +def test_get_model_semantics_result( + service: ModelSemantics, ): - expected_response = SemanticsDescription.Resource( + expected_response = ModelSemantics.Resource( id="test_id", status="finished", response={"model1": {"description": "Test description"}}, @@ -115,8 +113,8 @@ def test_get_semantics_description_result( assert result == expected_response -def test_get_non_existent_semantics_description_result( - service: 
SemanticsDescription, +def test_get_non_existent_model_semantics_result( + service: ModelSemantics, ): result = service["non_existent_id"] @@ -129,10 +127,10 @@ def test_get_non_existent_semantics_description_result( @pytest.mark.asyncio async def test_batch_processing_with_multiple_models( - service: SemanticsDescription, + service: ModelSemantics, ): - service["test_id"] = SemanticsDescription.Resource(id="test_id") - request = SemanticsDescription.Input( + service["test_id"] = ModelSemantics.Resource(id="test_id") + request = ModelSemantics.Input( id="test_id", user_prompt="Describe the models", selected_models=["model1", "model2", "model3"], @@ -140,7 +138,7 @@ async def test_batch_processing_with_multiple_models( ) # Mock pipeline responses for each chunk - service._pipelines["semantics_description"].run.side_effect = [ + service._pipelines["model_semantics"].run.side_effect = [ {"normalize": {"model1": {"description": "Description 1"}}}, {"normalize": {"model2": {"description": "Description 2"}}}, {"normalize": {"model3": {"description": "Description 3"}}}, @@ -165,10 +163,10 @@ async def test_batch_processing_with_multiple_models( def test_batch_processing_with_custom_chunk_size( - service: SemanticsDescription, + service: ModelSemantics, ): - service["test_id"] = SemanticsDescription.Resource(id="test_id") - request = SemanticsDescription.Input( + service["test_id"] = ModelSemantics.Resource(id="test_id") + request = ModelSemantics.Input( id="test_id", user_prompt="Describe the models", selected_models=["model1", "model2", "model3", "model4"], @@ -188,10 +186,10 @@ def test_batch_processing_with_custom_chunk_size( @pytest.mark.asyncio async def test_batch_processing_partial_failure( - service: SemanticsDescription, + service: ModelSemantics, ): - service["test_id"] = SemanticsDescription.Resource(id="test_id") - request = SemanticsDescription.Input( + service["test_id"] = ModelSemantics.Resource(id="test_id") + request = ModelSemantics.Input( id="test_id", 
user_prompt="Describe the models", selected_models=["model1", "model2"], @@ -199,7 +197,7 @@ async def test_batch_processing_partial_failure( ) # Mock first chunk succeeds, second chunk fails - service._pipelines["semantics_description"].run.side_effect = [ + service._pipelines["model_semantics"].run.side_effect = [ {"normalize": {"model1": {"description": "Description 1"}}}, Exception("Failed processing model2"), ] @@ -215,12 +213,12 @@ async def test_batch_processing_partial_failure( @pytest.mark.asyncio async def test_concurrent_updates_no_race_condition( - service: SemanticsDescription, + service: ModelSemantics, ): test_id = "concurrent_test" - service[test_id] = SemanticsDescription.Resource(id=test_id) + service[test_id] = ModelSemantics.Resource(id=test_id) - request = SemanticsDescription.Input( + request = ModelSemantics.Input( id=test_id, user_prompt="Test concurrent updates", selected_models=["model1", "model2", "model3", "model4", "model5"], @@ -236,7 +234,7 @@ async def delayed_response(model_num, delay=0.1): } } - service._pipelines["semantics_description"].run.side_effect = [ + service._pipelines["model_semantics"].run.side_effect = [ await delayed_response(1), await delayed_response(2), await delayed_response(3), diff --git a/wren-ai-service/tools/config/config.example.yaml b/wren-ai-service/tools/config/config.example.yaml index 2ba4c8ecc..8c4d90676 100644 --- a/wren-ai-service/tools/config/config.example.yaml +++ b/wren-ai-service/tools/config/config.example.yaml @@ -105,8 +105,8 @@ pipes: - name: sql_regeneration llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - - name: semantics_description - llm: litellm_llm.gpt-4o-mini-2024-07-18 + - name: model_semantics + llm: openai_llm.gpt-4o-mini - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui diff --git a/wren-ai-service/tools/config/config.full.yaml b/wren-ai-service/tools/config/config.full.yaml index 742cafaaf..0c2bc24ce 100644 --- 
a/wren-ai-service/tools/config/config.full.yaml +++ b/wren-ai-service/tools/config/config.full.yaml @@ -124,8 +124,8 @@ pipes: - name: sql_regeneration llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - - name: semantics_description - llm: litellm_llm.gpt-4o-mini-2024-07-18 + - name: model_semantics + llm: openai_llm.gpt-4o-mini - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui From b7e053bbcef9167682549d73a49b75484e69b95d Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Thu, 5 Dec 2024 16:06:53 +0800 Subject: [PATCH 06/14] feat: update test cases --- .../pytest/services/test_model_semantics.py | 91 +++++++++++++++---- 1 file changed, 75 insertions(+), 16 deletions(-) diff --git a/wren-ai-service/tests/pytest/services/test_model_semantics.py b/wren-ai-service/tests/pytest/services/test_model_semantics.py index 31f82c7e4..354c487da 100644 --- a/wren-ai-service/tests/pytest/services/test_model_semantics.py +++ b/wren-ai-service/tests/pytest/services/test_model_semantics.py @@ -13,8 +13,21 @@ def service(): mock_pipeline.run.return_value = { "normalize": { "model1": { - "columns": [], - "properties": {"description": "Test description"}, + "columns": [ + { + "name": "column1", + "type": "varchar", + "notNull": False, + "properties": { + "description": "Test description", + "alias": "column1_alias", + }, + } + ], + "properties": { + "description": "Test description", + "alias": "model1_alias", + }, } } } @@ -42,8 +55,21 @@ async def test_generate_model_semantics( assert response.status == "finished" assert response.response == { "model1": { - "columns": [], - "properties": {"description": "Test description"}, + "columns": [ + { + "name": "column1", + "type": "varchar", + "notNull": False, + "properties": { + "description": "Test description", + "alias": "column1_alias", + }, + } + ], + "properties": { + "description": "Test description", + "alias": "model1_alias", + }, } } assert response.error is None @@ -93,8 +119,7 
@@ async def test_generate_model_semantics_with_exception( assert response.response is None assert response.error.code == "OTHERS" assert ( - "An error occurred during semantics description generation" - in response.error.message + "An error occurred during model semantics generation:" in response.error.message ) @@ -159,41 +184,75 @@ async def test_batch_processing_with_multiple_models( assert len(chunks) == 3 # Default chunk_size=1 assert all("user_prompt" in chunk for chunk in chunks) assert all("mdl" in chunk for chunk in chunks) - assert [len(chunk["selected_models"]) for chunk in chunks] == [1, 1, 1] def test_batch_processing_with_custom_chunk_size( service: ModelSemantics, ): + test_mdl = { + "models": [ + { + "name": "model1", + "columns": [{"name": "column1", "type": "varchar", "notNull": False}], + }, + { + "name": "model2", + "columns": [{"name": "column1", "type": "varchar", "notNull": False}], + }, + { + "name": "model3", + "columns": [{"name": "column1", "type": "varchar", "notNull": False}], + }, + { + "name": "model4", + "columns": [ + {"name": "column1", "type": "varchar", "notNull": False}, + {"name": "column2", "type": "varchar", "notNull": False}, + ], + }, + ] + } service["test_id"] = ModelSemantics.Resource(id="test_id") request = ModelSemantics.Input( id="test_id", user_prompt="Describe the models", selected_models=["model1", "model2", "model3", "model4"], - mdl='{"models": [{"name": "model1", "columns": [{"name": "column1", "type": "varchar", "notNull": false}]}, {"name": "model2", "columns": [{"name": "column1", "type": "varchar", "notNull": false}]}, {"name": "model3", "columns": [{"name": "column1", "type": "varchar", "notNull": false}]}, {"name": "model4", "columns": [{"name": "column1", "type": "varchar", "notNull": false}]}]}', + mdl=orjson.dumps(test_mdl), ) # Test chunking with custom chunk size - chunks = service._chunking(orjson.loads(request.mdl), request, chunk_size=2) + chunks = service._chunking(orjson.loads(request.mdl), 
request, chunk_size=1)

-    assert len(chunks) == 4
-    assert [len(chunk["selected_models"]) for chunk in chunks] == [1, 1, 1, 1]
-    assert chunks[0]["selected_models"] == ["model1"]
-    assert chunks[1]["selected_models"] == ["model2"]
-    assert chunks[2]["selected_models"] == ["model3"]
-    assert chunks[3]["selected_models"] == ["model4"]
+    assert len(chunks) == 5
+    assert chunks[0]["mdl"]["models"][0]["name"] == "model1"
+    assert chunks[1]["mdl"]["models"][0]["name"] == "model2"
+    assert chunks[2]["mdl"]["models"][0]["name"] == "model3"
+    assert chunks[3]["mdl"]["models"][0]["name"] == "model4"
+    assert chunks[4]["mdl"]["models"][0]["name"] == "model4"


 @pytest.mark.asyncio
 async def test_batch_processing_partial_failure(
     service: ModelSemantics,
 ):
+    test_mdl = {
+        "models": [
+            {
+                "name": "model1",
+                "columns": [{"name": "column1", "type": "varchar", "notNull": False}],
+            },
+            {
+                "name": "model2",
+                "columns": [{"name": "column1", "type": "varchar", "notNull": False}],
+            },
+        ]
+    }
     service["test_id"] = ModelSemantics.Resource(id="test_id")
     request = ModelSemantics.Input(
         id="test_id",
         user_prompt="Describe the models",
         selected_models=["model1", "model2"],
-        mdl='{"models": [{"name": "model1", "columns": [{"name": "column1", "type": "varchar", "notNull": false}]}, {"name": "model2", "columns": [{"name": "column1", "type": "varchar", "notNull": false}]}]}',
+        mdl=orjson.dumps(test_mdl),
     )

     # Mock first chunk succeeds, second chunk fails

From 2bd91fa82cdab04ac4cb1bf04228e529cf02f3b2 Mon Sep 17 00:00:00 2001
From: Pao-Sheng Wang 
Date: Thu, 5 Dec 2024 16:08:07 +0800
Subject: [PATCH 07/14] feat: change the attr name for web spec

---
 wren-ai-service/src/web/v1/routers/model_semantics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wren-ai-service/src/web/v1/routers/model_semantics.py b/wren-ai-service/src/web/v1/routers/model_semantics.py
index 70fe61874..079d8f727 100644
--- a/wren-ai-service/src/web/v1/routers/model_semantics.py
+++ 
b/wren-ai-service/src/web/v1/routers/model_semantics.py @@ -163,7 +163,7 @@ def _formatter(response: Optional[dict]) -> Optional[list[dict]]: } for column in model_data["columns"] ], - "alias": model_data["properties"].get("alias", ""), + "displayName": model_data["properties"].get("alias", ""), "description": model_data["properties"].get("description", ""), } for model_name, model_data in response.items() From 569aadb831604300ecf9b9974e70293694d31b85 Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Thu, 5 Dec 2024 17:47:35 +0800 Subject: [PATCH 08/14] chore: rename the semantics description to a better name --- deployment/kustomizations/base/cm.yaml | 2 +- docker/config.example.yaml | 2 +- wren-ai-service/src/globals.py | 10 +-- .../src/pipelines/generation/__init__.py | 4 +- ...l_semantics.py => semantics_enrichment.py} | 12 +-- .../src/web/v1/routers/__init__.py | 4 +- ...l_semantics.py => semantics_enrichment.py} | 78 +++++++++---------- ...l_semantics.py => semantics_enrichment.py} | 14 ++-- wren-ai-service/tests/data/config.test.yaml | 2 +- ...antics.py => test_semantics_enrichment.py} | 78 +++++++++---------- .../tools/config/config.example.yaml | 2 +- wren-ai-service/tools/config/config.full.yaml | 2 +- 12 files changed, 105 insertions(+), 105 deletions(-) rename wren-ai-service/src/pipelines/generation/{model_semantics.py => semantics_enrichment.py} (96%) rename wren-ai-service/src/web/v1/routers/{model_semantics.py => semantics_enrichment.py} (73%) rename wren-ai-service/src/web/v1/services/{model_semantics.py => semantics_enrichment.py} (89%) rename wren-ai-service/tests/pytest/services/{test_model_semantics.py => test_semantics_enrichment.py} (81%) diff --git a/deployment/kustomizations/base/cm.yaml b/deployment/kustomizations/base/cm.yaml index 205b59369..87f44ccc8 100644 --- a/deployment/kustomizations/base/cm.yaml +++ b/deployment/kustomizations/base/cm.yaml @@ -141,7 +141,7 @@ data: - name: sql_regeneration llm: litellm_llm.gpt-4o-mini-2024-07-18 
engine: wren_ui - - name: model_semantics + - name: semantics_enrichment llm: openai_llm.gpt-4o-mini - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 diff --git a/docker/config.example.yaml b/docker/config.example.yaml index efaf08384..b82b7625d 100644 --- a/docker/config.example.yaml +++ b/docker/config.example.yaml @@ -91,7 +91,7 @@ pipes: - name: sql_regeneration llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - - name: model_semantics + - name: semantics_enrichment llm: openai_llm.gpt-4o-mini - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 diff --git a/wren-ai-service/src/globals.py b/wren-ai-service/src/globals.py index 51b4004b3..e72cf39cb 100644 --- a/wren-ai-service/src/globals.py +++ b/wren-ai-service/src/globals.py @@ -34,9 +34,9 @@ from src.web.v1.services.ask_details import AskDetailsService from src.web.v1.services.chart import ChartService from src.web.v1.services.chart_adjustment import ChartAdjustmentService -from src.web.v1.services.model_semantics import ModelSemantics from src.web.v1.services.question_recommendation import QuestionRecommendation from src.web.v1.services.relationship_recommendation import RelationshipRecommendation +from src.web.v1.services.semantics_enrichment import SemanticsEnrichment from src.web.v1.services.semantics_preparation import SemanticsPreparationService from src.web.v1.services.sql_answer import SqlAnswerService from src.web.v1.services.sql_expansion import SqlExpansionService @@ -52,7 +52,7 @@ class ServiceContainer: ask_details_service: AskDetailsService question_recommendation: QuestionRecommendation relationship_recommendation: RelationshipRecommendation - model_semantics: ModelSemantics + semantics_enrichment: SemanticsEnrichment semantics_preparation_service: SemanticsPreparationService chart_service: ChartService chart_adjustment_service: ChartAdjustmentService @@ -77,10 +77,10 @@ def create_service_container( "ttl": settings.query_cache_ttl, } 
return ServiceContainer( - model_semantics=ModelSemantics( + semantics_enrichment=SemanticsEnrichment( pipelines={ - "model_semantics": generation.ModelSemantics( - **pipe_components["model_semantics"], + "semantics_enrichment": generation.SemanticsEnrichment( + **pipe_components["semantics_enrichment"], ) }, **query_cache, diff --git a/wren-ai-service/src/pipelines/generation/__init__.py b/wren-ai-service/src/pipelines/generation/__init__.py index c65240899..e86c31801 100644 --- a/wren-ai-service/src/pipelines/generation/__init__.py +++ b/wren-ai-service/src/pipelines/generation/__init__.py @@ -1,3 +1,3 @@ -from .model_semantics import ModelSemantics +from .semantics_enrichment import SemanticsEnrichment -__all__ = ["ModelSemantics"] +__all__ = ["SemanticsEnrichment"] diff --git a/wren-ai-service/src/pipelines/generation/model_semantics.py b/wren-ai-service/src/pipelines/generation/semantics_enrichment.py similarity index 96% rename from wren-ai-service/src/pipelines/generation/model_semantics.py rename to wren-ai-service/src/pipelines/generation/semantics_enrichment.py index 6169d2411..41fbbe508 100644 --- a/wren-ai-service/src/pipelines/generation/model_semantics.py +++ b/wren-ai-service/src/pipelines/generation/semantics_enrichment.py @@ -108,11 +108,11 @@ class SemanticResult(BaseModel): models: list[SemanticModel] -MODEL_SEMANTICS_KWARGS = { +semantics_enrichment_KWARGS = { "response_format": { "type": "json_schema", "json_schema": { - "name": "model_semantics", + "name": "semantics_enrichment", "schema": SemanticResult.model_json_schema(), }, } @@ -179,13 +179,13 @@ class SemanticResult(BaseModel): """ -class ModelSemantics(BasicPipeline): +class SemanticsEnrichment(BasicPipeline): def __init__(self, llm_provider: LLMProvider, **_): self._components = { "prompt_builder": PromptBuilder(template=user_prompt_template), "generator": llm_provider.get_generator( system_prompt=system_prompt, - generation_kwargs=MODEL_SEMANTICS_KWARGS, + 
generation_kwargs=semantics_enrichment_KWARGS, ), } self._final = "normalize" @@ -216,8 +216,8 @@ async def run( from src.pipelines.common import dry_run_pipeline dry_run_pipeline( - ModelSemantics, - "model_semantics", + SemanticsEnrichment, + "semantics_enrichment", user_prompt="Track student enrollments, grades, and GPA calculations to monitor academic performance and identify areas for student support", mdl={}, selected_models=["*"], diff --git a/wren-ai-service/src/web/v1/routers/__init__.py b/wren-ai-service/src/web/v1/routers/__init__.py index a2250daee..542dbcb63 100644 --- a/wren-ai-service/src/web/v1/routers/__init__.py +++ b/wren-ai-service/src/web/v1/routers/__init__.py @@ -5,9 +5,9 @@ ask_details, chart, chart_adjustment, - model_semantics, question_recommendation, relationship_recommendation, + semantics_enrichment, semantics_preparations, sql_answers, sql_expansions, @@ -20,7 +20,7 @@ router.include_router(ask_details.router) router.include_router(question_recommendation.router) router.include_router(relationship_recommendation.router) -router.include_router(model_semantics.router) +router.include_router(semantics_enrichment.router) router.include_router(semantics_preparations.router) router.include_router(sql_answers.router) router.include_router(sql_expansions.router) diff --git a/wren-ai-service/src/web/v1/routers/model_semantics.py b/wren-ai-service/src/web/v1/routers/semantics_enrichment.py similarity index 73% rename from wren-ai-service/src/web/v1/routers/model_semantics.py rename to wren-ai-service/src/web/v1/routers/semantics_enrichment.py index 079d8f727..0068187a4 100644 --- a/wren-ai-service/src/web/v1/routers/model_semantics.py +++ b/wren-ai-service/src/web/v1/routers/semantics_enrichment.py @@ -12,18 +12,18 @@ get_service_metadata, ) from src.web.v1.services import Configuration -from src.web.v1.services.model_semantics import ModelSemantics +from src.web.v1.services.semantics_enrichment import SemanticsEnrichment router = APIRouter() 
""" -Semantics Description Router +Semantics Enrichment Router -This router handles endpoints related to generating and retrieving semantic descriptions. +This router handles endpoints related to generating and retrieving semantics enrichment for data models. Endpoints: -1. POST /semantics-descriptions - - Generates a new semantic description +1. POST /semantics-enrichment + - Generates a new semantics enrichment task for data models - Request body: PostRequest { "selected_models": ["model1", "model2"], # List of model names to describe @@ -39,46 +39,36 @@ "id": "unique-uuid" # Unique identifier for the generated description } -2. GET /semantics-descriptions/{id} - - Retrieves the status and result of a semantic description generation +2. GET /semantics-enrichment/{id} + - Retrieves the status and result of a semantics enrichment generation - Path parameter: id (str) - Response: GetResponse { "id": "unique-uuid", # Unique identifier of the description "status": "generating" | "finished" | "failed", - "response": [ # Present only if status is "finished" or "generating" - { - "name": "model1", - "columns": [ - { - "name": "col1", - "alias": "col1_alias", - "description": "Unique identifier for each record in the example model." - } - ], - "alias": "model1_alias", - "description": "This model is used for analysis purposes, capturing key attributes of records." - }, - { - "name": "model2", - "columns": [ - { - "name": "col1", - "alias": "col1_alias", - "description": "Unique identifier for each record in the example model." - } - ], - "alias": "model2_alias", - "description": "This model is used for analysis purposes, capturing key attributes of records." - } - ], + "response": { # Present only if status is "finished" or "generating" + "models": [ + { + "name": "model1", + "columns": [ + { + "name": "col1", + "displayName": "col1_alias", + "description": "Unique identifier for each record in the example model." 
+ } + ], + "displayName": "model1_alias", + "description": "This model is used for analysis purposes, capturing key attributes of records." + } + ] + }, "error": { # Present only if status is "failed" "code": "OTHERS", "message": "Error description" } } -The semantic description generation is an asynchronous process. The POST endpoint +The semantics enrichment generation is an asynchronous process. The POST endpoint initiates the generation and returns immediately with an ID. The GET endpoint can then be used to check the status and retrieve the result when it's ready. @@ -102,9 +92,14 @@ class PostResponse(BaseModel): id: str +@router.post( + "/semantics-enrichment", + response_model=PostResponse, +) @router.post( "/semantics-descriptions", response_model=PostResponse, + deprecated=True, ) async def generate( request: PostRequest, @@ -113,10 +108,10 @@ async def generate( service_metadata: ServiceMetadata = Depends(get_service_metadata), ) -> PostResponse: id = str(uuid.uuid4()) - service = service_container.model_semantics + service = service_container.semantics_enrichment - service[id] = ModelSemantics.Resource(id=id) - input = ModelSemantics.Input( + service[id] = SemanticsEnrichment.Resource(id=id) + input = SemanticsEnrichment.Input( id=id, selected_models=request.selected_models, user_prompt=request.user_prompt, @@ -138,15 +133,20 @@ class GetResponse(BaseModel): error: Optional[dict] +@router.get( + "/semantics-enrichment/{id}", + response_model=GetResponse, +) @router.get( "/semantics-descriptions/{id}", response_model=GetResponse, + deprecated=True, ) async def get( id: str, service_container: ServiceContainer = Depends(get_service_container), ) -> GetResponse: - resource = service_container.model_semantics[id] + resource = service_container.semantics_enrichment[id] def _formatter(response: Optional[dict]) -> Optional[list[dict]]: if response is None: @@ -158,7 +158,7 @@ def _formatter(response: Optional[dict]) -> Optional[list[dict]]: "columns": [ { 
"name": column["name"], - "alias": column["properties"].get("alias", ""), + "displayName": column["properties"].get("alias", ""), "description": column["properties"].get("description", ""), } for column in model_data["columns"] diff --git a/wren-ai-service/src/web/v1/services/model_semantics.py b/wren-ai-service/src/web/v1/services/semantics_enrichment.py similarity index 89% rename from wren-ai-service/src/web/v1/services/model_semantics.py rename to wren-ai-service/src/web/v1/services/semantics_enrichment.py index 64a54da73..8ad0e1af1 100644 --- a/wren-ai-service/src/web/v1/services/model_semantics.py +++ b/wren-ai-service/src/web/v1/services/semantics_enrichment.py @@ -14,7 +14,7 @@ logger = logging.getLogger("wren-ai-service") -class ModelSemantics: +class SemanticsEnrichment: class Input(BaseModel): id: str selected_models: list[str] @@ -40,7 +40,7 @@ def __init__( ttl: int = 120, ): self._pipelines = pipelines - self._cache: Dict[str, ModelSemantics.Resource] = TTLCache( + self._cache: Dict[str, SemanticsEnrichment.Resource] = TTLCache( maxsize=maxsize, ttl=ttl ) @@ -81,7 +81,7 @@ def _model_picker(model: dict, selected: list[str]) -> bool: return [{**template, "mdl": {"models": [chunk]}} for chunk in chunks] async def _generate_task(self, request_id: str, chunk: dict): - resp = await self._pipelines["model_semantics"].run(**chunk) + resp = await self._pipelines["semantics_enrichment"].run(**chunk) normalize = resp.get("normalize") current = self[request_id] @@ -94,11 +94,11 @@ async def _generate_task(self, request_id: str, chunk: dict): current.response[key]["columns"].extend(normalize[key]["columns"]) - @observe(name="Generate Model Semantics") + @observe(name="Enrich Semantics") @trace_metadata async def generate(self, request: Input, **kwargs) -> Resource: logger.info( - f"Project ID: {request.project_id}, Generate Model Semantics pipeline is running..." + f"Project ID: {request.project_id}, Enrich Semantics pipeline is running..." 
) try: @@ -119,7 +119,7 @@ async def generate(self, request: Input, **kwargs) -> Resource: except Exception as e: self._handle_exception( request, - f"An error occurred during model semantics generation: {str(e)}", + f"An error occurred during semantics enrichment: {str(e)}", ) return self[request.id].with_metadata() @@ -128,7 +128,7 @@ def __getitem__(self, id: str) -> Resource: response = self._cache.get(id) if response is None: - message = f"Model Semantics Resource with ID '{id}' not found." + message = f"Semantics Enrichment Resource with ID '{id}' not found." logger.exception(message) return self.Resource( id=id, diff --git a/wren-ai-service/tests/data/config.test.yaml b/wren-ai-service/tests/data/config.test.yaml index f65ac3e57..e583bb65b 100644 --- a/wren-ai-service/tests/data/config.test.yaml +++ b/wren-ai-service/tests/data/config.test.yaml @@ -70,7 +70,7 @@ pipes: - name: sql_regeneration llm: openai_llm.gpt-4o-mini engine: wren_ui - - name: model_semantics + - name: semantics_enrichment llm: openai_llm.gpt-4o-mini - name: relationship_recommendation llm: openai_llm.gpt-4o-mini diff --git a/wren-ai-service/tests/pytest/services/test_model_semantics.py b/wren-ai-service/tests/pytest/services/test_semantics_enrichment.py similarity index 81% rename from wren-ai-service/tests/pytest/services/test_model_semantics.py rename to wren-ai-service/tests/pytest/services/test_semantics_enrichment.py index 354c487da..2d9c31f40 100644 --- a/wren-ai-service/tests/pytest/services/test_model_semantics.py +++ b/wren-ai-service/tests/pytest/services/test_semantics_enrichment.py @@ -4,7 +4,7 @@ import orjson import pytest -from src.web.v1.services.model_semantics import ModelSemantics +from src.web.v1.services.semantics_enrichment import SemanticsEnrichment @pytest.fixture @@ -32,16 +32,16 @@ def service(): } } - pipelines = {"model_semantics": mock_pipeline} - return ModelSemantics(pipelines=pipelines) + pipelines = {"semantics_enrichment": mock_pipeline} + return 
SemanticsEnrichment(pipelines=pipelines) @pytest.mark.asyncio -async def test_generate_model_semantics( - service: ModelSemantics, +async def test_generate_semantics_enrichment( + service: SemanticsEnrichment, ): - service["test_id"] = ModelSemantics.Resource(id="test_id") - request = ModelSemantics.Input( + service["test_id"] = SemanticsEnrichment.Resource(id="test_id") + request = SemanticsEnrichment.Input( id="test_id", user_prompt="Describe the model", selected_models=["model1"], @@ -76,11 +76,11 @@ async def test_generate_model_semantics( @pytest.mark.asyncio -async def test_generate_model_semantics_with_invalid_mdl( - service: ModelSemantics, +async def test_generate_semantics_enrichment_with_invalid_mdl( + service: SemanticsEnrichment, ): - service["test_id"] = ModelSemantics.Resource(id="test_id") - request = ModelSemantics.Input( + service["test_id"] = SemanticsEnrichment.Resource(id="test_id") + request = SemanticsEnrichment.Input( id="test_id", user_prompt="Describe the model", selected_models=["model1"], @@ -98,18 +98,20 @@ async def test_generate_model_semantics_with_invalid_mdl( @pytest.mark.asyncio -async def test_generate_model_semantics_with_exception( - service: ModelSemantics, +async def test_generate_semantics_enrichment_with_exception( + service: SemanticsEnrichment, ): - service["test_id"] = ModelSemantics.Resource(id="test_id") - request = ModelSemantics.Input( + service["test_id"] = SemanticsEnrichment.Resource(id="test_id") + request = SemanticsEnrichment.Input( id="test_id", user_prompt="Describe the model", selected_models=["model1"], mdl='{"models": [{"name": "model1", "columns": [{"name": "column1", "type": "varchar", "notNull": false}]}]}', ) - service._pipelines["model_semantics"].run.side_effect = Exception("Test exception") + service._pipelines["semantics_enrichment"].run.side_effect = Exception( + "Test exception" + ) await service.generate(request) response = service[request.id] @@ -118,15 +120,13 @@ async def 
test_generate_model_semantics_with_exception( assert response.status == "failed" assert response.response is None assert response.error.code == "OTHERS" - assert ( - "An error occurred during model semantics generation:" in response.error.message - ) + assert "An error occurred during semantics enrichment:" in response.error.message -def test_get_model_semantics_result( - service: ModelSemantics, +def test_get_semantics_enrichment_result( + service: SemanticsEnrichment, ): - expected_response = ModelSemantics.Resource( + expected_response = SemanticsEnrichment.Resource( id="test_id", status="finished", response={"model1": {"description": "Test description"}}, @@ -138,8 +138,8 @@ def test_get_model_semantics_result( assert result == expected_response -def test_get_non_existent_model_semantics_result( - service: ModelSemantics, +def test_get_non_existent_semantics_enrichment_result( + service: SemanticsEnrichment, ): result = service["non_existent_id"] @@ -152,10 +152,10 @@ def test_get_non_existent_model_semantics_result( @pytest.mark.asyncio async def test_batch_processing_with_multiple_models( - service: ModelSemantics, + service: SemanticsEnrichment, ): - service["test_id"] = ModelSemantics.Resource(id="test_id") - request = ModelSemantics.Input( + service["test_id"] = SemanticsEnrichment.Resource(id="test_id") + request = SemanticsEnrichment.Input( id="test_id", user_prompt="Describe the models", selected_models=["model1", "model2", "model3"], @@ -163,7 +163,7 @@ async def test_batch_processing_with_multiple_models( ) # Mock pipeline responses for each chunk - service._pipelines["model_semantics"].run.side_effect = [ + service._pipelines["semantics_enrichment"].run.side_effect = [ {"normalize": {"model1": {"description": "Description 1"}}}, {"normalize": {"model2": {"description": "Description 2"}}}, {"normalize": {"model3": {"description": "Description 3"}}}, @@ -187,7 +187,7 @@ async def test_batch_processing_with_multiple_models( def 
test_batch_processing_with_custom_chunk_size( - service: ModelSemantics, + service: SemanticsEnrichment, ): test_mdl = { "models": [ @@ -212,8 +212,8 @@ def test_batch_processing_with_custom_chunk_size( }, ] } - service["test_id"] = ModelSemantics.Resource(id="test_id") - request = ModelSemantics.Input( + service["test_id"] = SemanticsEnrichment.Resource(id="test_id") + request = SemanticsEnrichment.Input( id="test_id", user_prompt="Describe the models", selected_models=["model1", "model2", "model3", "model4"], @@ -233,7 +233,7 @@ def test_batch_processing_with_custom_chunk_size( @pytest.mark.asyncio async def test_batch_processing_partial_failure( - service: ModelSemantics, + service: SemanticsEnrichment, ): test_mdl = { "models": [ @@ -247,8 +247,8 @@ async def test_batch_processing_partial_failure( }, ] } - service["test_id"] = ModelSemantics.Resource(id="test_id") - request = ModelSemantics.Input( + service["test_id"] = SemanticsEnrichment.Resource(id="test_id") + request = SemanticsEnrichment.Input( id="test_id", user_prompt="Describe the models", selected_models=["model1", "model2"], @@ -256,7 +256,7 @@ async def test_batch_processing_partial_failure( ) # Mock first chunk succeeds, second chunk fails - service._pipelines["model_semantics"].run.side_effect = [ + service._pipelines["semantics_enrichment"].run.side_effect = [ {"normalize": {"model1": {"description": "Description 1"}}}, Exception("Failed processing model2"), ] @@ -272,12 +272,12 @@ async def test_batch_processing_partial_failure( @pytest.mark.asyncio async def test_concurrent_updates_no_race_condition( - service: ModelSemantics, + service: SemanticsEnrichment, ): test_id = "concurrent_test" - service[test_id] = ModelSemantics.Resource(id=test_id) + service[test_id] = SemanticsEnrichment.Resource(id=test_id) - request = ModelSemantics.Input( + request = SemanticsEnrichment.Input( id=test_id, user_prompt="Test concurrent updates", selected_models=["model1", "model2", "model3", "model4", "model5"], 
@@ -293,7 +293,7 @@ async def delayed_response(model_num, delay=0.1): } } - service._pipelines["model_semantics"].run.side_effect = [ + service._pipelines["semantics_enrichment"].run.side_effect = [ await delayed_response(1), await delayed_response(2), await delayed_response(3), diff --git a/wren-ai-service/tools/config/config.example.yaml b/wren-ai-service/tools/config/config.example.yaml index 8c4d90676..102693f6f 100644 --- a/wren-ai-service/tools/config/config.example.yaml +++ b/wren-ai-service/tools/config/config.example.yaml @@ -105,7 +105,7 @@ pipes: - name: sql_regeneration llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - - name: model_semantics + - name: semantics_enrichment llm: openai_llm.gpt-4o-mini - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 diff --git a/wren-ai-service/tools/config/config.full.yaml b/wren-ai-service/tools/config/config.full.yaml index 0c2bc24ce..7869b46f1 100644 --- a/wren-ai-service/tools/config/config.full.yaml +++ b/wren-ai-service/tools/config/config.full.yaml @@ -124,7 +124,7 @@ pipes: - name: sql_regeneration llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - - name: model_semantics + - name: semantics_enrichment llm: openai_llm.gpt-4o-mini - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 From 60e410e2a5716276aefc332d8ce738a818edfa38 Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Thu, 5 Dec 2024 17:54:01 +0800 Subject: [PATCH 09/14] feat: remove unused parameter --- wren-ai-service/src/pipelines/generation/semantics_enrichment.py | 1 - 1 file changed, 1 deletion(-) diff --git a/wren-ai-service/src/pipelines/generation/semantics_enrichment.py b/wren-ai-service/src/pipelines/generation/semantics_enrichment.py index 41fbbe508..d824ae123 100644 --- a/wren-ai-service/src/pipelines/generation/semantics_enrichment.py +++ b/wren-ai-service/src/pipelines/generation/semantics_enrichment.py @@ -220,6 +220,5 @@ async def run( "semantics_enrichment", 
user_prompt="Track student enrollments, grades, and GPA calculations to monitor academic performance and identify areas for student support", mdl={}, - selected_models=["*"], language="en", ) From 968b79aebe64c41b3f8659d0c439d700b6e092ef Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Fri, 6 Dec 2024 15:36:31 +0800 Subject: [PATCH 10/14] chore: expose the class from package level for generation pipe --- wren-ai-service/src/globals.py | 57 +++++++------------ .../src/pipelines/generation/__init__.py | 34 ++++++++++- 2 files changed, 53 insertions(+), 38 deletions(-) diff --git a/wren-ai-service/src/globals.py b/wren-ai-service/src/globals.py index e72cf39cb..63db21fca 100644 --- a/wren-ai-service/src/globals.py +++ b/wren-ai-service/src/globals.py @@ -1,4 +1,4 @@ -import logging +import logging # noqa: I001 from dataclasses import asdict, dataclass import toml @@ -7,23 +7,6 @@ from src.core.pipeline import PipelineComponent from src.core.provider import EmbedderProvider, LLMProvider from src.pipelines import generation, indexing -from src.pipelines.generation import ( - chart_adjustment, - chart_generation, - data_assistance, - followup_sql_generation, - intent_classification, - question_recommendation, - relationship_recommendation, - sql_answer, - sql_breakdown, - sql_correction, - sql_expansion, - sql_explanation, - sql_generation, - sql_regeneration, - sql_summary, -) from src.pipelines.retrieval import ( historical_question, preprocess_sql_data, @@ -102,10 +85,10 @@ def create_service_container( ), ask_service=AskService( pipelines={ - "intent_classification": intent_classification.IntentClassification( + "intent_classification": generation.IntentClassification( **pipe_components["intent_classification"], ), - "data_assistance": data_assistance.DataAssistance( + "data_assistance": generation.DataAssistance( **pipe_components["data_assistance"] ), "retrieval": retrieval.Retrieval( @@ -117,16 +100,16 @@ def create_service_container( "historical_question": 
historical_question.HistoricalQuestion( **pipe_components["historical_question_retrieval"], ), - "sql_generation": sql_generation.SQLGeneration( + "sql_generation": generation.SQLGeneration( **pipe_components["sql_generation"], ), - "sql_correction": sql_correction.SQLCorrection( + "sql_correction": generation.SQLCorrection( **pipe_components["sql_correction"], ), - "followup_sql_generation": followup_sql_generation.FollowUpSQLGeneration( + "followup_sql_generation": generation.FollowUpSQLGeneration( **pipe_components["followup_sql_generation"], ), - "sql_summary": sql_summary.SQLSummary( + "sql_summary": generation.SQLSummary( **pipe_components["sql_summary"], ), }, @@ -137,7 +120,7 @@ def create_service_container( "sql_executor": sql_executor.SQLExecutor( **pipe_components["sql_executor"], ), - "chart_generation": chart_generation.ChartGeneration( + "chart_generation": generation.ChartGeneration( **pipe_components["chart_generation"], ), }, @@ -148,7 +131,7 @@ def create_service_container( "sql_executor": sql_executor.SQLExecutor( **pipe_components["sql_executor"], ), - "chart_adjustment": chart_adjustment.ChartAdjustment( + "chart_adjustment": generation.ChartAdjustment( **pipe_components["chart_adjustment"], ), }, @@ -159,7 +142,7 @@ def create_service_container( "preprocess_sql_data": preprocess_sql_data.PreprocessSqlData( **pipe_components["preprocess_sql_data"], ), - "sql_answer": sql_answer.SQLAnswer( + "sql_answer": generation.SQLAnswer( **pipe_components["sql_answer"], ), }, @@ -167,10 +150,10 @@ def create_service_container( ), ask_details_service=AskDetailsService( pipelines={ - "sql_breakdown": sql_breakdown.SQLBreakdown( + "sql_breakdown": generation.SQLBreakdown( **pipe_components["sql_breakdown"], ), - "sql_summary": sql_summary.SQLSummary( + "sql_summary": generation.SQLSummary( **pipe_components["sql_summary"], ), }, @@ -183,13 +166,13 @@ def create_service_container( table_retrieval_size=settings.table_retrieval_size, 
table_column_retrieval_size=settings.table_column_retrieval_size, ), - "sql_expansion": sql_expansion.SQLExpansion( + "sql_expansion": generation.SQLExpansion( **pipe_components["sql_expansion"], ), - "sql_correction": sql_correction.SQLCorrection( + "sql_correction": generation.SQLCorrection( **pipe_components["sql_correction"], ), - "sql_summary": sql_summary.SQLSummary( + "sql_summary": generation.SQLSummary( **pipe_components["sql_summary"], ), }, @@ -197,7 +180,7 @@ def create_service_container( ), sql_explanation_service=SQLExplanationService( pipelines={ - "sql_explanation": sql_explanation.SQLExplanation( + "sql_explanation": generation.SQLExplanation( **pipe_components["sql_explanation"], ) }, @@ -205,7 +188,7 @@ def create_service_container( ), sql_regeneration_service=SQLRegenerationService( pipelines={ - "sql_regeneration": sql_regeneration.SQLRegeneration( + "sql_regeneration": generation.SQLRegeneration( **pipe_components["sql_regeneration"], ) }, @@ -213,7 +196,7 @@ def create_service_container( ), relationship_recommendation=RelationshipRecommendation( pipelines={ - "relationship_recommendation": relationship_recommendation.RelationshipRecommendation( + "relationship_recommendation": generation.RelationshipRecommendation( **pipe_components["relationship_recommendation"], ) }, @@ -221,7 +204,7 @@ def create_service_container( ), question_recommendation=QuestionRecommendation( pipelines={ - "question_recommendation": question_recommendation.QuestionRecommendation( + "question_recommendation": generation.QuestionRecommendation( **pipe_components["question_recommendation"], ), "retrieval": retrieval.Retrieval( @@ -230,7 +213,7 @@ def create_service_container( table_column_retrieval_size=settings.table_column_retrieval_size, allow_using_db_schemas_without_pruning=settings.allow_using_db_schemas_without_pruning, ), - "sql_generation": sql_generation.SQLGeneration( + "sql_generation": generation.SQLGeneration( **pipe_components["sql_generation"], ), }, 
diff --git a/wren-ai-service/src/pipelines/generation/__init__.py b/wren-ai-service/src/pipelines/generation/__init__.py index e86c31801..4a10ac239 100644 --- a/wren-ai-service/src/pipelines/generation/__init__.py +++ b/wren-ai-service/src/pipelines/generation/__init__.py @@ -1,3 +1,35 @@ +from .chart_adjustment import ChartAdjustment +from .chart_generation import ChartGeneration +from .data_assistance import DataAssistance +from .followup_sql_generation import FollowUpSQLGeneration +from .intent_classification import IntentClassification +from .question_recommendation import QuestionRecommendation +from .relationship_recommendation import RelationshipRecommendation from .semantics_enrichment import SemanticsEnrichment +from .sql_answer import SQLAnswer +from .sql_breakdown import SQLBreakdown +from .sql_correction import SQLCorrection +from .sql_expansion import SQLExpansion +from .sql_explanation import SQLExplanation +from .sql_generation import SQLGeneration +from .sql_regeneration import SQLRegeneration +from .sql_summary import SQLSummary -__all__ = ["SemanticsEnrichment"] +__all__ = [ + "ChartAdjustment", + "ChartGeneration", + "DataAssistance", + "FollowUpSQLGeneration", + "IntentClassification", + "QuestionRecommendation", + "RelationshipRecommendation", + "SemanticsEnrichment", + "SQLAnswer", + "SQLBreakdown", + "SQLCorrection", + "SQLExpansion", + "SQLExplanation", + "SQLGeneration", + "SQLRegeneration", + "SQLSummary", +] From cb2cb1d3714370ec25d82ec27c16c46b5b39d870 Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Fri, 6 Dec 2024 15:39:07 +0800 Subject: [PATCH 11/14] chore: expose class from package level for retrieval pipe --- wren-ai-service/src/globals.py | 16 +++++----------- .../src/pipelines/retrieval/__init__.py | 6 ++++++ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/wren-ai-service/src/globals.py b/wren-ai-service/src/globals.py index 63db21fca..7a368e85b 100644 --- a/wren-ai-service/src/globals.py +++ 
b/wren-ai-service/src/globals.py @@ -6,13 +6,7 @@ from src.config import Settings from src.core.pipeline import PipelineComponent from src.core.provider import EmbedderProvider, LLMProvider -from src.pipelines import generation, indexing -from src.pipelines.retrieval import ( - historical_question, - preprocess_sql_data, - retrieval, - sql_executor, -) +from src.pipelines import generation, indexing, retrieval from src.web.v1.services.ask import AskService from src.web.v1.services.ask_details import AskDetailsService from src.web.v1.services.chart import ChartService @@ -97,7 +91,7 @@ def create_service_container( table_column_retrieval_size=settings.table_column_retrieval_size, allow_using_db_schemas_without_pruning=settings.allow_using_db_schemas_without_pruning, ), - "historical_question": historical_question.HistoricalQuestion( + "historical_question": retrieval.HistoricalQuestion( **pipe_components["historical_question_retrieval"], ), "sql_generation": generation.SQLGeneration( @@ -117,7 +111,7 @@ def create_service_container( ), chart_service=ChartService( pipelines={ - "sql_executor": sql_executor.SQLExecutor( + "sql_executor": retrieval.SQLExecutor( **pipe_components["sql_executor"], ), "chart_generation": generation.ChartGeneration( @@ -128,7 +122,7 @@ def create_service_container( ), chart_adjustment_service=ChartAdjustmentService( pipelines={ - "sql_executor": sql_executor.SQLExecutor( + "sql_executor": retrieval.SQLExecutor( **pipe_components["sql_executor"], ), "chart_adjustment": generation.ChartAdjustment( @@ -139,7 +133,7 @@ def create_service_container( ), sql_answer_service=SqlAnswerService( pipelines={ - "preprocess_sql_data": preprocess_sql_data.PreprocessSqlData( + "preprocess_sql_data": retrieval.PreprocessSqlData( **pipe_components["preprocess_sql_data"], ), "sql_answer": generation.SQLAnswer( diff --git a/wren-ai-service/src/pipelines/retrieval/__init__.py b/wren-ai-service/src/pipelines/retrieval/__init__.py index e69de29bb..65a653730 
100644 --- a/wren-ai-service/src/pipelines/retrieval/__init__.py +++ b/wren-ai-service/src/pipelines/retrieval/__init__.py @@ -0,0 +1,6 @@ +from .historical_question import HistoricalQuestion +from .preprocess_sql_data import PreprocessSqlData +from .retrieval import Retrieval +from .sql_executor import SQLExecutor + +__all__ = ["HistoricalQuestion", "PreprocessSqlData", "Retrieval", "SQLExecutor"] From 2b32902ef74e66f2d6e9b80cdbacc24e849cd6d9 Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Thu, 12 Dec 2024 15:46:56 +0800 Subject: [PATCH 12/14] chore: change to litellm llm provider for deployment file --- deployment/kustomizations/base/cm.yaml | 2 +- docker/config.example.yaml | 2 +- wren-ai-service/tools/config/config.example.yaml | 2 +- wren-ai-service/tools/config/config.full.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/deployment/kustomizations/base/cm.yaml b/deployment/kustomizations/base/cm.yaml index 87f44ccc8..55365e293 100644 --- a/deployment/kustomizations/base/cm.yaml +++ b/deployment/kustomizations/base/cm.yaml @@ -142,7 +142,7 @@ data: llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - name: semantics_enrichment - llm: openai_llm.gpt-4o-mini + llm: litellm_llm.gpt-4o-mini - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui diff --git a/docker/config.example.yaml b/docker/config.example.yaml index b82b7625d..ade526a9e 100644 --- a/docker/config.example.yaml +++ b/docker/config.example.yaml @@ -92,7 +92,7 @@ pipes: llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - name: semantics_enrichment - llm: openai_llm.gpt-4o-mini + llm: litellm_llm.gpt-4o-mini - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui diff --git a/wren-ai-service/tools/config/config.example.yaml b/wren-ai-service/tools/config/config.example.yaml index 102693f6f..4bb953a5d 100644 --- a/wren-ai-service/tools/config/config.example.yaml +++ 
b/wren-ai-service/tools/config/config.example.yaml @@ -106,7 +106,7 @@ pipes: llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - name: semantics_enrichment - llm: openai_llm.gpt-4o-mini + llm: litellm_llm.gpt-4o-mini - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui diff --git a/wren-ai-service/tools/config/config.full.yaml b/wren-ai-service/tools/config/config.full.yaml index 7869b46f1..0032b5871 100644 --- a/wren-ai-service/tools/config/config.full.yaml +++ b/wren-ai-service/tools/config/config.full.yaml @@ -125,7 +125,7 @@ pipes: llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - name: semantics_enrichment - llm: openai_llm.gpt-4o-mini + llm: litellm_llm.gpt-4o-mini - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui From b37653c7fb0f2f1d4bf6bb9fa2cd880b0a93fd65 Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Tue, 17 Dec 2024 18:39:11 +0800 Subject: [PATCH 13/14] chore: remove unnecessary comment --- wren-ai-service/src/globals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wren-ai-service/src/globals.py b/wren-ai-service/src/globals.py index 7a368e85b..335f50d8d 100644 --- a/wren-ai-service/src/globals.py +++ b/wren-ai-service/src/globals.py @@ -1,4 +1,4 @@ -import logging # noqa: I001 +import logging from dataclasses import asdict, dataclass import toml From 4cd4effcb3c5e4f036fbe59c2c4017580a0faa27 Mon Sep 17 00:00:00 2001 From: Pao-Sheng Wang Date: Mon, 23 Dec 2024 16:32:32 +0800 Subject: [PATCH 14/14] chore: specify the dated model snapshot id --- deployment/kustomizations/base/cm.yaml | 2 +- docker/config.example.yaml | 2 +- wren-ai-service/tools/config/config.example.yaml | 2 +- wren-ai-service/tools/config/config.full.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/deployment/kustomizations/base/cm.yaml b/deployment/kustomizations/base/cm.yaml index 55365e293..7e59f8f39 100644 --- 
a/deployment/kustomizations/base/cm.yaml +++ b/deployment/kustomizations/base/cm.yaml @@ -142,7 +142,7 @@ data: llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - name: semantics_enrichment - llm: litellm_llm.gpt-4o-mini + llm: litellm_llm.gpt-4o-mini-2024-07-18 - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui diff --git a/docker/config.example.yaml b/docker/config.example.yaml index ade526a9e..fa03c339e 100644 --- a/docker/config.example.yaml +++ b/docker/config.example.yaml @@ -92,7 +92,7 @@ pipes: llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - name: semantics_enrichment - llm: litellm_llm.gpt-4o-mini + llm: litellm_llm.gpt-4o-mini-2024-07-18 - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui diff --git a/wren-ai-service/tools/config/config.example.yaml b/wren-ai-service/tools/config/config.example.yaml index 4bb953a5d..4eb94500d 100644 --- a/wren-ai-service/tools/config/config.example.yaml +++ b/wren-ai-service/tools/config/config.example.yaml @@ -106,7 +106,7 @@ pipes: llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - name: semantics_enrichment - llm: litellm_llm.gpt-4o-mini + llm: litellm_llm.gpt-4o-mini-2024-07-18 - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui diff --git a/wren-ai-service/tools/config/config.full.yaml b/wren-ai-service/tools/config/config.full.yaml index 0032b5871..28e823de6 100644 --- a/wren-ai-service/tools/config/config.full.yaml +++ b/wren-ai-service/tools/config/config.full.yaml @@ -125,7 +125,7 @@ pipes: llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui - name: semantics_enrichment - llm: litellm_llm.gpt-4o-mini + llm: litellm_llm.gpt-4o-mini-2024-07-18 - name: relationship_recommendation llm: litellm_llm.gpt-4o-mini-2024-07-18 engine: wren_ui