Commit

Version 1.4.24

surajabacusai committed Dec 20, 2024
1 parent 1de29c3 commit 32465b2
Showing 271 changed files with 1,322 additions and 97 deletions.
3 changes: 2 additions & 1 deletion abacusai/__init__.py
@@ -225,7 +225,8 @@
from .web_search_response import WebSearchResponse
from .web_search_result import WebSearchResult
from .webhook import Webhook
+from .workflow_graph_node import WorkflowGraphNode
from .workflow_node_template import WorkflowNodeTemplate


__version__ = "1.4.23"
__version__ = "1.4.24"
98 changes: 72 additions & 26 deletions abacusai/api_class/ai_agents.py
@@ -1,6 +1,6 @@
import ast
import dataclasses
-from typing import Dict, List, Union
+from typing import Dict, List, Tuple, Union

from . import enums
from .abstract import ApiClass, get_clean_function_source_code_for_agent, validate_constructor_arg_types
@@ -34,6 +34,8 @@ class FieldDescriptor(ApiClass):
class JSONSchema:
@classmethod
def from_fields_list(cls, fields_list: List[str]):
+        if not fields_list:
+            return cls(json_schema={})
json_schema = {
'type': 'object',
'properties': {field: {'title': field, 'type': 'string'} for field in fields_list}
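
For illustration, a standalone sketch of the guarded behavior; build_schema below is a hypothetical mirror of JSONSchema.from_fields_list, not part of the library:

from typing import List

def build_schema(fields_list: List[str]) -> dict:
    # Mirrors the change above: an empty or None fields list now yields an
    # empty schema instead of an object schema with zero properties.
    if not fields_list:
        return {}
    return {
        'type': 'object',
        'properties': {field: {'title': field, 'type': 'string'} for field in fields_list}
    }

print(build_schema([]))                  # -> {}
print(build_schema(['query', 'limit']))  # -> object schema with two string properties
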
@@ -62,17 +64,29 @@ class WorkflowNodeInputMapping(ApiClass):
Set to `None` if the type is `USER_INPUT` and the variable doesn't need a pre-filled initial value.
is_required (bool): Indicates whether the input is required. Defaults to True.
description (str): The description of this input.
+        constant_value (str): The constant value of this input if variable type is CONSTANT. Only applicable for template nodes.
"""
name: str
variable_type: enums.WorkflowNodeInputType
variable_source: str = dataclasses.field(default=None)
source_prop: str = dataclasses.field(default=None)
is_required: bool = dataclasses.field(default=True)
description: str = dataclasses.field(default=None)
+    constant_value: str = dataclasses.field(default=None)

def __post_init__(self):
-        if self.variable_type == enums.WorkflowNodeInputType.IGNORE and self.is_required:
-            raise ValueError('input_mapping', 'Invalid input mapping. The variable type cannot be IGNORE if is_required is True.')
+        if self.variable_type == enums.WorkflowNodeInputType.IGNORE:
+            if self.is_required:
+                raise ValueError('input_mapping', 'Invalid input mapping. The variable type cannot be IGNORE if is_required is True.')
+            if self.variable_source or self.source_prop:
+                raise ValueError('input_mapping', 'variable source and source prop should not be provided for IGNORE input mappings.')
+        if self.variable_type != enums.WorkflowNodeInputType.CONSTANT and self.constant_value:
+            raise ValueError('input_mapping', 'Invalid input mapping. If the variable type is not CONSTANT, constant_value must be empty.')
+        if self.variable_type == enums.WorkflowNodeInputType.CONSTANT:
+            if self.is_required and self.constant_value is None:
+                raise ValueError('input_mapping', 'The constant value mapping should be provided for required CONSTANT input mappings.')
+            if self.variable_source or self.source_prop:
+                raise ValueError('input_mapping', 'variable source and source prop should not be provided for CONSTANT input mappings.')
if isinstance(self.variable_type, str):
self.variable_type = enums.WorkflowNodeInputType(self.variable_type)

@@ -83,7 +97,8 @@ def to_dict(self):
'variable_source': self.variable_source,
'source_prop': self.source_prop or self.name,
'is_required': self.is_required,
-            'description': self.description
+            'description': self.description,
+            'constant_value': self.constant_value
}

@classmethod
@@ -97,7 +112,8 @@ def from_dict(cls, mapping: dict):
variable_source=mapping.get('variable_source'),
source_prop=mapping.get('source_prop') or mapping['name'] if mapping.get('variable_source') else None,
is_required=mapping.get('is_required', True),
-            description=mapping.get('description')
+            description=mapping.get('description'),
+            constant_value=mapping.get('constant_value')
)
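
As a usage sketch of the new CONSTANT variable type: the field names come from the diff, while the example values are invented.

from abacusai.api_class import WorkflowNodeInputMapping, enums

# A CONSTANT input carries a fixed value and, per __post_init__ above, must
# not set variable_source or source_prop; a required CONSTANT input must
# provide constant_value. Per the docstring, this applies to template nodes.
region_input = WorkflowNodeInputMapping(
    name='api_region',
    variable_type=enums.WorkflowNodeInputType.CONSTANT,
    constant_value='us-east-1',
)
print(region_input.to_dict())
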


@@ -166,6 +182,25 @@ def from_workflow_node(cls, schema_source: str, schema_prop: str):
instance.runtime_schema = True
return instance

+    @classmethod
+    def from_input_mappings(cls, input_mappings: List[WorkflowNodeInputMapping]):
+        """
+        Creates a json_schema for the input schema of the node from its input mappings.
+        Args:
+            input_mappings (List[WorkflowNodeInputMapping]): The input mappings for the node.
+        """
+        user_input_mappings = [input_mapping for input_mapping in input_mappings if input_mapping.variable_type == enums.WorkflowNodeInputType.USER_INPUT]
+        if len(user_input_mappings) > 0:
+            json_schema = {
+                'type': 'object',
+                'required': [input_mapping.name for input_mapping in user_input_mappings if input_mapping.is_required],
+                'properties': {input_mapping.name: {'title': input_mapping.name, 'type': 'string'} for input_mapping in user_input_mappings}
+            }
+            return cls(json_schema=json_schema)
+        else:
+            return cls(json_schema={})


@validate_constructor_arg_types('output_mapping')
@dataclasses.dataclass
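
A sketch of how the new classmethod derives a user-facing schema; the node and property names here are invented:

from abacusai.api_class import WorkflowNodeInputMapping, WorkflowNodeInputSchema, enums

mappings = [
    WorkflowNodeInputMapping(name='query', variable_type=enums.WorkflowNodeInputType.USER_INPUT),
    WorkflowNodeInputMapping(name='docs', variable_type=enums.WorkflowNodeInputType.WORKFLOW_VARIABLE,
                             variable_source='retriever_node', source_prop='documents'),
]

# Only USER_INPUT mappings contribute properties; 'docs' is filled from
# another node's output, so it stays out of the user-facing schema.
schema = WorkflowNodeInputSchema.from_input_mappings(mappings)
print(schema.json_schema)
# {'type': 'object', 'required': ['query'], 'properties': {'query': {'title': 'query', 'type': 'string'}}}
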
@@ -274,7 +309,7 @@ class WorkflowGraphNode(ApiClass):
trigger_config (TriggerConfig): The configuration for a trigger workflow node.
"""

-    def __init__(self, name: str, input_mappings: Union[Dict[str, WorkflowNodeInputMapping], List[WorkflowNodeInputMapping]] = None, output_mappings: Union[List[str], Dict[str, str], List[WorkflowNodeOutputMapping]] = None, function: callable = None, function_name: str = None, source_code: str = None, input_schema: Union[List[str], WorkflowNodeInputSchema] = None, output_schema: Union[List[str], WorkflowNodeOutputSchema] = None, template_metadata: dict = None, trigger_config: TriggerConfig = None):
+    def __init__(self, name: str, function: callable = None, input_mappings: Union[Dict[str, WorkflowNodeInputMapping], List[WorkflowNodeInputMapping]] = None, output_mappings: Union[List[str], Dict[str, str], List[WorkflowNodeOutputMapping]] = None, function_name: str = None, source_code: str = None, input_schema: Union[List[str], WorkflowNodeInputSchema] = None, output_schema: Union[List[str], WorkflowNodeOutputSchema] = None, template_metadata: dict = None, trigger_config: TriggerConfig = None):
self.template_metadata = template_metadata
self.trigger_config = trigger_config
if self.template_metadata and not self.template_metadata.get('initialized'):
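
The reordered signature means the node's function can now be passed as the second positional argument, right after name. A minimal sketch with an invented function:

from abacusai.api_class import WorkflowGraphNode

def echo(message: str):
    return {'reply': message}

# Previously the second positional slot was input_mappings; now it is function.
node = WorkflowGraphNode('echo_node', echo, output_mappings=['reply'])
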
@@ -314,7 +349,7 @@ def __init__(self, name: str, input_mappings: Union[Dict[str, WorkflowNodeInputM

is_shortform_input_mappings = False
if input_mappings is None:
-            input_mappings = []
+            input_mappings = {}
if isinstance(input_mappings, List) and all(isinstance(input, WorkflowNodeInputMapping) for input in input_mappings):
self.input_mappings = input_mappings
input_mapping_args = [input.name for input in input_mappings]
@@ -334,6 +369,22 @@ def __init__(self, name: str, input_mappings: Union[Dict[str, WorkflowNodeInputM
else:
raise ValueError('workflow_graph_node', 'Invalid input mappings. Must be a list of WorkflowNodeInputMapping or a dictionary of input mappings in the form {arg_name: node_name.outputs.prop_name}.')

+        if input_schema is None:
+            self.input_schema = WorkflowNodeInputSchema.from_input_mappings(self.input_mappings)
+        elif isinstance(input_schema, WorkflowNodeInputSchema):
+            self.input_schema = input_schema
+        elif isinstance(input_schema, list) and all(isinstance(field, str) for field in input_schema):
+            self.input_schema = WorkflowNodeInputSchema.from_fields_list(input_schema)
+        else:
+            raise ValueError('workflow_graph_node', 'Invalid input schema. Must be a WorkflowNodeInputSchema or a list of field names.')
+
+        if input_schema is not None and is_shortform_input_mappings:
+            # If user provided input_schema and input_mappings in shortform, then we need to update the input_mappings to have the correct variable_type
+            user_input_fields = JSONSchema.to_fields_list(self.input_schema)
+            for mapping in self.input_mappings:
+                if mapping.name in user_input_fields:
+                    mapping.variable_type = enums.WorkflowNodeInputType.USER_INPUT

if output_mappings is None:
output_mappings = []
if isinstance(output_mappings, List):
@@ -348,23 +399,9 @@ def __init__(self, name: str, input_mappings: Union[Dict[str, WorkflowNodeInputM
else:
raise ValueError('workflow_graph_node', 'Invalid output mappings. Must be a list of WorkflowNodeOutputMapping or a list of output names or a dictionary of output mappings in the form {output_name: output_type}.')

-        if not input_schema:
-            self.input_schema = WorkflowNodeInputSchema(json_schema={}, ui_schema={})
-        elif isinstance(input_schema, WorkflowNodeInputSchema):
-            self.input_schema = input_schema
-        elif isinstance(input_schema, list) and all(isinstance(field, str) for field in input_schema):
-            self.input_schema = WorkflowNodeInputSchema.from_fields_list(input_schema)
-        else:
-            raise ValueError('workflow_graph_node', 'Invalid input schema. Must be a WorkflowNodeInputSchema or a list of field names.')
-
-        if is_shortform_input_mappings:
-            user_input_fields = JSONSchema.to_fields_list(self.input_schema)
-            for mapping in self.input_mappings:
-                if mapping.name in user_input_fields:
-                    mapping.variable_type = enums.WorkflowNodeInputType.USER_INPUT

-        if not output_schema:
-            self.output_schema = WorkflowNodeOutputSchema({})
+        if output_schema is None:
+            outputs = [output.name for output in self.output_mappings]
+            self.output_schema = WorkflowNodeOutputSchema.from_fields_list(outputs)
elif isinstance(output_schema, WorkflowNodeOutputSchema):
self.output_schema = output_schema
elif isinstance(output_schema, list) and all(isinstance(field, str) for field in output_schema):
@@ -546,10 +583,19 @@ class WorkflowGraph(ApiClass):
primary_start_node (Union[str, WorkflowGraphNode]): The primary node to start the workflow from.
"""
nodes: List[WorkflowGraphNode] = dataclasses.field(default_factory=list)
-    edges: List[WorkflowGraphEdge] = dataclasses.field(default_factory=list)
+    edges: List[Union[WorkflowGraphEdge, Tuple[WorkflowGraphNode, WorkflowGraphNode, dict], Tuple[str, str, dict]]] = dataclasses.field(default_factory=list)
primary_start_node: Union[str, WorkflowGraphNode] = dataclasses.field(default=None)
common_source_code: str = dataclasses.field(default=None)

+    def __post_init__(self):
+        if self.edges:
+            for index, edge in enumerate(self.edges):
+                if isinstance(edge, Tuple):
+                    source = edge[0] if isinstance(edge[0], str) else edge[0].name
+                    target = edge[1] if isinstance(edge[1], str) else edge[1].name
+                    details = edge[2] if isinstance(edge[2], dict) else None
+                    self.edges[index] = WorkflowGraphEdge(source=source, target=target, details=details)

def to_dict(self):
return {
'nodes': [node.to_dict() for node in self.nodes],
@@ -566,7 +612,7 @@ def from_dict(cls, graph: dict):
node['__return_filter'] = True
nodes = [WorkflowGraphNode.from_dict(node) for node in graph.get('nodes', [])]
edges = [WorkflowGraphEdge.from_dict(edge) for edge in graph.get('edges', [])]
-        if graph.get('primary_start_node') is None:
+        if graph.get('primary_start_node') not in [node.name for node in nodes]:
non_primary_nodes = set()
for edge in edges:
non_primary_nodes.add(edge.target)
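Taken together, the edge normalization allows graphs to be declared with plain tuples. A sketch under invented node names and functions:

from abacusai.api_class import WorkflowGraph, WorkflowGraphNode

def extract(text: str):
    return {'entities': text.split()}

def summarize(entities: list):
    return {'summary': ', '.join(entities)}

extract_node = WorkflowGraphNode('extract', extract, output_mappings=['entities'])
summarize_node = WorkflowGraphNode('summarize', summarize,
                                   input_mappings={'entities': 'extract.outputs.entities'})

# (source, target, details) tuples of nodes or node names are normalized into
# WorkflowGraphEdge objects by __post_init__. Note that edge[2] is accessed
# unconditionally, so a details slot is always needed, even if empty.
graph = WorkflowGraph(
    nodes=[extract_node, summarize_node],
    edges=[(extract_node, summarize_node, {})],
    primary_start_node=extract_node,
)
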
3 changes: 3 additions & 0 deletions abacusai/api_class/enums.py
@@ -485,6 +485,8 @@ class LLMName(ApiEnum):
ABACUS_SMAUG3 = 'ABACUS_SMAUG3'
ABACUS_DRACARYS = 'ABACUS_DRACARYS'
QWEN_2_5_32B = 'QWEN_2_5_32B'
+    QWEN_2_5_32B_BASE = 'QWEN_2_5_32B_BASE'
+    QWEN_2_5_72B = 'QWEN_2_5_72B'
QWQ_32B = 'QWQ_32B'
GEMINI_1_5_FLASH = 'GEMINI_1_5_FLASH'
XAI_GROK = 'XAI_GROK'
@@ -554,6 +556,7 @@ class WorkflowNodeInputType(ApiEnum):
USER_INPUT = 'USER_INPUT'
WORKFLOW_VARIABLE = 'WORKFLOW_VARIABLE'
IGNORE = 'IGNORE'
+    CONSTANT = 'CONSTANT'


class WorkflowNodeOutputType(ApiEnum):
2 changes: 1 addition & 1 deletion abacusai/api_client_utils.py
@@ -617,7 +617,7 @@ def combine_doc_info(group):
cls.TOKENS: [token for page in page_infos for token in page.get(cls.TOKENS) or []],
# default to embedded text
cls.PAGES: [page.get(cls.PAGE_TEXT) or '' for page in page_infos],
-            **({cls.DOC_ID: page_infos[0]} if cls.DOC_ID in page_infos[0] else {}),
+            **({cls.DOC_ID: page_infos[0][cls.DOC_ID]} if cls.DOC_ID in page_infos[0] else {}),
}
document_data[cls.EMBEDDED_TEXT] = combine_page_texts(info.get(
cls.EMBEDDED_TEXT) or info.get(cls.PAGE_TEXT) or '' for info in page_infos)
12 changes: 7 additions & 5 deletions abacusai/client.py
@@ -181,6 +181,7 @@
from .web_page_response import WebPageResponse
from .web_search_response import WebSearchResponse
from .webhook import Webhook
+from .workflow_graph_node import WorkflowGraphNode
from .workflow_node_template import WorkflowNodeTemplate


@@ -635,7 +636,7 @@ class BaseApiClient:
client_options (ClientOptions): Optional API client configurations
skip_version_check (bool): If true, will skip checking the server's current API version on initializing the client
"""
-    client_version = '1.4.23'
+    client_version = '1.4.24'

def __init__(self, api_key: str = None, server: str = None, client_options: ClientOptions = None, skip_version_check: bool = False, include_tb: bool = False):
self.api_key = api_key
@@ -3743,7 +3744,8 @@ def _endpoint(deployment_id: str, ttl_hash: int):

with self._request(endpoint, 'GET', query_params={'sql': sql}, stream=True, retry_500=True) as response:
if response.status_code == 200:
-                return pd.read_csv(response.raw, sep=',')
+                buf = io.BytesIO(response.content)
+                return pd.read_parquet(buf, engine='pyarrow')
else:
error_json = response.json()
error_message = error_json.get('error')
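
The SQL execution path now parses the streamed response as Parquet instead of CSV. A self-contained sketch of why that matters, using only pandas/pyarrow with no API calls:

import io

import pandas as pd

# Parquet preserves dtypes (datetimes, nullable ints) that a CSV round-trip
# would flatten to strings or floats.
frame = pd.DataFrame({'id': [1, 2, 3], 'created': pd.to_datetime(['2024-12-20'] * 3)})
payload = frame.to_parquet(engine='pyarrow')  # bytes, standing in for response.content
restored = pd.read_parquet(io.BytesIO(payload), engine='pyarrow')
assert restored.equals(frame)
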
@@ -6004,7 +6006,7 @@ def get_docstore_document_data(self, doc_id: str, document_processing_config: Un
return self._proxy_request('getDocstoreDocumentData', 'POST', query_params={}, body={'docId': doc_id, 'documentProcessingConfig': document_processing_config, 'documentProcessingVersion': document_processing_version, 'returnExtractedPageText': return_extracted_page_text}, parse_type=DocumentData, is_sync=True)

def extract_document_data(self, document: io.TextIOBase = None, doc_id: str = None, document_processing_config: Union[dict, DocumentProcessingConfig] = None, start_page: int = None, end_page: int = None, return_extracted_page_text: bool = False) -> DocumentData:
"""Extracts data from a document.
"""Extracts data from a document using either OCR (for scanned documents/images) or embedded text extraction (for digital documents like .docx). Configure the extraction method through DocumentProcessingConfig

Args:
document (io.TextIOBase): The document to extract data from. One of document or doc_id must be provided.
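
A hypothetical call matching the clarified docstring; the doc_id and the config field shown are placeholders and assumptions, not verified options:

from abacusai import ApiClient
from abacusai.api_class import DocumentProcessingConfig

client = ApiClient('YOUR_API_KEY')  # placeholder credentials
# extract_bounding_boxes is an assumed DocumentProcessingConfig field used
# here to force OCR-style extraction for a scanned document.
data = client.extract_document_data(
    doc_id='fake_doc_id',
    document_processing_config=DocumentProcessingConfig(extract_bounding_boxes=True),
    return_extracted_page_text=True,
)
print(data)
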
@@ -8640,7 +8642,7 @@ def generate_agent_code(self, project_id: str, prompt: str, fast_mode: bool = No
fast_mode (bool): If True, runs a faster but slightly less accurate code generation pipeline"""
return self._call_api('generateAgentCode', 'POST', query_params={}, body={'projectId': project_id, 'prompt': prompt, 'fastMode': fast_mode})

-    def evaluate_prompt(self, prompt: str = None, system_message: str = None, llm_name: Union[LLMName, str] = None, max_tokens: int = None, temperature: float = 0.0, messages: list = None, response_type: str = None, json_response_schema: dict = None, stop_sequences: list = None, top_p: float = None) -> LlmResponse:
+    def evaluate_prompt(self, prompt: str = None, system_message: str = None, llm_name: Union[LLMName, str] = None, max_tokens: int = None, temperature: float = 0.0, messages: list = None, response_type: str = None, json_response_schema: dict = None, stop_sequences: List = None, top_p: float = None) -> LlmResponse:
"""Generate response to the prompt using the specified model.

Args:
@@ -8652,7 +8654,7 @@ def evaluate_prompt(self, prompt: str = None, system_message: str = None, llm_na
messages (list): A list of messages to use as conversation history. For completion models like OPENAI_GPT3_5_TEXT and PALM_TEXT this should not be set. A message is a dict with attributes: is_user (bool): Whether the message is from the user. text (str): The message's text. attachments (list): The files attached to the message represented as a list of dictionaries [{"doc_id": <doc_id1>}, {"doc_id": <doc_id2>}]
response_type (str): Specifies the type of response to request from the LLM. One of 'text' and 'json'. If set to 'json', the LLM will respond with a json formatted string whose schema can be specified `json_response_schema`. Defaults to 'text'
json_response_schema (dict): A dictionary specifying the keys/schema/parameters which LLM should adhere to in its response when `response_type` is 'json'. Each parameter is mapped to a dict with the following info - type (str) (required): Data type of the parameter. description (str) (required): Description of the parameter. is_required (bool) (optional): Whether the parameter is required or not. Example: json_response_schema = {'title': {'type': 'string', 'description': 'Article title', 'is_required': true}, 'body': {'type': 'string', 'description': 'Article body'}}
-            stop_sequences (list): Specifies the strings on which the LLM will stop generation.
+            stop_sequences (List): Specifies the strings on which the LLM will stop generation.
top_p (float): The nucleus sampling value used for this run. If set, the model will sample from the smallest set of tokens whose cumulative probability exceeds the probability `top_p`. Default is 1.0. A range of 0.0 - 1.0 is allowed. It is generally recommended to use either temperature sampling or nucleus sampling, but not both.

Returns:
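For reference, a sketch of calling evaluate_prompt in JSON mode, assembled only from the parameters documented above:

from abacusai import ApiClient

client = ApiClient('YOUR_API_KEY')  # placeholder credentials
# The schema below follows the json_response_schema example in the docstring.
response = client.evaluate_prompt(
    prompt='Write a short release note for version 1.4.24.',
    llm_name='QWEN_2_5_72B',  # one of the models added in this release
    response_type='json',
    json_response_schema={
        'title': {'type': 'string', 'description': 'Note title', 'is_required': True},
        'body': {'type': 'string', 'description': 'Note body'},
    },
)
print(response.content)  # content holds the generated text on LlmResponse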