Commit

Version 1.4.24

surajabacusai committed Dec 20, 2024
1 parent 1de29c3 commit 32465b2
Showing 271 changed files with 1,322 additions and 97 deletions.
3 changes: 2 additions & 1 deletion abacusai/__init__.py
@@ -225,7 +225,8 @@
from .web_search_response import WebSearchResponse
from .web_search_result import WebSearchResult
from .webhook import Webhook
+from .workflow_graph_node import WorkflowGraphNode
from .workflow_node_template import WorkflowNodeTemplate


__version__ = "1.4.23"
__version__ = "1.4.24"
98 changes: 72 additions & 26 deletions abacusai/api_class/ai_agents.py
@@ -1,6 +1,6 @@
import ast
import dataclasses
-from typing import Dict, List, Union
+from typing import Dict, List, Tuple, Union

from . import enums
from .abstract import ApiClass, get_clean_function_source_code_for_agent, validate_constructor_arg_types
@@ -34,6 +34,8 @@ class FieldDescriptor(ApiClass):
class JSONSchema:
@classmethod
def from_fields_list(cls, fields_list: List[str]):
+        if not fields_list:
+            return cls(json_schema={})
json_schema = {
'type': 'object',
'properties': {field: {'title': field, 'type': 'string'} for field in fields_list}
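
For illustration, a standalone sketch of the guarded behavior; build_schema below is a hypothetical mirror of JSONSchema.from_fields_list, not part of the library:

from typing import List

def build_schema(fields_list: List[str]) -> dict:
    # Mirrors the change above: an empty or None fields list now yields an
    # empty schema instead of an object schema with zero properties.
    if not fields_list:
        return {}
    return {
        'type': 'object',
        'properties': {field: {'title': field, 'type': 'string'} for field in fields_list}
    }

print(build_schema([]))                  # -> {}
print(build_schema(['query', 'limit']))  # -> object schema with two string properties
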
@@ -62,17 +64,29 @@ class WorkflowNodeInputMapping(ApiClass):
Set to `None` if the type is `USER_INPUT` and the variable doesn't need a pre-filled initial value.
is_required (bool): Indicates whether the input is required. Defaults to True.
description (str): The description of this input.
+        constant_value (str): The constant value of this input if variable type is CONSTANT. Only applicable for template nodes.
"""
name: str
variable_type: enums.WorkflowNodeInputType
variable_source: str = dataclasses.field(default=None)
source_prop: str = dataclasses.field(default=None)
is_required: bool = dataclasses.field(default=True)
description: str = dataclasses.field(default=None)
+    constant_value: str = dataclasses.field(default=None)

def __post_init__(self):
-        if self.variable_type == enums.WorkflowNodeInputType.IGNORE and self.is_required:
-            raise ValueError('input_mapping', 'Invalid input mapping. The variable type cannot be IGNORE if is_required is True.')
+        if self.variable_type == enums.WorkflowNodeInputType.IGNORE:
+            if self.is_required:
+                raise ValueError('input_mapping', 'Invalid input mapping. The variable type cannot be IGNORE if is_required is True.')
+            if self.variable_source or self.source_prop:
+                raise ValueError('input_mapping', 'variable source and source prop should not be provided for IGNORE input mappings.')
+        if self.variable_type != enums.WorkflowNodeInputType.CONSTANT and self.constant_value:
+            raise ValueError('input_mapping', 'Invalid input mapping. If the variable type is not CONSTANT, constant_value must be empty.')
+        if self.variable_type == enums.WorkflowNodeInputType.CONSTANT:
+            if self.is_required and self.constant_value is None:
+                raise ValueError('input_mapping', 'The constant value mapping should be provided for required CONSTANT input mappings.')
+            if self.variable_source or self.source_prop:
+                raise ValueError('input_mapping', 'variable source and source prop should not be provided for CONSTANT input mappings.')
if isinstance(self.variable_type, str):
self.variable_type = enums.WorkflowNodeInputType(self.variable_type)

@@ -83,7 +97,8 @@ def to_dict(self):
'variable_source': self.variable_source,
'source_prop': self.source_prop or self.name,
'is_required': self.is_required,
-            'description': self.description
+            'description': self.description,
+            'constant_value': self.constant_value
}

@classmethod
@@ -97,7 +112,8 @@ def from_dict(cls, mapping: dict):
variable_source=mapping.get('variable_source'),
source_prop=mapping.get('source_prop') or mapping['name'] if mapping.get('variable_source') else None,
is_required=mapping.get('is_required', True),
-            description=mapping.get('description')
+            description=mapping.get('description'),
+            constant_value=mapping.get('constant_value')
)
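
As a usage sketch of the new CONSTANT variable type: the field names come from the diff, while the example values are invented.

from abacusai.api_class import WorkflowNodeInputMapping, enums

# A CONSTANT input carries a fixed value and, per __post_init__ above, must
# not set variable_source or source_prop; a required CONSTANT input must
# provide constant_value. Per the docstring, this applies to template nodes.
region_input = WorkflowNodeInputMapping(
    name='api_region',
    variable_type=enums.WorkflowNodeInputType.CONSTANT,
    constant_value='us-east-1',
)
print(region_input.to_dict())
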


@@ -166,6 +182,25 @@ def from_workflow_node(cls, schema_source: str, schema_prop: str):
instance.runtime_schema = True
return instance

+    @classmethod
+    def from_input_mappings(cls, input_mappings: List[WorkflowNodeInputMapping]):
+        """
+        Creates a json_schema for the input schema of the node from its input mappings.
+        Args:
+            input_mappings (List[WorkflowNodeInputMapping]): The input mappings for the node.
+        """
+        user_input_mappings = [input_mapping for input_mapping in input_mappings if input_mapping.variable_type == enums.WorkflowNodeInputType.USER_INPUT]
+        if len(user_input_mappings) > 0:
+            json_schema = {
+                'type': 'object',
+                'required': [input_mapping.name for input_mapping in user_input_mappings if input_mapping.is_required],
+                'properties': {input_mapping.name: {'title': input_mapping.name, 'type': 'string'} for input_mapping in user_input_mappings}
+            }
+            return cls(json_schema=json_schema)
+        else:
+            return cls(json_schema={})


@validate_constructor_arg_types('output_mapping')
@dataclasses.dataclass
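
A sketch of how the new classmethod derives a user-facing schema; the node and property names here are invented:

from abacusai.api_class import WorkflowNodeInputMapping, WorkflowNodeInputSchema, enums

mappings = [
    WorkflowNodeInputMapping(name='query', variable_type=enums.WorkflowNodeInputType.USER_INPUT),
    WorkflowNodeInputMapping(name='docs', variable_type=enums.WorkflowNodeInputType.WORKFLOW_VARIABLE,
                             variable_source='retriever_node', source_prop='documents'),
]

# Only USER_INPUT mappings contribute properties; 'docs' is filled from
# another node's output, so it stays out of the user-facing schema.
schema = WorkflowNodeInputSchema.from_input_mappings(mappings)
print(schema.json_schema)
# {'type': 'object', 'required': ['query'], 'properties': {'query': {'title': 'query', 'type': 'string'}}}
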
@@ -274,7 +309,7 @@ class WorkflowGraphNode(ApiClass):
trigger_config (TriggerConfig): The configuration for a trigger workflow node.
"""

-    def __init__(self, name: str, input_mappings: Union[Dict[str, WorkflowNodeInputMapping], List[WorkflowNodeInputMapping]] = None, output_mappings: Union[List[str], Dict[str, str], List[WorkflowNodeOutputMapping]] = None, function: callable = None, function_name: str = None, source_code: str = None, input_schema: Union[List[str], WorkflowNodeInputSchema] = None, output_schema: Union[List[str], WorkflowNodeOutputSchema] = None, template_metadata: dict = None, trigger_config: TriggerConfig = None):
+    def __init__(self, name: str, function: callable = None, input_mappings: Union[Dict[str, WorkflowNodeInputMapping], List[WorkflowNodeInputMapping]] = None, output_mappings: Union[List[str], Dict[str, str], List[WorkflowNodeOutputMapping]] = None, function_name: str = None, source_code: str = None, input_schema: Union[List[str], WorkflowNodeInputSchema] = None, output_schema: Union[List[str], WorkflowNodeOutputSchema] = None, template_metadata: dict = None, trigger_config: TriggerConfig = None):
self.template_metadata = template_metadata
self.trigger_config = trigger_config
if self.template_metadata and not self.template_metadata.get('initialized'):
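
The reordered signature means the node's function can now be passed as the second positional argument, right after name. A minimal sketch with an invented function:

from abacusai.api_class import WorkflowGraphNode

def echo(message: str):
    return {'reply': message}

# Previously the second positional slot was input_mappings; now it is function.
node = WorkflowGraphNode('echo_node', echo, output_mappings=['reply'])
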
@@ -314,7 +349,7 @@ def __init__(self, name: str, input_mappings: Union[Dict[str, WorkflowNodeInputM

is_shortform_input_mappings = False
if input_mappings is None:
-            input_mappings = []
+            input_mappings = {}
if isinstance(input_mappings, List) and all(isinstance(input, WorkflowNodeInputMapping) for input in input_mappings):
self.input_mappings = input_mappings
input_mapping_args = [input.name for input in input_mappings]
@@ -334,6 +369,22 @@ def __init__(self, name: str, input_mappings: Union[Dict[str, WorkflowNodeInputM
else:
raise ValueError('workflow_graph_node', 'Invalid input mappings. Must be a list of WorkflowNodeInputMapping or a dictionary of input mappings in the form {arg_name: node_name.outputs.prop_name}.')

+        if input_schema is None:
+            self.input_schema = WorkflowNodeInputSchema.from_input_mappings(self.input_mappings)
+        elif isinstance(input_schema, WorkflowNodeInputSchema):
+            self.input_schema = input_schema
+        elif isinstance(input_schema, list) and all(isinstance(field, str) for field in input_schema):
+            self.input_schema = WorkflowNodeInputSchema.from_fields_list(input_schema)
+        else:
+            raise ValueError('workflow_graph_node', 'Invalid input schema. Must be a WorkflowNodeInputSchema or a list of field names.')
+
+        if input_schema is not None and is_shortform_input_mappings:
+            # If user provided input_schema and input_mappings in shortform, then we need to update the input_mappings to have the correct variable_type
+            user_input_fields = JSONSchema.to_fields_list(self.input_schema)
+            for mapping in self.input_mappings:
+                if mapping.name in user_input_fields:
+                    mapping.variable_type = enums.WorkflowNodeInputType.USER_INPUT

if output_mappings is None:
output_mappings = []
if isinstance(output_mappings, List):
@@ -348,23 +399,9 @@ def __init__(self, name: str, input_mappings: Union[Dict[str, WorkflowNodeInputM
else:
raise ValueError('workflow_graph_node', 'Invalid output mappings. Must be a list of WorkflowNodeOutputMapping or a list of output names or a dictionary of output mappings in the form {output_name: output_type}.')

-        if not input_schema:
-            self.input_schema = WorkflowNodeInputSchema(json_schema={}, ui_schema={})
-        elif isinstance(input_schema, WorkflowNodeInputSchema):
-            self.input_schema = input_schema
-        elif isinstance(input_schema, list) and all(isinstance(field, str) for field in input_schema):
-            self.input_schema = WorkflowNodeInputSchema.from_fields_list(input_schema)
-        else:
-            raise ValueError('workflow_graph_node', 'Invalid input schema. Must be a WorkflowNodeInputSchema or a list of field names.')
-
-        if is_shortform_input_mappings:
-            user_input_fields = JSONSchema.to_fields_list(self.input_schema)
-            for mapping in self.input_mappings:
-                if mapping.name in user_input_fields:
-                    mapping.variable_type = enums.WorkflowNodeInputType.USER_INPUT

-        if not output_schema:
-            self.output_schema = WorkflowNodeOutputSchema({})
+        if output_schema is None:
+            outputs = [output.name for output in self.output_mappings]
+            self.output_schema = WorkflowNodeOutputSchema.from_fields_list(outputs)
elif isinstance(output_schema, WorkflowNodeOutputSchema):
self.output_schema = output_schema
elif isinstance(output_schema, list) and all(isinstance(field, str) for field in output_schema):
@@ -546,10 +583,19 @@ class WorkflowGraph(ApiClass):
primary_start_node (Union[str, WorkflowGraphNode]): The primary node to start the workflow from.
"""
nodes: List[WorkflowGraphNode] = dataclasses.field(default_factory=list)
-    edges: List[WorkflowGraphEdge] = dataclasses.field(default_factory=list)
+    edges: List[Union[WorkflowGraphEdge, Tuple[WorkflowGraphNode, WorkflowGraphNode, dict], Tuple[str, str, dict]]] = dataclasses.field(default_factory=list)
primary_start_node: Union[str, WorkflowGraphNode] = dataclasses.field(default=None)
common_source_code: str = dataclasses.field(default=None)

+    def __post_init__(self):
+        if self.edges:
+            for index, edge in enumerate(self.edges):
+                if isinstance(edge, Tuple):
+                    source = edge[0] if isinstance(edge[0], str) else edge[0].name
+                    target = edge[1] if isinstance(edge[1], str) else edge[1].name
+                    details = edge[2] if isinstance(edge[2], dict) else None
+                    self.edges[index] = WorkflowGraphEdge(source=source, target=target, details=details)

def to_dict(self):
return {
'nodes': [node.to_dict() for node in self.nodes],
@@ -566,7 +612,7 @@ def from_dict(cls, graph: dict):
node['__return_filter'] = True
nodes = [WorkflowGraphNode.from_dict(node) for node in graph.get('nodes', [])]
edges = [WorkflowGraphEdge.from_dict(edge) for edge in graph.get('edges', [])]
-        if graph.get('primary_start_node') is None:
+        if graph.get('primary_start_node') not in [node.name for node in nodes]:
non_primary_nodes = set()
for edge in edges:
non_primary_nodes.add(edge.target)
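Taken together, the edge normalization allows graphs to be declared with plain tuples. A sketch under invented node names and functions:

from abacusai.api_class import WorkflowGraph, WorkflowGraphNode

def extract(text: str):
    return {'entities': text.split()}

def summarize(entities: list):
    return {'summary': ', '.join(entities)}

extract_node = WorkflowGraphNode('extract', extract, output_mappings=['entities'])
summarize_node = WorkflowGraphNode('summarize', summarize,
                                   input_mappings={'entities': 'extract.outputs.entities'})

# (source, target, details) tuples of nodes or node names are normalized into
# WorkflowGraphEdge objects by __post_init__. Note that edge[2] is accessed
# unconditionally, so a details slot is always needed, even if empty.
graph = WorkflowGraph(
    nodes=[extract_node, summarize_node],
    edges=[(extract_node, summarize_node, {})],
    primary_start_node=extract_node,
)
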
3 changes: 3 additions & 0 deletions abacusai/api_class/enums.py
@@ -485,6 +485,8 @@ class LLMName(ApiEnum):
ABACUS_SMAUG3 = 'ABACUS_SMAUG3'
ABACUS_DRACARYS = 'ABACUS_DRACARYS'
QWEN_2_5_32B = 'QWEN_2_5_32B'
+    QWEN_2_5_32B_BASE = 'QWEN_2_5_32B_BASE'
+    QWEN_2_5_72B = 'QWEN_2_5_72B'
QWQ_32B = 'QWQ_32B'
GEMINI_1_5_FLASH = 'GEMINI_1_5_FLASH'
XAI_GROK = 'XAI_GROK'
@@ -554,6 +556,7 @@ class WorkflowNodeInputType(ApiEnum):
USER_INPUT = 'USER_INPUT'
WORKFLOW_VARIABLE = 'WORKFLOW_VARIABLE'
IGNORE = 'IGNORE'
+    CONSTANT = 'CONSTANT'


class WorkflowNodeOutputType(ApiEnum):
2 changes: 1 addition & 1 deletion abacusai/api_client_utils.py
@@ -617,7 +617,7 @@ def combine_doc_info(group):
cls.TOKENS: [token for page in page_infos for token in page.get(cls.TOKENS) or []],
# default to embedded text
cls.PAGES: [page.get(cls.PAGE_TEXT) or '' for page in page_infos],
-            **({cls.DOC_ID: page_infos[0]} if cls.DOC_ID in page_infos[0] else {}),
+            **({cls.DOC_ID: page_infos[0][cls.DOC_ID]} if cls.DOC_ID in page_infos[0] else {}),
}
document_data[cls.EMBEDDED_TEXT] = combine_page_texts(info.get(
cls.EMBEDDED_TEXT) or info.get(cls.PAGE_TEXT) or '' for info in page_infos)
12 changes: 7 additions & 5 deletions abacusai/client.py
@@ -181,6 +181,7 @@
from .web_page_response import WebPageResponse
from .web_search_response import WebSearchResponse
from .webhook import Webhook
+from .workflow_graph_node import WorkflowGraphNode
from .workflow_node_template import WorkflowNodeTemplate


@@ -635,7 +636,7 @@ class BaseApiClient:
client_options (ClientOptions): Optional API client configurations
skip_version_check (bool): If true, will skip checking the server's current API version on initializing the client
"""
-    client_version = '1.4.23'
+    client_version = '1.4.24'

def __init__(self, api_key: str = None, server: str = None, client_options: ClientOptions = None, skip_version_check: bool = False, include_tb: bool = False):
self.api_key = api_key
@@ -3743,7 +3744,8 @@ def _endpoint(deployment_id: str, ttl_hash: int):

with self._request(endpoint, 'GET', query_params={'sql': sql}, stream=True, retry_500=True) as response:
if response.status_code == 200:
-                return pd.read_csv(response.raw, sep=',')
+                buf = io.BytesIO(response.content)
+                return pd.read_parquet(buf, engine='pyarrow')
else:
error_json = response.json()
error_message = error_json.get('error')
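
The SQL execution path now parses the streamed response as Parquet instead of CSV. A self-contained sketch of why that matters, using only pandas/pyarrow with no API calls:

import io

import pandas as pd

# Parquet preserves dtypes (datetimes, nullable ints) that a CSV round-trip
# would flatten to strings or floats.
frame = pd.DataFrame({'id': [1, 2, 3], 'created': pd.to_datetime(['2024-12-20'] * 3)})
payload = frame.to_parquet(engine='pyarrow')  # bytes, standing in for response.content
restored = pd.read_parquet(io.BytesIO(payload), engine='pyarrow')
assert restored.equals(frame)
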
@@ -6004,7 +6006,7 @@ def get_docstore_document_data(self, doc_id: str, document_processing_config: Un
return self._proxy_request('getDocstoreDocumentData', 'POST', query_params={}, body={'docId': doc_id, 'documentProcessingConfig': document_processing_config, 'documentProcessingVersion': document_processing_version, 'returnExtractedPageText': return_extracted_page_text}, parse_type=DocumentData, is_sync=True)

def extract_document_data(self, document: io.TextIOBase = None, doc_id: str = None, document_processing_config: Union[dict, DocumentProcessingConfig] = None, start_page: int = None, end_page: int = None, return_extracted_page_text: bool = False) -> DocumentData:
"""Extracts data from a document.
"""Extracts data from a document using either OCR (for scanned documents/images) or embedded text extraction (for digital documents like .docx). Configure the extraction method through DocumentProcessingConfig

Args:
document (io.TextIOBase): The document to extract data from. One of document or doc_id must be provided.
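
A hypothetical call matching the clarified docstring; the doc_id and the config field shown are placeholders and assumptions, not verified options:

from abacusai import ApiClient
from abacusai.api_class import DocumentProcessingConfig

client = ApiClient('YOUR_API_KEY')  # placeholder credentials
# extract_bounding_boxes is an assumed DocumentProcessingConfig field used
# here to force OCR-style extraction for a scanned document.
data = client.extract_document_data(
    doc_id='fake_doc_id',
    document_processing_config=DocumentProcessingConfig(extract_bounding_boxes=True),
    return_extracted_page_text=True,
)
print(data)
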
@@ -8640,7 +8642,7 @@ def generate_agent_code(self, project_id: str, prompt: str, fast_mode: bool = No
fast_mode (bool): If True, runs a faster but slightly less accurate code generation pipeline"""
return self._call_api('generateAgentCode', 'POST', query_params={}, body={'projectId': project_id, 'prompt': prompt, 'fastMode': fast_mode})

-    def evaluate_prompt(self, prompt: str = None, system_message: str = None, llm_name: Union[LLMName, str] = None, max_tokens: int = None, temperature: float = 0.0, messages: list = None, response_type: str = None, json_response_schema: dict = None, stop_sequences: list = None, top_p: float = None) -> LlmResponse:
+    def evaluate_prompt(self, prompt: str = None, system_message: str = None, llm_name: Union[LLMName, str] = None, max_tokens: int = None, temperature: float = 0.0, messages: list = None, response_type: str = None, json_response_schema: dict = None, stop_sequences: List = None, top_p: float = None) -> LlmResponse:
"""Generate response to the prompt using the specified model.

Args:
@@ -8652,7 +8654,7 @@ def evaluate_prompt(self, prompt: str = None, system_message: str = None, llm_na
messages (list): A list of messages to use as conversation history. For completion models like OPENAI_GPT3_5_TEXT and PALM_TEXT this should not be set. A message is a dict with attributes: is_user (bool): Whether the message is from the user. text (str): The message's text. attachments (list): The files attached to the message represented as a list of dictionaries [{"doc_id": <doc_id1>}, {"doc_id": <doc_id2>}]
response_type (str): Specifies the type of response to request from the LLM. One of 'text' and 'json'. If set to 'json', the LLM will respond with a json formatted string whose schema can be specified `json_response_schema`. Defaults to 'text'
json_response_schema (dict): A dictionary specifying the keys/schema/parameters which LLM should adhere to in its response when `response_type` is 'json'. Each parameter is mapped to a dict with the following info - type (str) (required): Data type of the parameter. description (str) (required): Description of the parameter. is_required (bool) (optional): Whether the parameter is required or not. Example: json_response_schema = {'title': {'type': 'string', 'description': 'Article title', 'is_required': true}, 'body': {'type': 'string', 'description': 'Article body'}}
-            stop_sequences (list): Specifies the strings on which the LLM will stop generation.
+            stop_sequences (List): Specifies the strings on which the LLM will stop generation.
top_p (float): The nucleus sampling value used for this run. If set, the model will sample from the smallest set of tokens whose cumulative probability exceeds the probability `top_p`. Default is 1.0. A range of 0.0 - 1.0 is allowed. It is generally recommended to use either temperature sampling or nucleus sampling, but not both.

Returns:
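For reference, a sketch of calling evaluate_prompt in JSON mode, assembled only from the parameters documented above:

from abacusai import ApiClient

client = ApiClient('YOUR_API_KEY')  # placeholder credentials
# The schema below follows the json_response_schema example in the docstring.
response = client.evaluate_prompt(
    prompt='Write a short release note for version 1.4.24.',
    llm_name='QWEN_2_5_72B',  # one of the models added in this release
    response_type='json',
    json_response_schema={
        'title': {'type': 'string', 'description': 'Note title', 'is_required': True},
        'body': {'type': 'string', 'description': 'Note body'},
    },
)
print(response.content)  # content holds the generated text on LlmResponse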