Spaces:
Sleeping
Sleeping
Deepak Sahu
committed on
Commit
·
33ff303
1
Parent(s):
fa3b522
adding first agent
Browse files- .gitattributes +1 -0
- Dockerfile +3 -0
- _agents/pandas_expression_generator/pyproject.toml +18 -0
- _agents/pandas_expression_generator/src/pandas_expression_generator.egg-info/PKG-INFO +7 -0
- _agents/pandas_expression_generator/src/pandas_expression_generator.egg-info/SOURCES.txt +10 -0
- _agents/pandas_expression_generator/src/pandas_expression_generator.egg-info/dependency_links.txt +1 -0
- _agents/pandas_expression_generator/src/pandas_expression_generator.egg-info/entry_points.txt +2 -0
- _agents/pandas_expression_generator/src/pandas_expression_generator.egg-info/requires.txt +1 -0
- _agents/pandas_expression_generator/src/pandas_expression_generator.egg-info/top_level.txt +1 -0
- _agents/pandas_expression_generator/src/pandas_expression_generator/__init__.py +0 -0
- _agents/pandas_expression_generator/src/pandas_expression_generator/__pycache__/__init__.cpython-311.pyc +0 -0
- _agents/pandas_expression_generator/src/pandas_expression_generator/__pycache__/pandas_expression_generator_function.cpython-311.pyc +0 -0
- _agents/pandas_expression_generator/src/pandas_expression_generator/__pycache__/register.cpython-311.pyc +0 -0
- _agents/pandas_expression_generator/src/pandas_expression_generator/configs/config.yml +25 -0
- _agents/pandas_expression_generator/src/pandas_expression_generator/pandas_expression_generator_function.py +150 -0
- _agents/pandas_expression_generator/src/pandas_expression_generator/register.py +5 -0
- _data/retail_customer/Retail_Customer_Cluster_Data.xlsx +3 -0
- _data/retail_customer/meta.md +3 -0
- _data/retail_customer/readme.md +3 -0
- config.yaml +9 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
_data/** filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
CHANGED
|
@@ -12,6 +12,9 @@ RUN pip install --no-cache-dir -r requirements.txt
|
|
| 12 |
|
| 13 |
COPY . .
|
| 14 |
|
|
|
|
|
|
|
|
|
|
| 15 |
# Make the script executable
|
| 16 |
RUN chmod +x start.sh
|
| 17 |
|
|
|
|
| 12 |
|
| 13 |
COPY . .
|
| 14 |
|
| 15 |
+
# Install agents
|
| 16 |
+
RUN pip install -e _agents/pandas_expression_generator
|
| 17 |
+
|
| 18 |
# Make the script executable
|
| 19 |
RUN chmod +x start.sh
|
| 20 |
|
_agents/pandas_expression_generator/pyproject.toml
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[build-system]
|
| 2 |
+
build-backend = "setuptools.build_meta"
|
| 3 |
+
requires = ["setuptools >= 64"]
|
| 4 |
+
|
| 5 |
+
[project]
|
| 6 |
+
name = "pandas_expression_generator"
|
| 7 |
+
version = "0.1.0"
|
| 8 |
+
dependencies = [
|
| 9 |
+
"aiqtoolkit[langchain]",
|
| 10 |
+
]
|
| 11 |
+
requires-python = ">=3.11,<3.13"
|
| 12 |
+
description = "Custom AIQ Toolkit Workflow"
|
| 13 |
+
classifiers = ["Programming Language :: Python"]
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
[project.entry-points.'aiq.components']
|
| 18 |
+
pandas_expression_generator = "pandas_expression_generator.register"
|
_agents/pandas_expression_generator/src/pandas_expression_generator.egg-info/PKG-INFO
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.4
|
| 2 |
+
Name: pandas_expression_generator
|
| 3 |
+
Version: 0.1.0
|
| 4 |
+
Summary: Custom AIQ Toolkit Workflow
|
| 5 |
+
Classifier: Programming Language :: Python
|
| 6 |
+
Requires-Python: <3.13,>=3.11
|
| 7 |
+
Requires-Dist: aiqtoolkit[langchain]
|
_agents/pandas_expression_generator/src/pandas_expression_generator.egg-info/SOURCES.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pyproject.toml
|
| 2 |
+
src/pandas_expression_generator/__init__.py
|
| 3 |
+
src/pandas_expression_generator/pandas_expression_generator_function.py
|
| 4 |
+
src/pandas_expression_generator/register.py
|
| 5 |
+
src/pandas_expression_generator.egg-info/PKG-INFO
|
| 6 |
+
src/pandas_expression_generator.egg-info/SOURCES.txt
|
| 7 |
+
src/pandas_expression_generator.egg-info/dependency_links.txt
|
| 8 |
+
src/pandas_expression_generator.egg-info/entry_points.txt
|
| 9 |
+
src/pandas_expression_generator.egg-info/requires.txt
|
| 10 |
+
src/pandas_expression_generator.egg-info/top_level.txt
|
_agents/pandas_expression_generator/src/pandas_expression_generator.egg-info/dependency_links.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
_agents/pandas_expression_generator/src/pandas_expression_generator.egg-info/entry_points.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[aiq.components]
|
| 2 |
+
pandas_expression_generator = pandas_expression_generator.register
|
_agents/pandas_expression_generator/src/pandas_expression_generator.egg-info/requires.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
aiqtoolkit[langchain]
|
_agents/pandas_expression_generator/src/pandas_expression_generator.egg-info/top_level.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
pandas_expression_generator
|
_agents/pandas_expression_generator/src/pandas_expression_generator/__init__.py
ADDED
|
File without changes
|
_agents/pandas_expression_generator/src/pandas_expression_generator/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (208 Bytes). View file
|
|
|
_agents/pandas_expression_generator/src/pandas_expression_generator/__pycache__/pandas_expression_generator_function.cpython-311.pyc
ADDED
|
Binary file (8.51 kB). View file
|
|
|
_agents/pandas_expression_generator/src/pandas_expression_generator/__pycache__/register.cpython-311.pyc
ADDED
|
Binary file (313 Bytes). View file
|
|
|
_agents/pandas_expression_generator/src/pandas_expression_generator/configs/config.yml
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
general:
|
| 2 |
+
use_uvloop: true
|
| 3 |
+
logging:
|
| 4 |
+
console:
|
| 5 |
+
_type: console
|
| 6 |
+
level: WARN
|
| 7 |
+
|
| 8 |
+
front_end:
|
| 9 |
+
_type: fastapi
|
| 10 |
+
|
| 11 |
+
front_end:
|
| 12 |
+
_type: console
|
| 13 |
+
|
| 14 |
+
llms:
|
| 15 |
+
nim_llm:
|
| 16 |
+
_type: nim
|
| 17 |
+
model_name : meta/llama-3.1-405b-instruct
|
| 18 |
+
temperature: 0.0
|
| 19 |
+
|
| 20 |
+
workflow:
|
| 21 |
+
_type: pandas_expression_generator
|
| 22 |
+
llm: nim_llm
|
| 23 |
+
csv_file: "/workspaces/do_aiq2/_data/retail_customer/Retail_Customer_Cluster_Data.xlsx"
|
| 24 |
+
csv_metafile: "/workspaces/do_aiq2/_data/retail_customer/meta.md"
|
| 25 |
+
description: "Use this agent to handles retail customers data"
|
_agents/pandas_expression_generator/src/pandas_expression_generator/pandas_expression_generator_function.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
|
| 3 |
+
from pydantic import Field
|
| 4 |
+
|
| 5 |
+
from aiq.builder.builder import Builder
|
| 6 |
+
from aiq.builder.function_info import FunctionInfo
|
| 7 |
+
from aiq.cli.register_workflow import register_function
|
| 8 |
+
from aiq.data_models.function import FunctionBaseConfig
|
| 9 |
+
from aiq.data_models.component_ref import LLMRef
|
| 10 |
+
from aiq.builder.framework_enum import LLMFrameworkEnum
|
| 11 |
+
from langchain_core.messages import AIMessage
|
| 12 |
+
import pandas as pd
|
| 13 |
+
from pydantic import BaseModel
|
| 14 |
+
import numpy as np
|
| 15 |
+
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class PandasExpressionGeneratorFunctionConfig(FunctionBaseConfig, name="pandas_expression_generator"):
    """
    Configuration for the ``pandas_expression_generator`` function.

    The registered function loads a dataset into a pandas DataFrame, asks the
    configured LLM to write pandas statements for one step of a user query,
    and executes those statements against the DataFrame.
    """
    # Reference to the LLM that generates the pandas statements.
    llm: LLMRef
    # Path to the dataset. Despite the name, the file is loaded with
    # ``pd.read_excel``, so it must be an Excel workbook.
    csv_file: str
    # Path to a UTF-8 text file describing the dataset; its content is
    # embedded verbatim in the LLM prompt.
    csv_metafile: str
    description: str = Field(default="A dataset query engine", description="Describe for which data the engine is used for")
    # Retry budget for a failed generate-and-execute cycle.
    # NOTE(review): confirm the registered function actually honors this value.
    max_retries: int = Field(default=2)
|
| 30 |
+
|
| 31 |
+
class FunctionInput(BaseModel):
    """Input accepted by the registered function: the step to solve plus its surrounding context."""
    original_user_query: str = Field(description="The original user query as-is")
    past_step: str = Field(description="Information/ output/ data from the previous action(s) if any else leave it empty string")
    current_step: str = Field(description="Part of the intermediate step when resolving the big user query.")
    next_step: str = Field(description="What next are we planning to do once we perform the current step.")
|
| 36 |
+
|
| 37 |
+
@register_function(config_type=PandasExpressionGeneratorFunctionConfig)
|
| 38 |
+
async def pandas_expression_generator_function(
|
| 39 |
+
config: PandasExpressionGeneratorFunctionConfig, builder: Builder
|
| 40 |
+
):
|
| 41 |
+
df: pd.DataFrame = pd.read_excel(config.csv_file)
|
| 42 |
+
with open(config.csv_metafile, "r", encoding="utf-8") as f:
|
| 43 |
+
df_meta: str = f.read()
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def extract_bracket_content(text):
    """Return the span from the first ``[`` to the last ``]`` of *text*, inclusive.

    Used to cut a list literal out of a reply that may carry extra text
    before or after it.

    Returns:
        The bracketed substring, or ``""`` when either bracket is missing or
        the last ``]`` appears before the first ``[``.
    """
    start = text.find('[')
    end = text.rfind(']')
    # rfind() yields -1 when ']' is absent, which is also "before" any valid
    # start index, so one comparison covers both failure modes.
    if start == -1 or end < start:
        return ""
    return text[start:end + 1]
|
| 52 |
+
|
| 53 |
+
import ast
|
| 54 |
+
def expression_executor(input_exp: str, df=None) -> object:
    """Run a list of Python statements and return whatever they bind to ``output``.

    Args:
        input_exp: Source text of a Python list literal whose items are
            statement strings, e.g. ``'["a = df.shape", "output = a[0]"]'``.
        df: Object exposed to the statements under the name ``df``.

    Returns:
        The value assigned to ``output`` by the statements, or ``None`` when
        no statement assigns it.

    Raises:
        ValueError: If ``input_exp`` is not a list/tuple of strings.
        SyntaxError: If ``input_exp`` is not a valid Python literal.
        Exception: Anything raised by the executed statements propagates.
    """
    commands = ast.literal_eval(input_exp)
    if not isinstance(commands, (list, tuple)) or not all(isinstance(c, str) for c in commands):
        raise ValueError("input_exp must be a literal list of Python statement strings")

    # SECURITY: exec() runs arbitrary code. The statements come from an LLM,
    # so this must only be used with trusted models/data or inside a sandbox.
    #
    # A single dict serves as both globals and locals. With separate dicts,
    # lambdas and comprehension bodies inside a statement cannot see variables
    # created by earlier statements.
    namespace = {'df': df, 'pd': pd, 'np': np}
    for command in commands:
        exec(command, namespace)

    return namespace.get("output")
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
# Implement your function logic here
|
| 84 |
+
async def _response_fn(input_query: FunctionInput) -> str:
    """Ask the LLM for pandas statements solving the current step and run them.

    Args:
        input_query: The step to solve together with the original query and
            the neighbouring steps, all of which are placed in the prompt.

    Returns:
        The string form of the executed ``output`` value, truncated to 250
        characters, or ``"Unable to serve the request."`` when every attempt
        fails.
    """
    # Create LLM
    llm_ = await builder.get_llm(config.llm, wrapper_type=LLMFrameworkEnum.LANGCHAIN)

    # Prompt
    prompt_ = f'''You are a python expression generator.

Given the dataframe `df` and pandas import as `pd` which has the following metadata

CSV Meta
---
{df_meta}

Generate python expression to solve the query:
current_step: {input_query.current_step}

You can reformulate(extend or reduce) the current step based on the following information
original_user_query: {input_query.original_user_query}
past_step: {input_query.past_step}
next_step: {input_query.next_step}

You must generate list of python expressions output format must as follows:

[
"key_var = df[...]" // intermediate step
"key_var2 = key_var[df[...]...]" // intermediate step

// reset the index of df if requrired
// convert the grouped dataframe to dictionaries
//
"output" = sting. Have the string as list (records) of dictionaries (columns). Ensure to reset index if required
]

DONT add prefix or suffix the text in output, just output as described format.

Certain template questions and expected approach from you
1. When user wants to what is present in the data
> you should respond back with names of columns
'''

    # One initial attempt plus ``config.max_retries`` retries. Each attempt
    # re-queries the LLM, because a failure is usually a malformed or
    # non-executable reply.
    total_attempts = max(config.max_retries, 0) + 1
    for attempt in range(1, total_attempts + 1):
        try:
            ai_message: AIMessage = await llm_.ainvoke(prompt_)
            structured_text_str = extract_bracket_content(ai_message.content)
            # Cap the size of the text handed back to the caller.
            return str(expression_executor(structured_text_str, df))[:250]
        except Exception as e:
            # Best-effort boundary: generated code can fail in arbitrary ways,
            # so log the error and either retry or fall through to the fallback.
            logger.error(str(e))
            if attempt < total_attempts:
                logger.info("Retrying...")

    return "Unable to serve the request."
|
| 141 |
+
|
| 142 |
+
try:
|
| 143 |
+
yield FunctionInfo.from_fn(
|
| 144 |
+
_response_fn,
|
| 145 |
+
description=config.description
|
| 146 |
+
)
|
| 147 |
+
except GeneratorExit:
|
| 148 |
+
print("Function exited early!")
|
| 149 |
+
finally:
|
| 150 |
+
print("Cleaning up pandas_expression_generator workflow.")
|
_agents/pandas_expression_generator/src/pandas_expression_generator/register.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# pylint: disable=unused-import
|
| 2 |
+
# flake8: noqa
|
| 3 |
+
|
| 4 |
+
# Import any tools which need to be automatically registered here
|
| 5 |
+
from pandas_expression_generator import pandas_expression_generator_function
|
_data/retail_customer/Retail_Customer_Cluster_Data.xlsx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:643249aed77417875d9de227e9278f19a6fa015a27cccdcc1bfa6f30f96300ca
|
| 3 |
+
size 14884
|
_data/retail_customer/meta.md
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:acc3e3133f0223c972115dc4513b6f44d0e59cf33330b05cf036b38e23df1d25
|
| 3 |
+
size 2881
|
_data/retail_customer/readme.md
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ca0862d59bca2daf655928d57abc9684c9f844e2715bc868157d89809cb1477
|
| 3 |
+
size 51
|
config.yaml
CHANGED
|
@@ -20,10 +20,19 @@ llms:
|
|
| 20 |
functions:
|
| 21 |
current_datetime:
|
| 22 |
_type: current_datetime
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
workflow:
|
| 25 |
_type: react_agent
|
| 26 |
tool_names:
|
|
|
|
| 27 |
- current_datetime
|
| 28 |
llm_name: nim_llm
|
| 29 |
verbose: true
|
|
|
|
| 20 |
functions:
|
| 21 |
current_datetime:
|
| 22 |
_type: current_datetime
|
| 23 |
+
|
| 24 |
+
retail_customer_data:
|
| 25 |
+
_type: pandas_expression_generator
|
| 26 |
+
llm: nim_llm
|
| 27 |
+
csv_file: "/app/_data/retail_customer/Retail_Customer_Cluster_Data.xlsx"
|
| 28 |
+
csv_metafile: "/app/_data/retail_customer/meta.md"
|
| 29 |
+
description: "Use this tool to get data related to Customer Retails."
|
| 30 |
+
|
| 31 |
|
| 32 |
workflow:
|
| 33 |
_type: react_agent
|
| 34 |
tool_names:
|
| 35 |
+
- retail_customer_data
|
| 36 |
- current_datetime
|
| 37 |
llm_name: nim_llm
|
| 38 |
verbose: true
|