whatever
Browse files- controller.py +1 -28
- groq_csv_agent.py +0 -187
controller.py
CHANGED
|
@@ -35,7 +35,6 @@ from orchestrator_agent import csv_orchestrator_chat_gemini
|
|
| 35 |
from python_code_executor_service import CsvChatResult, PythonExecutor
|
| 36 |
from supabase_service import upload_file_to_supabase
|
| 37 |
from cerebras_csv_agent import query_csv_agent_cerebras
|
| 38 |
-
from groq_csv_agent import query_csv_agent_groq
|
| 39 |
from util_service import _prompt_generator, process_answer
|
| 40 |
from fastapi.middleware.cors import CORSMiddleware
|
| 41 |
|
|
@@ -456,18 +455,6 @@ async def csv_chat(request: Dict, authorization: str = Header(None)):
|
|
| 456 |
logger.info("Processing detailed answer with orchestrator...")
|
| 457 |
return await handle_detailed_answer(decoded_url, query, conversation_history, chat_id)
|
| 458 |
|
| 459 |
-
# Process with standard CSV agent (Groq)
|
| 460 |
-
logger.info("Processing with standard CSV agent (Groq)...")
|
| 461 |
-
try:
|
| 462 |
-
result = await query_csv_agent_groq(decoded_url, query, chat_id)
|
| 463 |
-
logger.info(f"Standard CSV agent (Groq) result: {str(result)[:200]}...")
|
| 464 |
-
if result is not None and result != "":
|
| 465 |
-
return {"answer": result}
|
| 466 |
-
else:
|
| 467 |
-
logger.warning("Standard CSV agent (Groq) returned empty or None result")
|
| 468 |
-
except Exception as e:
|
| 469 |
-
logger.error(f"Standard CSV agent (Groq) failed: {str(e)}")
|
| 470 |
-
|
| 471 |
# Process with standard CSV agent (Cerebras)
|
| 472 |
logger.info("Groq failed. Processing with standard CSV agent (Cerebras)...")
|
| 473 |
try:
|
|
@@ -738,21 +725,7 @@ async def csv_chart(request: dict, authorization: str = Header(None)):
|
|
| 738 |
|
| 739 |
if orchestrator_answer is not None:
|
| 740 |
return {"orchestrator_response": jsonable_encoder(orchestrator_answer)}
|
| 741 |
-
|
| 742 |
-
# Next, try the groq-based method
|
| 743 |
-
# groq_result = await loop.run_in_executor(
|
| 744 |
-
# process_executor, groq_chart, csv_url, query
|
| 745 |
-
# )
|
| 746 |
-
# logger.info(f"Groq chart result: {groq_result}")
|
| 747 |
-
# if isinstance(groq_result, str) and groq_result != "Chart not generated":
|
| 748 |
-
# unique_file_name =f'{str(uuid.uuid4())}.png'
|
| 749 |
-
# logger.info("Uploading the chart to supabase...")
|
| 750 |
-
# image_public_url = await upload_file_to_supabase(f"{groq_result}", unique_file_name, chat_id=chat_id)
|
| 751 |
-
# logger.info("Image uploaded to Supabase and Image URL is... ", {image_public_url})
|
| 752 |
-
# os.remove(groq_result)
|
| 753 |
-
# return {"image_url": image_public_url}
|
| 754 |
-
# return FileResponse(groq_result, media_type="image/png")
|
| 755 |
-
|
| 756 |
logger.info("Trying cerebras ai llama...")
|
| 757 |
result = await query_csv_agent_cerebras(csv_url, query, chat_id)
|
| 758 |
logger.info("cerebras ai result ==>", result)
|
|
|
|
| 35 |
from python_code_executor_service import CsvChatResult, PythonExecutor
|
| 36 |
from supabase_service import upload_file_to_supabase
|
| 37 |
from cerebras_csv_agent import query_csv_agent_cerebras
|
|
|
|
| 38 |
from util_service import _prompt_generator, process_answer
|
| 39 |
from fastapi.middleware.cors import CORSMiddleware
|
| 40 |
|
|
|
|
| 455 |
logger.info("Processing detailed answer with orchestrator...")
|
| 456 |
return await handle_detailed_answer(decoded_url, query, conversation_history, chat_id)
|
| 457 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 458 |
# Process with standard CSV agent (Cerebras)
|
| 459 |
logger.info("Groq failed. Processing with standard CSV agent (Cerebras)...")
|
| 460 |
try:
|
|
|
|
| 725 |
|
| 726 |
if orchestrator_answer is not None:
|
| 727 |
return {"orchestrator_response": jsonable_encoder(orchestrator_answer)}
|
| 728 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 729 |
logger.info("Trying cerebras ai llama...")
|
| 730 |
result = await query_csv_agent_cerebras(csv_url, query, chat_id)
|
| 731 |
logger.info("cerebras ai result ==>", result)
|
groq_csv_agent.py
DELETED
|
@@ -1,187 +0,0 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
import json
|
| 3 |
-
from typing import Optional
|
| 4 |
-
from pydantic import BaseModel
|
| 5 |
-
from dotenv import load_dotenv
|
| 6 |
-
from pydantic_ai import Agent
|
| 7 |
-
from csv_service import clean_data
|
| 8 |
-
from python_code_executor_service import PythonExecutor
|
| 9 |
-
from groq_instance_provider import InstanceProvider
|
| 10 |
-
import logging
|
| 11 |
-
|
| 12 |
-
# Load environment configuration first
# (presumably API keys from a .env file — confirm against deployment).
load_dotenv()

# Shared provider; create_csv_agent asks it for a model instance and reports
# failing API keys back to it.
instance_provider = InstanceProvider()

# Root logging at INFO, plus a module-scoped logger used throughout this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
| 18 |
-
|
| 19 |
-
class CodeResponse(BaseModel):
    """Container for code-related responses"""
    # Language tag of the snippet; defaults to "python".
    language: str = "python"
    # Source text of the snippet (no default, so it must be supplied).
    code: str
|
| 23 |
-
|
| 24 |
-
class ChartSpecification(BaseModel):
    """Details about requested charts"""
    # Textual description of the requested image (no default).
    image_description: str
    # Optional code for the chart; defaults to None when absent.
    code: Optional[str] = None
|
| 28 |
-
|
| 29 |
-
class AnalysisOperation(BaseModel):
    """Container for a single analysis operation with its code and result"""
    # The code to run for this operation.
    code: CodeResponse
    # Presumably the name of the variable the code assigns its result to
    # (the system prompt demands variable assignment) — TODO confirm.
    result_var: str
|
| 33 |
-
|
| 34 |
-
class CsvChatResult(BaseModel):
    """Structured response for CSV-related AI interactions"""

    # Casual chat response
    casual_response: str

    # Data analysis components
    # analysis_operations: List[AnalysisOperation]
    # NOTE(review): Optional[...] with no explicit default; whether the field
    # is required depends on the installed pydantic major version — confirm.
    analysis_operations: Optional[AnalysisOperation]

    # Visualization components
    # charts: Optional[List[ChartSpecification]] = None
    # NOTE(review): same Optional-without-default caveat as above.
    charts: Optional[ChartSpecification]
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
def get_csv_info(df: pd.DataFrame) -> dict:
    """Summarise a DataFrame as a plain dictionary.

    Args:
        df: The DataFrame to describe.

    Returns:
        A dict with, in this order: ``num_rows``, ``num_cols``,
        ``example_rows`` (the first two rows as record dicts), ``dtypes``
        (column name -> dtype rendered as a string), ``columns``,
        ``numeric_columns`` and ``categorical_columns`` (columns for which
        pandas reports a numeric / string dtype respectively).
    """
    column_names = list(df.columns)

    dtype_names = {}
    numeric_names = []
    string_names = []
    # Single pass over the columns, classifying each one as we go.
    for name in column_names:
        series = df[name]
        dtype_names[name] = str(series.dtype)
        if pd.api.types.is_numeric_dtype(series):
            numeric_names.append(name)
        if pd.api.types.is_string_dtype(series):
            string_names.append(name)

    return {
        'num_rows': len(df),
        'num_cols': len(column_names),
        'example_rows': df.head(2).to_dict('records'),
        'dtypes': dtype_names,
        'columns': column_names,
        'numeric_columns': numeric_names,
        'categorical_columns': string_names,
    }
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
def get_csv_system_prompt(df: pd.DataFrame) -> str:
    """Build the system prompt used by the CSV analysis agent.

    The prompt embeds the row/column counts, column names, sample rows and
    dtypes obtained from ``get_csv_info(df)``, followed by fixed instructions
    that require the model to answer with executable code operating on the
    existing ``df`` and to assign every result to a variable.

    Args:
        df: The DataFrame the agent will be asked about.

    Returns:
        The fully rendered prompt text.
    """
    # Metadata is gathered once and interpolated into the template below.
    csv_info = get_csv_info(df)
    # NOTE(review): any leading whitespace the prompt lines originally had
    # could not be recovered from the diff view; the text is kept as shown.
    prompt = f"""
You're a CSV analysis assistant. The pandas DataFrame is loaded as 'df' - use this variable.

CSV Info:
- Shape: {csv_info['num_rows']} rows × {csv_info['num_cols']} cols
- Columns: {csv_info['columns']}
- Sample: {csv_info['example_rows']}
- Dtypes: {csv_info['dtypes']}

STRICT REQUIREMENTS:
1. NEVER calculate or predict values yourself - ALWAYS return executable code that would produce the result
2. Use existing 'df' - never recreate it
3. ALWAYS assign results to variables with descriptive names
4. For any data structures (Lists, Records, Tables, Dictionaries, etc.), always return them as JSON with correct indentation
5. For charts:
- Use matplotlib/seaborn only
- Professional quality: proper sizing, labels, titles
- Figure size: (14, 8) for complex, (12, 6) for simple
- Clear titles (fontsize=16), labels (fontsize=14)
- Rotate x-labels if needed (45°, fontsize=12)
- Add annotations/gridlines where helpful
- Use colorblind-friendly palettes
- Always include plt.tight_layout()

VARIABLE ASSIGNMENT RULES:
1. Every operation must store its result in a variable
2. Variable names should be descriptive and snake_case
3. For DataFrame operations: result_df = df.operation()
4. For statistical results: stats_result = df['column'].describe()
5. For visualizations: assign the figure/axis objects when needed
6. For filtered data: filtered_data = df[df['column'] > value]

Example professional chart (with variable assignment):
fig, ax = plt.subplots(figsize=(14, 8))
sns.barplot(x='category', y='value', data=df, palette='muted', ax=ax)
ax.set_title('Value by Category', fontsize=16)
ax.set_xlabel('Category', fontsize=14)
ax.set_ylabel('Value', fontsize=14)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
ax.grid(alpha=0.3)
plt.tight_layout()
plt.show()

Example professional response for dataframe operations:
# GOOD (with variable assignment)
sample_transactions = df.sample(5)[['id', 'date', 'amount']]
transaction_stats = df['amount'].describe()

# BAD (no variable assignment)
df.sample(5)[['id', 'date', 'amount']] # No variable assigned!

Example professional response for analysis:
# Calculate average by category
category_means = df.groupby('category')['value'].mean().reset_index()

# Get top 10 items
top_items = df.nlargest(10, 'sales')

Return complete, executable code that follows these rules.
Your code MUST assign all results to variables.
"""
    return prompt
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
def create_csv_agent(df: pd.DataFrame, max_retries: int = 1) -> Agent:
    """Create and return a CSV analysis agent with API key rotation.

    Each attempt asks ``instance_provider`` for a model instance and builds an
    ``Agent`` around it. When an attempt fails, the failure is logged, the API
    key behind the model (if one can be resolved) is reported to the provider,
    and the next attempt is made.

    Args:
        df: DataFrame whose metadata is embedded in the system prompt.
        max_retries: Maximum number of attempts before giving up.

    Returns:
        An ``Agent`` configured with ``CsvChatResult`` as its output type.

    Raises:
        RuntimeError: If no attempt succeeds. The last underlying error, if
            any, is attached as ``__cause__``.
    """
    csv_system_prompt = get_csv_system_prompt(df)
    last_error = None

    for attempt in range(max_retries):
        # Reset per attempt so the handler never reads an unbound or stale
        # model when get_instance() itself raises.
        model = None
        try:
            model = instance_provider.get_instance()
            if model is None:
                raise RuntimeError("No available API instances")

            return Agent(
                model=model,
                output_type=CsvChatResult,
                system_prompt=csv_system_prompt,
            )

        except Exception as e:
            last_error = e
            # Always record the failure, even when no key can be blamed.
            logger.info(f"Error with API key (attempt {attempt + 1}): {str(e)}")
            api_key = (
                instance_provider.get_api_key_for_model(model)
                if model is not None
                else None
            )
            if api_key:
                instance_provider.report_error(api_key)

    raise RuntimeError(
        f"Failed to create agent after {max_retries} attempts"
    ) from last_error
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
async def query_csv_agent_groq(csv_url: str, question: str, chat_id: str) -> str:
    """Query the CSV agent with a question and return the formatted output.

    Args:
        csv_url: Location of the CSV; passed to ``clean_data`` to obtain the
            DataFrame.
        question: The user's question, forwarded to the agent.
        chat_id: Forwarded to ``PythonExecutor.process_response``.

    Returns:
        Whatever ``PythonExecutor.process_response`` produces for the agent's
        structured reply.

    Raises:
        ValueError: If the agent output is not a ``CsvChatResult`` and cannot
            be converted into one; the parsing error is chained as the cause.
    """
    # Get the DataFrame from the CSV URL
    df = clean_data(csv_url)

    # Create agent and get response
    agent = create_csv_agent(df)
    result = await agent.run(question)

    # Process the response through PythonExecutor
    executor = PythonExecutor(df)

    # Convert the raw output to CsvChatResult if needed
    output = result.output
    if isinstance(output, CsvChatResult):
        chat_result = output
    else:
        try:
            # Accept an already-parsed mapping or a JSON string.
            response_data = output if isinstance(output, dict) else json.loads(output)
            chat_result = CsvChatResult(**response_data)
        except Exception as e:
            raise ValueError(f"Could not parse agent response: {str(e)}") from e

    # The value needs a %s placeholder; passing it as a bare extra argument
    # makes logging report a formatting error instead of the message.
    logger.info("Chat Result Original Object: %s", chat_result)

    # Process and format the response
    formatted_output = await executor.process_response(chat_result, chat_id)

    return formatted_output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|