Soumik555 committed on
Commit
3dadacf
·
1 Parent(s): 7f5f121
Files changed (2) hide show
  1. controller.py +1 -28
  2. groq_csv_agent.py +0 -187
controller.py CHANGED
@@ -35,7 +35,6 @@ from orchestrator_agent import csv_orchestrator_chat_gemini
35
  from python_code_executor_service import CsvChatResult, PythonExecutor
36
  from supabase_service import upload_file_to_supabase
37
  from cerebras_csv_agent import query_csv_agent_cerebras
38
- from groq_csv_agent import query_csv_agent_groq
39
  from util_service import _prompt_generator, process_answer
40
  from fastapi.middleware.cors import CORSMiddleware
41
 
@@ -456,18 +455,6 @@ async def csv_chat(request: Dict, authorization: str = Header(None)):
456
  logger.info("Processing detailed answer with orchestrator...")
457
  return await handle_detailed_answer(decoded_url, query, conversation_history, chat_id)
458
 
459
- # Process with standard CSV agent (Groq)
460
- logger.info("Processing with standard CSV agent (Groq)...")
461
- try:
462
- result = await query_csv_agent_groq(decoded_url, query, chat_id)
463
- logger.info(f"Standard CSV agent (Groq) result: {str(result)[:200]}...")
464
- if result is not None and result != "":
465
- return {"answer": result}
466
- else:
467
- logger.warning("Standard CSV agent (Groq) returned empty or None result")
468
- except Exception as e:
469
- logger.error(f"Standard CSV agent (Groq) failed: {str(e)}")
470
-
471
  # Process with standard CSV agent (Cerebras)
472
  logger.info("Groq failed. Processing with standard CSV agent (Cerebras)...")
473
  try:
@@ -738,21 +725,7 @@ async def csv_chart(request: dict, authorization: str = Header(None)):
738
 
739
  if orchestrator_answer is not None:
740
  return {"orchestrator_response": jsonable_encoder(orchestrator_answer)}
741
-
742
- # Next, try the groq-based method
743
- # groq_result = await loop.run_in_executor(
744
- # process_executor, groq_chart, csv_url, query
745
- # )
746
- # logger.info(f"Groq chart result: {groq_result}")
747
- # if isinstance(groq_result, str) and groq_result != "Chart not generated":
748
- # unique_file_name =f'{str(uuid.uuid4())}.png'
749
- # logger.info("Uploading the chart to supabase...")
750
- # image_public_url = await upload_file_to_supabase(f"{groq_result}", unique_file_name, chat_id=chat_id)
751
- # logger.info("Image uploaded to Supabase and Image URL is... ", {image_public_url})
752
- # os.remove(groq_result)
753
- # return {"image_url": image_public_url}
754
- # return FileResponse(groq_result, media_type="image/png")
755
-
756
  logger.info("Trying cerebras ai llama...")
757
  result = await query_csv_agent_cerebras(csv_url, query, chat_id)
758
  logger.info("cerebras ai result ==>", result)
 
35
  from python_code_executor_service import CsvChatResult, PythonExecutor
36
  from supabase_service import upload_file_to_supabase
37
  from cerebras_csv_agent import query_csv_agent_cerebras
 
38
  from util_service import _prompt_generator, process_answer
39
  from fastapi.middleware.cors import CORSMiddleware
40
 
 
455
  logger.info("Processing detailed answer with orchestrator...")
456
  return await handle_detailed_answer(decoded_url, query, conversation_history, chat_id)
457
 
 
 
 
 
 
 
 
 
 
 
 
 
458
  # Process with standard CSV agent (Cerebras)
459
  logger.info("Groq failed. Processing with standard CSV agent (Cerebras)...")
460
  try:
 
725
 
726
  if orchestrator_answer is not None:
727
  return {"orchestrator_response": jsonable_encoder(orchestrator_answer)}
728
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
729
  logger.info("Trying cerebras ai llama...")
730
  result = await query_csv_agent_cerebras(csv_url, query, chat_id)
731
  logger.info("cerebras ai result ==>", result)
groq_csv_agent.py DELETED
@@ -1,187 +0,0 @@
1
- import pandas as pd
2
- import json
3
- from typing import Optional
4
- from pydantic import BaseModel
5
- from dotenv import load_dotenv
6
- from pydantic_ai import Agent
7
- from csv_service import clean_data
8
- from python_code_executor_service import PythonExecutor
9
- from groq_instance_provider import InstanceProvider
10
- import logging
11
-
12
- load_dotenv()
13
-
14
- instance_provider = InstanceProvider()
15
-
16
- logging.basicConfig(level=logging.INFO)
17
- logger = logging.getLogger(__name__)
18
-
19
- class CodeResponse(BaseModel):
20
- """Container for code-related responses"""
21
- language: str = "python"
22
- code: str
23
-
24
- class ChartSpecification(BaseModel):
25
- """Details about requested charts"""
26
- image_description: str
27
- code: Optional[str] = None
28
-
29
- class AnalysisOperation(BaseModel):
30
- """Container for a single analysis operation with its code and result"""
31
- code: CodeResponse
32
- result_var: str
33
-
34
- class CsvChatResult(BaseModel):
35
- """Structured response for CSV-related AI interactions"""
36
-
37
- # Casual chat response
38
- casual_response: str
39
-
40
- # Data analysis components
41
- # analysis_operations: List[AnalysisOperation]
42
- analysis_operations: Optional[AnalysisOperation]
43
-
44
- # Visualization components
45
- # charts: Optional[List[ChartSpecification]] = None
46
- charts: Optional[ChartSpecification]
47
-
48
-
49
- def get_csv_info(df: pd.DataFrame) -> dict:
50
- """Get metadata/info about the CSV"""
51
- info = {
52
- 'num_rows': len(df),
53
- 'num_cols': len(df.columns),
54
- 'example_rows': df.head(2).to_dict('records'),
55
- 'dtypes': {col: str(df[col].dtype) for col in df.columns},
56
- 'columns': list(df.columns),
57
- 'numeric_columns': [col for col in df.columns if pd.api.types.is_numeric_dtype(df[col])],
58
- 'categorical_columns': [col for col in df.columns if pd.api.types.is_string_dtype(df[col])]
59
- }
60
- return info
61
-
62
-
63
- def get_csv_system_prompt(df: pd.DataFrame) -> str:
64
- """Generate system prompt for CSV analysis with strict variable requirements"""
65
- csv_info = get_csv_info(df)
66
- prompt = f"""
67
- You're a CSV analysis assistant. The pandas DataFrame is loaded as 'df' - use this variable.
68
-
69
- CSV Info:
70
- - Shape: {csv_info['num_rows']} rows × {csv_info['num_cols']} cols
71
- - Columns: {csv_info['columns']}
72
- - Sample: {csv_info['example_rows']}
73
- - Dtypes: {csv_info['dtypes']}
74
-
75
- STRICT REQUIREMENTS:
76
- 1. NEVER calculate or predict values yourself - ALWAYS return executable code that would produce the result
77
- 2. Use existing 'df' - never recreate it
78
- 3. ALWAYS assign results to variables with descriptive names
79
- 4. For any data structures (Lists, Records, Tables, Dictionaries, etc.), always return them as JSON with correct indentation
80
- 5. For charts:
81
- - Use matplotlib/seaborn only
82
- - Professional quality: proper sizing, labels, titles
83
- - Figure size: (14, 8) for complex, (12, 6) for simple
84
- - Clear titles (fontsize=16), labels (fontsize=14)
85
- - Rotate x-labels if needed (45°, fontsize=12)
86
- - Add annotations/gridlines where helpful
87
- - Use colorblind-friendly palettes
88
- - Always include plt.tight_layout()
89
-
90
- VARIABLE ASSIGNMENT RULES:
91
- 1. Every operation must store its result in a variable
92
- 2. Variable names should be descriptive and snake_case
93
- 3. For DataFrame operations: result_df = df.operation()
94
- 4. For statistical results: stats_result = df['column'].describe()
95
- 5. For visualizations: assign the figure/axis objects when needed
96
- 6. For filtered data: filtered_data = df[df['column'] > value]
97
-
98
- Example professional chart (with variable assignment):
99
- fig, ax = plt.subplots(figsize=(14, 8))
100
- sns.barplot(x='category', y='value', data=df, palette='muted', ax=ax)
101
- ax.set_title('Value by Category', fontsize=16)
102
- ax.set_xlabel('Category', fontsize=14)
103
- ax.set_ylabel('Value', fontsize=14)
104
- ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
105
- ax.grid(alpha=0.3)
106
- plt.tight_layout()
107
- plt.show()
108
-
109
- Example professional response for dataframe operations:
110
- # GOOD (with variable assignment)
111
- sample_transactions = df.sample(5)[['id', 'date', 'amount']]
112
- transaction_stats = df['amount'].describe()
113
-
114
- # BAD (no variable assignment)
115
- df.sample(5)[['id', 'date', 'amount']] # No variable assigned!
116
-
117
- Example professional response for analysis:
118
- # Calculate average by category
119
- category_means = df.groupby('category')['value'].mean().reset_index()
120
-
121
- # Get top 10 items
122
- top_items = df.nlargest(10, 'sales')
123
-
124
- Return complete, executable code that follows these rules.
125
- Your code MUST assign all results to variables.
126
- """
127
- return prompt
128
-
129
-
130
- def create_csv_agent(df: pd.DataFrame, max_retries: int = 1) -> Agent:
131
- """Create and return a CSV analysis agent with API key rotation"""
132
- csv_system_prompt = get_csv_system_prompt(df)
133
-
134
- for attempt in range(max_retries):
135
- try:
136
- model = instance_provider.get_instance()
137
- if model is None:
138
- raise RuntimeError("No available API instances")
139
-
140
- csv_agent = Agent(
141
- model=model,
142
- output_type=CsvChatResult,
143
- system_prompt=csv_system_prompt,
144
- )
145
-
146
- return csv_agent
147
-
148
- except Exception as e:
149
- api_key = instance_provider.get_api_key_for_model(model)
150
- if api_key:
151
- logger.info(f"Error with API key (attempt {attempt + 1}): {str(e)}")
152
- instance_provider.report_error(api_key)
153
- continue
154
-
155
- raise RuntimeError(f"Failed to create agent after {max_retries} attempts")
156
-
157
-
158
- async def query_csv_agent_groq(csv_url: str, question: str, chat_id: str) -> str:
159
- """Query the CSV agent with a DataFrame and question and return formatted output"""
160
-
161
- # Get the DataFrame from the CSV URL
162
- df = clean_data(csv_url)
163
-
164
- # Create agent and get response
165
- agent = create_csv_agent(df)
166
- result = await agent.run(question)
167
-
168
- # Process the response through PythonExecutor
169
- executor = PythonExecutor(df)
170
-
171
- # Convert the raw output to CsvChatResult if needed
172
- if not isinstance(result.output, CsvChatResult):
173
- # Handle case where output needs conversion
174
- try:
175
- response_data = result.output if isinstance(result.output, dict) else json.loads(result.output)
176
- chat_result = CsvChatResult(**response_data)
177
- except Exception as e:
178
- raise ValueError(f"Could not parse agent response: {str(e)}")
179
- else:
180
- chat_result = result.output
181
-
182
- logger.info("Chat Result Original Object:", chat_result)
183
-
184
- # Process and format the response
185
- formatted_output = await executor.process_response(chat_result, chat_id)
186
-
187
- return formatted_output