Soumik555 commited on
Commit
a7c62a2
·
1 Parent(s): c281676

Changed model to gemini-flash-2.0, configured via the .env file

Browse files
cerebras_report_generator.py ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import numpy as np
3
+ import pandas as pd
4
+ import re
5
+ import os
6
+ import uuid
7
+ import logging
8
+ from io import StringIO
9
+ import sys
10
+ import traceback
11
+ from typing import Optional, Dict, Any, List
12
+ from pydantic import BaseModel, Field
13
+ from openai import OpenAI
14
+ from dotenv import load_dotenv
15
+ import seaborn as sns
16
+ import datetime as dt
17
+
18
+ from supabase_service import upload_file_to_supabase
19
+
20
# Show full DataFrame content when stringified — the head/columns are embedded
# verbatim in LLM prompts, so nothing may be truncated.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)

# Load configuration from a local .env file.
load_dotenv()

# Comma-separated key list; reversed so the last-listed key is tried first.
API_KEYS = os.getenv("CEREBRAS_API_KEYS", "").split(",")[::-1]
MODEL_NAME = os.getenv("CEREBRAS_MODEL")  # model name from .env (None if unset — no hard-coded default)
CEREBRAS_BASE_URL = os.getenv("CEREBRAS_BASE_URL")
29
+
30
class FileProps(BaseModel):
    """Metadata for a single generated output file after upload."""
    fileName: str  # original human-readable file name
    filePath: str  # public URL returned by the Supabase upload
    fileType: str  # 'csv' | 'image'
34
+
35
class Files(BaseModel):
    """Generated files grouped by kind (CSV exports vs. rendered charts)."""
    csv_files: List[FileProps]
    image_files: List[FileProps]
38
+
39
class FileBoxProps(BaseModel):
    """Top-level container returned by generate_csv_report_cerebras."""
    files: Files
41
+
42
# Force a non-interactive matplotlib backend BEFORE pyplot is imported, and
# neutralize plt.show() so LLM-generated code cannot block on a GUI window.
os.environ['MPLBACKEND'] = 'agg'
import matplotlib.pyplot as plt
plt.show = lambda: None

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
51
+
52
class CerebrasKeyManager:
    """Manage multiple Cerebras API keys with failover."""

    def __init__(self, api_keys: List[str], base_url: str):
        self.original_keys = api_keys.copy()   # untouched copy of every supplied key
        self.available_keys = api_keys.copy()  # keys not yet consumed by configure()
        self.base_url = base_url
        self.active_key = None                 # key currently backing self.client
        self.failed_keys = {}                  # key -> error string for keys that failed
        self.client = None                     # OpenAI-compatible client bound to active_key

    def configure(self) -> bool:
        """Try each remaining key in order until one yields a working client.

        Returns True once a key passes a live `models.list()` probe (setting
        self.client / self.active_key); returns False when every key failed.
        Consumed keys are never retried within this instance.
        """
        while self.available_keys:
            key = self.available_keys.pop(0)
            try:
                self.client = OpenAI(
                    api_key=key,
                    base_url=self.base_url
                )
                # Test the connection with a simple request
                response = self.client.models.list()
                self.active_key = key
                logger.info(f"Configured with key: {self._mask_key(key)}")
                return True
            except Exception as e:
                # NOTE(review): on failure self.client still points at the dead
                # client until the next iteration (or stays set if all keys fail).
                self.failed_keys[key] = str(e)
                logger.error(f"Key failed: {self._mask_key(key)}. Error: {str(e)}")
        logger.critical("All API keys failed")
        return False

    def _mask_key(self, key: str) -> str:
        # Expose only a short prefix/suffix so keys are safe to log.
        return f"{key[:8]}...{key[-4:]}" if key else ""
84
+
85
class PythonREPL:
    """Python REPL for LLM-generated code, with generated-file tracking.

    SECURITY NOTE(review): `execute` runs arbitrary model-generated code via
    `exec` with no sandboxing — acceptable only because the input comes from
    a trusted LLM pipeline; do not expose this to untrusted callers.
    """

    def __init__(self, df: pd.DataFrame):
        self.df = df
        # Unique per-instance directory where generated CSVs/plots land.
        self.output_dir = os.path.abspath(f'generated_outputs/{uuid.uuid4()}')
        os.makedirs(self.output_dir, exist_ok=True)
        # Names visible to the executed code; df is copied so failed runs
        # cannot corrupt the caller's frame.
        self.local_env = {
            "pd": pd,
            "df": self.df.copy(),
            "plt": plt,
            "os": os,
            "uuid": uuid,
            "sns": sns,
            "json": json,
            "dt": dt,
            "output_dir": self.output_dir
        }

    def execute(self, code: str) -> Dict[str, Any]:
        """Exec *code* against self.local_env, capturing stdout and output files.

        Returns a dict with keys: output (captured stdout), error (bool),
        error_message (traceback or None), df (possibly updated frame),
        output_dir, and files {'csv': [...], 'images': [...]} of absolute paths.
        """
        print('Executing code...', code)
        # Redirect stdout so prints from the generated code are captured.
        old_stdout = sys.stdout
        sys.stdout = mystdout = StringIO()
        file_tracker = {
            'csv_files': set(),
            'image_files': set()
        }

        try:
            # Wrap the snippet so the agg backend is active and all figures
            # are closed afterwards. NOTE(review): assumes the wrapped lines
            # sit at column 0 inside the f-string — confirm in the real file.
            code = f"""
import matplotlib.pyplot as plt
plt.switch_backend('agg')
{code}
plt.close('all')
"""
            exec(code, self.local_env)
            # The snippet may rebind 'df'; keep the latest version.
            self.df = self.local_env.get('df', self.df)

            # Track generated files
            for fname in os.listdir(self.output_dir):
                if fname.endswith('.csv'):
                    file_tracker['csv_files'].add(fname)
                elif fname.lower().endswith(('.png', '.jpg', '.jpeg')):
                    file_tracker['image_files'].add(fname)

            error = False
        except Exception as e:
            error_msg = traceback.format_exc()
            error = True
        finally:
            # Always restore stdout, even when exec raised.
            sys.stdout = old_stdout

        return {
            "output": mystdout.getvalue(),
            "error": error,
            # error_msg is only bound when error is True, so this is safe.
            "error_message": error_msg if error else None,
            "df": self.local_env.get('df', self.df),
            "output_dir": self.output_dir,
            "files": {
                "csv": [os.path.join(self.output_dir, f) for f in file_tracker['csv_files']],
                "images": [os.path.join(self.output_dir, f) for f in file_tracker['image_files']]
            }
        }
148
+
149
class RethinkAgent(BaseModel):
    """LLM-driven agent that generates, executes, and self-corrects pandas code.

    Flow: build a prompt from the DataFrame schema + conversation history,
    ask the Cerebras chat endpoint for direct-execution Python, run it in a
    PythonREPL, and on failure re-prompt with the captured error, up to
    ``max_retries`` attempts.
    """

    df: pd.DataFrame                         # source data the generated code operates on
    max_retries: int = Field(default=5, ge=1)
    cerebras_client: Optional[OpenAI] = None
    model_name: str = Field(default="llama3.1-8b")
    current_retry: int = Field(default=0, ge=0)
    repl: Optional[PythonREPL] = None
    key_manager: Optional[CerebrasKeyManager] = None
    conversation: List[Dict[str, Any]] = []  # prior chat turns embedded in the prompt

    class Config:
        # Allow non-pydantic field types (DataFrame, OpenAI client, PythonREPL).
        arbitrary_types_allowed = True

    def _extract_code(self, response: str) -> str:
        """Return the ```python fenced block from *response*, or the stripped text."""
        code_match = re.search(r'```python(.*?)```', response, re.DOTALL)
        return code_match.group(1).strip() if code_match else response.strip()

    def _generate_initial_prompt(self, query: str) -> str:
        """Build the first-attempt prompt embedding history, schema, and the query."""
        initial_prompt = f"""Generate DIRECT EXECUTION CODE (no functions, no explanations) following STRICT RULES:

CONVERSATION HISTORY:
{self.conversation}

MANDATORY REQUIREMENTS:
1. Operate directly on existing 'df' variable
2. Save ALL final DataFrames to CSV using: df.to_csv(f'{{output_dir}}/descriptive_name.csv')
3. For visualizations: plt.savefig(f'{{output_dir}}/chart_name.png')
4. Use EXACTLY this structure:
# Data processing
df_processed = df[...] # filtering/grouping
# Save results
df_processed.to_csv(f'{{output_dir}}/result.csv')
# Visualizations (if needed)
plt.figure()
... plotting code ...
plt.savefig(f'{{output_dir}}/chart.png')
plt.close()

FORBIDDEN:
- Function definitions
- Dummy data creation
- Any code blocks besides pandas operations and matplotlib
- Print statements showing dataframes
- Using any visualization library other than matplotlib or seaborn

DATAFRAME COLUMNS: {', '.join(self.df.columns)}
DATAFRAME'S FIRST FIVE ROWS: {self.df.head().to_dict('records')}
USER QUERY: {query}

EXAMPLE RESPONSE FOR "Sales by region":
# Data processing
sales_by_region = df.groupby('region')['sales'].sum().reset_index()
# Save results
sales_by_region.to_csv(f'{{output_dir}}/sales_by_region.csv')
"""
        # BUG FIX: the original passed self.conversation as a positional arg to a
        # message with no %s placeholder — logging's lazy %-formatting then fails
        # and the history is never logged. Use an explicit placeholder.
        logger.info('Conversation history: %s', self.conversation)
        return initial_prompt

    def _generate_retry_prompt(self, query: str, error: str, code: str) -> str:
        """Build a repair prompt embedding the failed code and its error."""
        return f"""FIX THIS CODE (failed with: {error}) by STRICTLY FOLLOWING:

1. REMOVE ALL FUNCTION DEFINITIONS
2. ENSURE DIRECT DF OPERATIONS
3. USE EXPLICIT output_dir PATHS
4. ADD NECESSARY IMPORTS IF MISSING
5. VALIDATE COLUMN NAMES EXIST

BAD CODE:
{code}

CORRECTED CODE:"""

    def initialize_model(self, api_keys: List[str], base_url: str) -> bool:
        """Configure the key manager and bind its client.

        Raises RuntimeError when no key can be configured; returns True on
        success (False is kept for interface compatibility but is effectively
        unreachable, since binding the client does not raise).
        """
        self.key_manager = CerebrasKeyManager(api_keys, base_url)
        if not self.key_manager.configure():
            raise RuntimeError("API key initialization failed")
        try:
            self.cerebras_client = self.key_manager.client
            return True
        except Exception as e:
            logger.error(f"Model init failed: {str(e)}")
            return False

    def generate_code(self, query: str, error: Optional[str] = None, previous_code: Optional[str] = None) -> str:
        """Ask the model for executable code; fail over to remaining keys.

        Uses the retry prompt when *error*/*previous_code* are given, else the
        initial prompt. On an API error, reconfigures with the next available
        key and recurses (recursion depth is bounded by the key count); when
        no keys remain the original exception propagates.
        """
        prompt = self._generate_retry_prompt(query, error, previous_code) if error else self._generate_initial_prompt(query)
        try:
            response = self.cerebras_client.chat.completions.create(
                model=self.model_name,
                messages=[
                    {"role": "system", "content": "You are a Python code generator. Generate only executable Python code without explanations."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=2048,
                temperature=0.1  # near-deterministic output for reproducible code
            )
            return self._extract_code(response.choices[0].message.content)
        except Exception as e:
            if self.key_manager.available_keys and self.key_manager.configure():
                self.cerebras_client = self.key_manager.client
                return self.generate_code(query, error, previous_code)
            raise

    def execute_query(self, query: str) -> Dict[str, Any]:
        """Generate-and-execute loop with up to max_retries repair rounds.

        Returns on success: {'text', 'csv_files', 'image_files'};
        on failure: {'error', 'csv_files': [], 'image_files': []}.
        """
        self.repl = PythonREPL(self.df)
        result = None  # holds {'error_message', 'code'} from the last failed round

        while self.current_retry < self.max_retries:
            try:
                code = self.generate_code(query,
                                          result["error_message"] if result else None,
                                          result["code"] if result else None)
                execution_result = self.repl.execute(code)

                if execution_result["error"]:
                    self.current_retry += 1
                    result = {
                        "error_message": execution_result["error_message"],
                        "code": code
                    }
                else:
                    return {
                        "text": execution_result["output"],
                        "csv_files": execution_result["files"]["csv"],
                        "image_files": execution_result["files"]["images"]
                    }
            except Exception as e:
                # generate_code exhausted all API keys (or another hard fault).
                return {
                    "error": f"Critical failure: {str(e)}",
                    "csv_files": [],
                    "image_files": []
                }

        return {
            "error": f"Failed after {self.max_retries} retries",
            "csv_files": [],
            "image_files": []
        }
286
+
287
def cerebras_llm_chat(csv_url: str, query: str, conversation_history: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Load the CSV, run a RethinkAgent over it, and normalize the result.

    Returns {'message', 'csv_files', 'image_files'} on success, or a dict
    containing an 'error' key on any failure (this function never raises).
    Uses the module-level API_KEYS / MODEL_NAME / CEREBRAS_BASE_URL config.
    """
    try:
        df = pd.read_csv(csv_url)
        agent = RethinkAgent(df=df, conversation=conversation_history, model_name=MODEL_NAME)

        if not agent.initialize_model(API_KEYS, CEREBRAS_BASE_URL):
            return {"error": "API configuration failed"}

        result = agent.execute_query(query)

        # execute_query signals failure via an 'error' key; pass it through.
        if "error" in result:
            return result

        return {
            "message": result["text"],
            "csv_files": result["csv_files"],
            "image_files": result["image_files"]
        }
    except Exception as e:
        logger.error(f"Processing failed: {str(e)}")
        return {
            "error": f"Processing error: {str(e)}",
            "csv_files": [],
            "image_files": []
        }
312
+
313
+
314
async def _upload_outputs(paths: List[str], file_type: str, chat_id: str) -> List[FileProps]:
    """Upload each existing local file to Supabase and collect its FileProps.

    Best-effort: a file that fails to upload is logged and skipped; files
    that upload successfully are deleted locally. *file_type* is the value
    stored in FileProps.fileType ('csv' or 'image').
    """
    uploaded: List[FileProps] = []
    for path in paths:
        if not os.path.exists(path):
            continue
        file_name = os.path.basename(path)
        try:
            # Prefix with a UUID so concurrent chats cannot collide in storage.
            unique_file_name = f"{uuid.uuid4()}_{file_name}"
            public_url = await upload_file_to_supabase(
                file_path=path,
                file_name=unique_file_name,
                chat_id=chat_id
            )
            uploaded.append(FileProps(
                fileName=file_name,
                filePath=public_url,
                fileType=file_type
            ))
            os.remove(path)  # clean up the local copy once uploaded
        except Exception as upload_error:
            logger.error(f"Failed to upload {file_type} {file_name}: {str(upload_error)}")
            continue
    return uploaded


async def generate_csv_report_cerebras(csv_url: str, query: str, chat_id: str, conversation_history: List[Dict[str, Any]]) -> FileBoxProps:
    """Run the Cerebras CSV agent and upload every generated file.

    Returns a FileBoxProps whose filePath fields are public Supabase URLs.
    Never raises: any failure (including an unexpected result shape from
    cerebras_llm_chat) is logged and yields an empty FileBoxProps.
    """
    try:
        result = cerebras_llm_chat(csv_url, query, conversation_history)
        logger.info(f"Raw result from cerebras_llm_chat: {result}")

        if not (isinstance(result, dict) and 'csv_files' in result and 'image_files' in result):
            raise ValueError("Unexpected response format from cerebras_llm_chat")

        # The duplicated per-type upload loops are factored into one helper;
        # this also removes the old except-path logging that could touch a
        # possibly-unbound 'result' behind a fragile locals() guard.
        csv_files = await _upload_outputs(result['csv_files'], "csv", chat_id)
        image_files = await _upload_outputs(result['image_files'], "image", chat_id)

        return FileBoxProps(
            files=Files(
                csv_files=csv_files,
                image_files=image_files
            )
        )
    except Exception as e:
        logger.error(f"Report generation failed: {str(e)}")
        return FileBoxProps(
            files=Files(
                csv_files=[],
                image_files=[]
            )
        )
controller.py CHANGED
@@ -4,6 +4,7 @@ import logging
4
  import os
5
  import asyncio
6
  import threading
 
7
  import uuid
8
  from fastapi import FastAPI, HTTPException, Header
9
  from fastapi.encoders import jsonable_encoder
@@ -25,7 +26,7 @@ import numpy as np
25
  import matplotlib.pyplot as plt
26
  import matplotlib
27
  import seaborn as sns
28
- from gemini_report_generator import generate_csv_report
29
  from intitial_q_handler import if_initial_chart_question, if_initial_chat_question
30
  from orchestrator_agent import csv_orchestrator_chat
31
  from python_code_executor_service import CsvChatResult, PythonExecutor
@@ -33,6 +34,7 @@ from supabase_service import upload_file_to_supabase
33
  from cerebras_csv_agent import query_csv_agent
34
  from util_service import _prompt_generator, process_answer
35
  from fastapi.middleware.cors import CORSMiddleware
 
36
  import matplotlib
37
  matplotlib.use('Agg')
38
 
@@ -110,8 +112,6 @@ async def root():
110
  async def root():
111
  return {"message": "Pong !!"}
112
 
113
-
114
-
115
  # BASIC KNOWLEDGE BASED ON CSV
116
 
117
  # Remove trailing slash from the URL otherwise it will redirect to GET method
@@ -324,19 +324,25 @@ def langchain_csv_chat(csv_url: str, question: str, chart_required: bool):
324
  return {"error": error_message}
325
 
326
  return {"error": "All API keys exhausted"}
 
327
 
328
  # Async endpoint with non-blocking execution
329
  @app.post("/api/csv-chat")
330
  async def csv_chat(request: Dict, authorization: str = Header(None)):
331
  # Authorization checks
332
  if not authorization or not authorization.startswith("Bearer "):
 
333
  raise HTTPException(status_code=401, detail="Invalid authorization")
334
 
335
  token = authorization.split(" ")[1]
336
  if token != os.getenv("AUTH_TOKEN"):
 
337
  raise HTTPException(status_code=403, detail="Invalid token")
338
 
 
 
339
  try:
 
340
  query = request.get("query")
341
  csv_url = request.get("csv_url")
342
  decoded_url = unquote(csv_url)
@@ -345,57 +351,112 @@ async def csv_chat(request: Dict, authorization: str = Header(None)):
345
  generate_report = request.get("generate_report")
346
  chat_id = request.get("chat_id")
347
 
 
 
 
348
  if generate_report is True:
349
- report_files = await generate_csv_report(csv_url, query, chat_id, conversation_history)
350
- if report_files is not None:
351
- return {"answer": jsonable_encoder(report_files)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
352
 
 
353
  if if_initial_chat_question(query):
354
- answer = await asyncio.to_thread(
355
- langchain_csv_chat, decoded_url, query, False
356
- )
357
- logger.info("langchain_answer:", answer)
358
- return {"answer": jsonable_encoder(answer)}
 
 
 
 
359
 
360
- # Orchestrate the execution
361
  if detailed_answer is True:
362
- orchestrator_answer = await asyncio.to_thread(
363
- csv_orchestrator_chat, decoded_url, query, conversation_history, chat_id
364
- )
365
- if orchestrator_answer is not None:
366
- return {"answer": jsonable_encoder(orchestrator_answer)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
 
368
- # Process with groq_chat first
369
- # groq_answer = await asyncio.to_thread(groq_chat, decoded_url, query)
370
- # logger.info("groq_answer:", groq_answer)
371
-
372
- result = await query_csv_agent(decoded_url, query, chat_id)
373
- logger.info("cerebras csv answer == >", result)
374
- if result is not None or result == "":
375
- return {"answer": result}
376
-
377
- # if process_answer(groq_answer) == "Empty response received.":
378
- # return {"answer": "Sorry, I couldn't find relevant data..."}
379
-
380
- # if process_answer(groq_answer):
381
- lang_answer = await asyncio.to_thread(
382
- langchain_csv_chat, decoded_url, query, False
383
- )
384
- if process_answer(lang_answer):
385
- return {"answer": "error"}
386
- return {"answer": jsonable_encoder(lang_answer)}
387
 
388
- # return {"answer": jsonable_encoder(groq_answer)}
 
 
389
 
390
  except Exception as e:
391
- logger.error(f"Error processing request: {str(e)}")
 
392
  return {"answer": "error"}
393
 
394
  def handle_out_of_range_float(value):
 
395
  if isinstance(value, float):
396
  if np.isnan(value):
 
397
  return None
398
  elif np.isinf(value):
 
399
  return "Infinity"
400
  return value
401
 
@@ -404,7 +465,6 @@ def handle_out_of_range_float(value):
404
 
405
 
406
 
407
-
408
  # CHART CODING STARTS FROM HERE
409
 
410
  instructions = """
@@ -593,7 +653,7 @@ async def csv_chart(request: dict, authorization: str = Header(None)):
593
  chat_id = request.get("chat_id", "")
594
 
595
  if generate_report is True:
596
- report_files = await generate_csv_report(csv_url, query, chat_id, conversation_history)
597
  if report_files is not None:
598
  return {"orchestrator_response": jsonable_encoder(report_files)}
599
 
 
4
  import os
5
  import asyncio
6
  import threading
7
+ import traceback
8
  import uuid
9
  from fastapi import FastAPI, HTTPException, Header
10
  from fastapi.encoders import jsonable_encoder
 
26
  import matplotlib.pyplot as plt
27
  import matplotlib
28
  import seaborn as sns
29
+ from gemini_report_generator import generate_csv_report_gemini
30
  from intitial_q_handler import if_initial_chart_question, if_initial_chat_question
31
  from orchestrator_agent import csv_orchestrator_chat
32
  from python_code_executor_service import CsvChatResult, PythonExecutor
 
34
  from cerebras_csv_agent import query_csv_agent
35
  from util_service import _prompt_generator, process_answer
36
  from fastapi.middleware.cors import CORSMiddleware
37
+
38
  import matplotlib
39
  matplotlib.use('Agg')
40
 
 
112
  async def root():
113
  return {"message": "Pong !!"}
114
 
 
 
115
  # BASIC KNOWLEDGE BASED ON CSV
116
 
117
  # Remove trailing slash from the URL otherwise it will redirect to GET method
 
324
  return {"error": error_message}
325
 
326
  return {"error": "All API keys exhausted"}
327
+ from cerebras_report_generator import generate_csv_report_cerebras
328
 
329
  # Async endpoint with non-blocking execution
330
  @app.post("/api/csv-chat")
331
  async def csv_chat(request: Dict, authorization: str = Header(None)):
332
  # Authorization checks
333
  if not authorization or not authorization.startswith("Bearer "):
334
+ logger.error("Authorization failed: Missing or invalid authorization header")
335
  raise HTTPException(status_code=401, detail="Invalid authorization")
336
 
337
  token = authorization.split(" ")[1]
338
  if token != os.getenv("AUTH_TOKEN"):
339
+ logger.error("Authorization failed: Invalid token")
340
  raise HTTPException(status_code=403, detail="Invalid token")
341
 
342
+ logger.info("Authorization successful")
343
+
344
  try:
345
+ # Extract request parameters
346
  query = request.get("query")
347
  csv_url = request.get("csv_url")
348
  decoded_url = unquote(csv_url)
 
351
  generate_report = request.get("generate_report")
352
  chat_id = request.get("chat_id")
353
 
354
+ logger.info(f"Request parameters: query='{query[:100]}...', csv_url='{csv_url}', detailed_answer={detailed_answer}, generate_report={generate_report}, chat_id={chat_id}")
355
+
356
+ # Handle report generation with Cerebras first, then Gemini fallback
357
  if generate_report is True:
358
+ logger.info("Starting report generation process...")
359
+
360
+ # Try Cerebras first for report generation
361
+ logger.info("Attempting report generation with Cerebras...")
362
+ try:
363
+ report_files = await generate_csv_report_cerebras(csv_url, query, chat_id, conversation_history)
364
+ if report_files is not None and (report_files.files.csv_files or report_files.files.image_files):
365
+ logger.info(f"Cerebras report generation successful: {len(report_files.files.csv_files)} CSV files, {len(report_files.files.image_files)} image files")
366
+ return {"answer": jsonable_encoder(report_files)}
367
+ else:
368
+ logger.warning("Cerebras report generation returned empty or None result")
369
+ except Exception as cerebras_error:
370
+ logger.error(f"Cerebras report generation failed: {str(cerebras_error)}")
371
+
372
+ # Fallback to Gemini for report generation
373
+ logger.info("Falling back to Gemini for report generation...")
374
+ try:
375
+ report_files = await generate_csv_report_gemini(csv_url, query, chat_id, conversation_history)
376
+ if report_files is not None and (report_files.files.csv_files or report_files.files.image_files):
377
+ logger.info(f"Gemini report generation successful: {len(report_files.files.csv_files)} CSV files, {len(report_files.files.image_files)} image files")
378
+ return {"answer": jsonable_encoder(report_files)}
379
+ else:
380
+ logger.warning("Gemini report generation returned empty or None result")
381
+ except Exception as gemini_error:
382
+ logger.error(f"Gemini report generation failed: {str(gemini_error)}")
383
+
384
+ logger.error("Both Cerebras and Gemini report generation failed")
385
+ return {"answer": "error"}
386
 
387
+ # Handle initial chat questions with langchain
388
  if if_initial_chat_question(query):
389
+ logger.info("Processing as initial chat question with langchain...")
390
+ try:
391
+ answer = await asyncio.to_thread(
392
+ langchain_csv_chat, decoded_url, query, False
393
+ )
394
+ logger.info(f"Langchain initial chat answer: {str(answer)[:200]}...")
395
+ return {"answer": jsonable_encoder(answer)}
396
+ except Exception as e:
397
+ logger.error(f"Langchain initial chat failed: {str(e)}")
398
 
399
+ # Handle detailed answers with orchestrator
400
  if detailed_answer is True:
401
+ logger.info("Processing detailed answer with orchestrator...")
402
+ try:
403
+ orchestrator_answer = await asyncio.to_thread(
404
+ csv_orchestrator_chat, decoded_url, query, conversation_history, chat_id
405
+ )
406
+ if orchestrator_answer is not None:
407
+ logger.info(f"Orchestrator answer successful: {str(orchestrator_answer)[:200]}...")
408
+ return {"answer": jsonable_encoder(orchestrator_answer)}
409
+ else:
410
+ logger.warning("Orchestrator returned None result")
411
+ except Exception as e:
412
+ logger.error(f"Orchestrator processing failed: {str(e)}")
413
+
414
+ # Process with standard CSV agent (not Cerebras)
415
+ logger.info("Processing with standard CSV agent...")
416
+ try:
417
+ result = await query_csv_agent(decoded_url, query, chat_id)
418
+ logger.info(f"Standard CSV agent result: {str(result)[:200]}...")
419
+ if result is not None and result != "":
420
+ return {"answer": result}
421
+ else:
422
+ logger.warning("Standard CSV agent returned empty or None result")
423
+ except Exception as e:
424
+ logger.error(f"Standard CSV agent failed: {str(e)}")
425
 
426
+ # Fallback to langchain
427
+ logger.info("Falling back to langchain CSV chat...")
428
+ try:
429
+ lang_answer = await asyncio.to_thread(
430
+ langchain_csv_chat, decoded_url, query, False
431
+ )
432
+ logger.info(f"Langchain fallback result: {str(lang_answer)[:200]}...")
433
+
434
+ if process_answer(lang_answer):
435
+ logger.error("Langchain fallback produced error response")
436
+ return {"answer": "error"}
437
+
438
+ logger.info("Langchain fallback successful")
439
+ return {"answer": jsonable_encoder(lang_answer)}
440
+ except Exception as e:
441
+ logger.error(f"Langchain fallback failed: {str(e)}")
 
 
 
442
 
443
+ # If all methods fail
444
+ logger.error("All processing methods failed")
445
+ return {"answer": "error"}
446
 
447
  except Exception as e:
448
+ logger.error(f"Critical error processing request: {str(e)}")
449
+ logger.error(f"Error traceback: {traceback.format_exc()}")
450
  return {"answer": "error"}
451
 
452
  def handle_out_of_range_float(value):
453
+ """Handle out of range float values for JSON serialization"""
454
  if isinstance(value, float):
455
  if np.isnan(value):
456
+ logger.debug("Converting NaN to None")
457
  return None
458
  elif np.isinf(value):
459
+ logger.debug("Converting Infinity to string")
460
  return "Infinity"
461
  return value
462
 
 
465
 
466
 
467
 
 
468
  # CHART CODING STARTS FROM HERE
469
 
470
  instructions = """
 
653
  chat_id = request.get("chat_id", "")
654
 
655
  if generate_report is True:
656
+ report_files = await generate_csv_report_gemini(csv_url, query, chat_id, conversation_history)
657
  if report_files is not None:
658
  return {"orchestrator_response": jsonable_encoder(report_files)}
659
 
gemini_report_generator.py CHANGED
@@ -294,7 +294,7 @@ def gemini_llm_chat(csv_url: str, query: str, conversation_history: List[Dict[st
294
  }
295
 
296
 
297
- async def generate_csv_report(csv_url: str, query: str, chat_id: str, conversation_history: List[Dict[str, Any]]) -> FileBoxProps:
298
  try:
299
  result = gemini_llm_chat(csv_url, query, conversation_history)
300
  logger.info(f"Raw result from gemini_llm_chat: {result}")
 
294
  }
295
 
296
 
297
+ async def generate_csv_report_gemini(csv_url: str, query: str, chat_id: str, conversation_history: List[Dict[str, Any]]) -> FileBoxProps:
298
  try:
299
  result = gemini_llm_chat(csv_url, query, conversation_history)
300
  logger.info(f"Raw result from gemini_llm_chat: {result}")