Soumik555 committed on
Commit
3dadacf
·
1 Parent(s): 7f5f121
Files changed (2) hide show
  1. controller.py +1 -28
  2. groq_csv_agent.py +0 -187
controller.py CHANGED
@@ -35,7 +35,6 @@ from orchestrator_agent import csv_orchestrator_chat_gemini
35
  from python_code_executor_service import CsvChatResult, PythonExecutor
36
  from supabase_service import upload_file_to_supabase
37
  from cerebras_csv_agent import query_csv_agent_cerebras
38
- from groq_csv_agent import query_csv_agent_groq
39
  from util_service import _prompt_generator, process_answer
40
  from fastapi.middleware.cors import CORSMiddleware
41
 
@@ -456,18 +455,6 @@ async def csv_chat(request: Dict, authorization: str = Header(None)):
456
  logger.info("Processing detailed answer with orchestrator...")
457
  return await handle_detailed_answer(decoded_url, query, conversation_history, chat_id)
458
 
459
- # Process with standard CSV agent (Groq)
460
- logger.info("Processing with standard CSV agent (Groq)...")
461
- try:
462
- result = await query_csv_agent_groq(decoded_url, query, chat_id)
463
- logger.info(f"Standard CSV agent (Groq) result: {str(result)[:200]}...")
464
- if result is not None and result != "":
465
- return {"answer": result}
466
- else:
467
- logger.warning("Standard CSV agent (Groq) returned empty or None result")
468
- except Exception as e:
469
- logger.error(f"Standard CSV agent (Groq) failed: {str(e)}")
470
-
471
  # Process with standard CSV agent (Cerebras)
472
  logger.info("Groq failed. Processing with standard CSV agent (Cerebras)...")
473
  try:
@@ -738,21 +725,7 @@ async def csv_chart(request: dict, authorization: str = Header(None)):
738
 
739
  if orchestrator_answer is not None:
740
  return {"orchestrator_response": jsonable_encoder(orchestrator_answer)}
741
-
742
- # Next, try the groq-based method
743
- # groq_result = await loop.run_in_executor(
744
- # process_executor, groq_chart, csv_url, query
745
- # )
746
- # logger.info(f"Groq chart result: {groq_result}")
747
- # if isinstance(groq_result, str) and groq_result != "Chart not generated":
748
- # unique_file_name =f'{str(uuid.uuid4())}.png'
749
- # logger.info("Uploading the chart to supabase...")
750
- # image_public_url = await upload_file_to_supabase(f"{groq_result}", unique_file_name, chat_id=chat_id)
751
- # logger.info("Image uploaded to Supabase and Image URL is... ", {image_public_url})
752
- # os.remove(groq_result)
753
- # return {"image_url": image_public_url}
754
- # return FileResponse(groq_result, media_type="image/png")
755
-
756
  logger.info("Trying cerebras ai llama...")
757
  result = await query_csv_agent_cerebras(csv_url, query, chat_id)
758
  logger.info("cerebras ai result ==>", result)
 
35
  from python_code_executor_service import CsvChatResult, PythonExecutor
36
  from supabase_service import upload_file_to_supabase
37
  from cerebras_csv_agent import query_csv_agent_cerebras
 
38
  from util_service import _prompt_generator, process_answer
39
  from fastapi.middleware.cors import CORSMiddleware
40
 
 
455
  logger.info("Processing detailed answer with orchestrator...")
456
  return await handle_detailed_answer(decoded_url, query, conversation_history, chat_id)
457
 
 
 
 
 
 
 
 
 
 
 
 
 
458
  # Process with standard CSV agent (Cerebras)
459
  logger.info("Groq failed. Processing with standard CSV agent (Cerebras)...")
460
  try:
 
725
 
726
  if orchestrator_answer is not None:
727
  return {"orchestrator_response": jsonable_encoder(orchestrator_answer)}
728
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
729
  logger.info("Trying cerebras ai llama...")
730
  result = await query_csv_agent_cerebras(csv_url, query, chat_id)
731
  logger.info("cerebras ai result ==>", result)
groq_csv_agent.py DELETED
@@ -1,187 +0,0 @@
1
- import pandas as pd
2
- import json
3
- from typing import Optional
4
- from pydantic import BaseModel
5
- from dotenv import load_dotenv
6
- from pydantic_ai import Agent
7
- from csv_service import clean_data
8
- from python_code_executor_service import PythonExecutor
9
- from groq_instance_provider import InstanceProvider
10
- import logging
11
-
12
- load_dotenv()
13
-
14
- instance_provider = InstanceProvider()
15
-
16
- logging.basicConfig(level=logging.INFO)
17
- logger = logging.getLogger(__name__)
18
-
19
- class CodeResponse(BaseModel):
20
- """Container for code-related responses"""
21
- language: str = "python"
22
- code: str
23
-
24
- class ChartSpecification(BaseModel):
25
- """Details about requested charts"""
26
- image_description: str
27
- code: Optional[str] = None
28
-
29
- class AnalysisOperation(BaseModel):
30
- """Container for a single analysis operation with its code and result"""
31
- code: CodeResponse
32
- result_var: str
33
-
34
- class CsvChatResult(BaseModel):
35
- """Structured response for CSV-related AI interactions"""
36
-
37
- # Casual chat response
38
- casual_response: str
39
-
40
- # Data analysis components
41
- # analysis_operations: List[AnalysisOperation]
42
- analysis_operations: Optional[AnalysisOperation]
43
-
44
- # Visualization components
45
- # charts: Optional[List[ChartSpecification]] = None
46
- charts: Optional[ChartSpecification]
47
-
48
-
49
- def get_csv_info(df: pd.DataFrame) -> dict:
50
- """Get metadata/info about the CSV"""
51
- info = {
52
- 'num_rows': len(df),
53
- 'num_cols': len(df.columns),
54
- 'example_rows': df.head(2).to_dict('records'),
55
- 'dtypes': {col: str(df[col].dtype) for col in df.columns},
56
- 'columns': list(df.columns),
57
- 'numeric_columns': [col for col in df.columns if pd.api.types.is_numeric_dtype(df[col])],
58
- 'categorical_columns': [col for col in df.columns if pd.api.types.is_string_dtype(df[col])]
59
- }
60
- return info
61
-
62
-
63
- def get_csv_system_prompt(df: pd.DataFrame) -> str:
64
- """Generate system prompt for CSV analysis with strict variable requirements"""
65
- csv_info = get_csv_info(df)
66
- prompt = f"""
67
- You're a CSV analysis assistant. The pandas DataFrame is loaded as 'df' - use this variable.
68
-
69
- CSV Info:
70
- - Shape: {csv_info['num_rows']} rows × {csv_info['num_cols']} cols
71
- - Columns: {csv_info['columns']}
72
- - Sample: {csv_info['example_rows']}
73
- - Dtypes: {csv_info['dtypes']}
74
-
75
- STRICT REQUIREMENTS:
76
- 1. NEVER calculate or predict values yourself - ALWAYS return executable code that would produce the result
77
- 2. Use existing 'df' - never recreate it
78
- 3. ALWAYS assign results to variables with descriptive names
79
- 4. For any data structures (Lists, Records, Tables, Dictionaries, etc.), always return them as JSON with correct indentation
80
- 5. For charts:
81
- - Use matplotlib/seaborn only
82
- - Professional quality: proper sizing, labels, titles
83
- - Figure size: (14, 8) for complex, (12, 6) for simple
84
- - Clear titles (fontsize=16), labels (fontsize=14)
85
- - Rotate x-labels if needed (45°, fontsize=12)
86
- - Add annotations/gridlines where helpful
87
- - Use colorblind-friendly palettes
88
- - Always include plt.tight_layout()
89
-
90
- VARIABLE ASSIGNMENT RULES:
91
- 1. Every operation must store its result in a variable
92
- 2. Variable names should be descriptive and snake_case
93
- 3. For DataFrame operations: result_df = df.operation()
94
- 4. For statistical results: stats_result = df['column'].describe()
95
- 5. For visualizations: assign the figure/axis objects when needed
96
- 6. For filtered data: filtered_data = df[df['column'] > value]
97
-
98
- Example professional chart (with variable assignment):
99
- fig, ax = plt.subplots(figsize=(14, 8))
100
- sns.barplot(x='category', y='value', data=df, palette='muted', ax=ax)
101
- ax.set_title('Value by Category', fontsize=16)
102
- ax.set_xlabel('Category', fontsize=14)
103
- ax.set_ylabel('Value', fontsize=14)
104
- ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
105
- ax.grid(alpha=0.3)
106
- plt.tight_layout()
107
- plt.show()
108
-
109
- Example professional response for dataframe operations:
110
- # GOOD (with variable assignment)
111
- sample_transactions = df.sample(5)[['id', 'date', 'amount']]
112
- transaction_stats = df['amount'].describe()
113
-
114
- # BAD (no variable assignment)
115
- df.sample(5)[['id', 'date', 'amount']] # No variable assigned!
116
-
117
- Example professional response for analysis:
118
- # Calculate average by category
119
- category_means = df.groupby('category')['value'].mean().reset_index()
120
-
121
- # Get top 10 items
122
- top_items = df.nlargest(10, 'sales')
123
-
124
- Return complete, executable code that follows these rules.
125
- Your code MUST assign all results to variables.
126
- """
127
- return prompt
128
-
129
-
130
- def create_csv_agent(df: pd.DataFrame, max_retries: int = 1) -> Agent:
131
- """Create and return a CSV analysis agent with API key rotation"""
132
- csv_system_prompt = get_csv_system_prompt(df)
133
-
134
- for attempt in range(max_retries):
135
- try:
136
- model = instance_provider.get_instance()
137
- if model is None:
138
- raise RuntimeError("No available API instances")
139
-
140
- csv_agent = Agent(
141
- model=model,
142
- output_type=CsvChatResult,
143
- system_prompt=csv_system_prompt,
144
- )
145
-
146
- return csv_agent
147
-
148
- except Exception as e:
149
- api_key = instance_provider.get_api_key_for_model(model)
150
- if api_key:
151
- logger.info(f"Error with API key (attempt {attempt + 1}): {str(e)}")
152
- instance_provider.report_error(api_key)
153
- continue
154
-
155
- raise RuntimeError(f"Failed to create agent after {max_retries} attempts")
156
-
157
-
158
- async def query_csv_agent_groq(csv_url: str, question: str, chat_id: str) -> str:
159
- """Query the CSV agent with a DataFrame and question and return formatted output"""
160
-
161
- # Get the DataFrame from the CSV URL
162
- df = clean_data(csv_url)
163
-
164
- # Create agent and get response
165
- agent = create_csv_agent(df)
166
- result = await agent.run(question)
167
-
168
- # Process the response through PythonExecutor
169
- executor = PythonExecutor(df)
170
-
171
- # Convert the raw output to CsvChatResult if needed
172
- if not isinstance(result.output, CsvChatResult):
173
- # Handle case where output needs conversion
174
- try:
175
- response_data = result.output if isinstance(result.output, dict) else json.loads(result.output)
176
- chat_result = CsvChatResult(**response_data)
177
- except Exception as e:
178
- raise ValueError(f"Could not parse agent response: {str(e)}")
179
- else:
180
- chat_result = result.output
181
-
182
- logger.info("Chat Result Original Object:", chat_result)
183
-
184
- # Process and format the response
185
- formatted_output = await executor.process_response(chat_result, chat_id)
186
-
187
- return formatted_output