Spaces:

Soumik555
/

FastApi

Running

App Files Community

Soumik555 committed on Apr 2, 2025

Commit

30e7daa

1 Parent(s): ee5c938

openai key rotate

Browse files

Files changed (8) hide show

gemini_langchain_agent.py +260 -55
gemini_report_generator.py +410 -0
groq_chart.py +101 -0
groq_chat.py +89 -0
lc_groq_chart.py +82 -0
lc_groq_chat.py +75 -0
orchestrator_agent.py +146 -45
orchestrator_functions.py +0 -1

gemini_langchain_agent.py CHANGED Viewed

@@ -18,12 +18,17 @@ matplotlib.use('Agg')
 load_dotenv()
 model_name = 'gemini-2.0-flash'  # Specify the model name
-google_api_keys = list(reversed(os.getenv("GEMINI_API_KEYS").split(",")))
-current_key_index = 0  # Global index for API keys
 def create_agent(llm, data, tools):
     """Create agent with tool names"""
     return create_pandas_dataframe_agent(
         llm,
         data,
@@ -34,69 +39,62 @@ def create_agent(llm, data, tools):
         return_intermediate_steps=True
     )
 def _prompt_generator(question: str, chart_required: bool):
     chat_prompt = f"""You are a senior data analyst working with CSV data. Adhere strictly to the following guidelines:
-                         1. **Data Verification:** Always inspect the data with `.sample(5).to_dict()` before performing any analysis.
-                         2. **Data Integrity:** Ensure proper handling of null values to maintain accuracy and reliability.
-                         3. **Communication:** Provide concise, professional, and well-structured responses.
-                         4. Avoid including any internal processing details or references to the methods used to generate your response (ex: based on the tool call, using the function -> These types of phrases.)
-                         **Query:** {question}
-                    """
     chart_prompt = f"""You are a senior data analyst working with CSV data. Follow these rules STRICTLY:
-                       1. Generate ONE unique identifier FIRST using: unique_id = uuid.uuid4().hex
-                       2. Visualization requirements:
-                       - Adjust font sizes, rotate labels (45° if needed), truncate for readability
-                       - Figure size: (12, 6)
-                       - Descriptive titles (fontsize=14)
-                       - Colorblind-friendly palettes
-                       3. File handling rules:
-                       - Create MAXIMUM 2 charts if absolutely necessary
-                       - For multiple charts:
-                           * Arrange in grid format (2x1 vertical layout preferred)
-                           * Use SAME unique_id with suffixes:
-                           - f"{{unique_id}}_1.png"
-                           - f"{{unique_id}}_2.png"
-                       - Save EXCLUSIVELY to "generated_charts" folder
-                       - File naming: f"chart_{{unique_id}}.png" (for single chart)
-                       4. FINAL OUTPUT MUST BE:
-                       - For single chart: f"generated_charts/chart_{{unique_id}}.png"
-                       - For multiple charts: f"generated_charts/chart_{{unique_id}}.png" (combined grid image)
-                       - **ONLY return this full path string, nothing else**
-                       **Query:** {question}
-                       IMPORTANT:
-                       - Generate the unique_id FIRST before any operations
-                       - Use THE SAME unique_id throughout entire process
-                       - NEVER generate new UUIDs after initial creation
-                       - Return EXACT filepath string of the final saved chart
-                       """
     if chart_required:
         return ChatPromptTemplate.from_template(chart_prompt)
     else:
         return ChatPromptTemplate.from_template(chat_prompt)
 def langchain_gemini_csv_handler(csv_url: str, question: str, chart_required: bool):
-    global current_key_index
     data = pd.read_csv(csv_url)
-    attempts = 0
-    total_keys = len(google_api_keys)
-    while attempts < total_keys:
         try:
-            api_key = google_api_keys[current_key_index]
-            print(f"Using API key index {current_key_index}")
-            llm = ChatGoogleGenerativeAI(model=model_name, api_key=api_key)
             # Create tool with validated name
             tool = PythonAstREPLTool(
@@ -113,15 +111,222 @@ def langchain_gemini_csv_handler(csv_url: str, question: str, chart_required: bo
             )
             agent = create_agent(llm, data, [tool])
             prompt = _prompt_generator(question, chart_required)
             result = agent.invoke({"input": prompt})
-            return result.get("output")
         except Exception as e:
-            print(f"Error using API key index {current_key_index}: {e}")
-            current_key_index = (current_key_index + 1) % total_keys
-            attempts += 1
-    print("All API keys have been exhausted.")
     return None

 load_dotenv()
 model_name = 'gemini-2.0-flash'  # Specify the model name
+google_api_keys = os.getenv("GEMINI_API_KEYS").split(",")
+# Create pre-initialized LLM instances
+llm_instances = [
+    ChatGoogleGenerativeAI(model=model_name, api_key=key)
+    for key in google_api_keys
+]
+current_instance_index = 0  # Track current instance being used
 def create_agent(llm, data, tools):
     """Create agent with tool names"""
     return create_pandas_dataframe_agent(
         llm,
         data,
         return_intermediate_steps=True
     )
 def _prompt_generator(question: str, chart_required: bool):
     chat_prompt = f"""You are a senior data analyst working with CSV data. Adhere strictly to the following guidelines:
+                     1. **Data Verification:** Always inspect the data with `.sample(5).to_dict()` before performing any analysis.
+                     2. **Data Integrity:** Ensure proper handling of null values to maintain accuracy and reliability.
+                     3. **Communication:** Provide concise, professional, and well-structured responses.
+                     4. Avoid including any internal processing details or references to the methods used to generate your response (ex: based on the tool call, using the function -> These types of phrases.)
+                     **Query:** {question}
+                 """
     chart_prompt = f"""You are a senior data analyst working with CSV data. Follow these rules STRICTLY:
+                   1. Generate ONE unique identifier FIRST using: unique_id = uuid.uuid4().hex
+                   2. Visualization requirements:
+                   - Adjust font sizes, rotate labels (45° if needed), truncate for readability
+                   - Figure size: (12, 6)
+                   - Descriptive titles (fontsize=14)
+                   - Colorblind-friendly palettes
+                   3. File handling rules:
+                   - Create MAXIMUM 2 charts if absolutely necessary
+                   - For multiple charts:
+                       * Arrange in grid format (2x1 vertical layout preferred)
+                       * Use SAME unique_id with suffixes:
+                       - f"{{unique_id}}_1.png"
+                       - f"{{unique_id}}_2.png"
+                   - Save EXCLUSIVELY to "generated_charts" folder
+                   - File naming: f"chart_{{unique_id}}.png" (for single chart)
+                   4. FINAL OUTPUT MUST BE:
+                   - For single chart: f"generated_charts/chart_{{unique_id}}.png"
+                   - For multiple charts: f"generated_charts/chart_{{unique_id}}.png" (combined grid image)
+                   - **ONLY return this full path string, nothing else**
+                   **Query:** {question}
+                   IMPORTANT:
+                   - Generate the unique_id FIRST before any operations
+                   - Use THE SAME unique_id throughout entire process
+                   - NEVER generate new UUIDs after initial creation
+                   - Return EXACT filepath string of the final saved chart
+                   """
     if chart_required:
         return ChatPromptTemplate.from_template(chart_prompt)
     else:
         return ChatPromptTemplate.from_template(chat_prompt)
 def langchain_gemini_csv_handler(csv_url: str, question: str, chart_required: bool):
+    global current_instance_index
     data = pd.read_csv(csv_url)
+    # Try all available instances
+    while current_instance_index < len(llm_instances):
         try:
+            llm = llm_instances[current_instance_index]
+            print(f"Using LLM instance index {current_instance_index}")
             # Create tool with validated name
             tool = PythonAstREPLTool(
             )
             agent = create_agent(llm, data, [tool])
             prompt = _prompt_generator(question, chart_required)
             result = agent.invoke({"input": prompt})
+            output = result.get("output")
+            if output is None:
+                raise ValueError("Received None response from agent")
+            return output
         except Exception as e:
+            print(f"Error using LLM instance index {current_instance_index}: {e}")
+            current_instance_index += 1
+    print("All LLM instances have been exhausted.")
     return None
+# import os
+# import re
+# import uuid
+# from langchain_google_genai import ChatGoogleGenerativeAI
+# import pandas as pd
+# from langchain_core.prompts import ChatPromptTemplate
+# from langchain_experimental.tools import PythonAstREPLTool
+# from langchain_experimental.agents import create_pandas_dataframe_agent
+# from dotenv import load_dotenv
+# import numpy as np
+# import matplotlib.pyplot as plt
+# import matplotlib
+# import seaborn as sns
+# import datetime as dt
+# # Set the backend for matplotlib to 'Agg' to avoid GUI issues
+# matplotlib.use('Agg')
+# load_dotenv()
+# model_name = 'gemini-2.0-flash'  # Specify the model name
+# google_api_keys = os.getenv("GEMINI_API_KEYS").split(",")
+# # Create pre-initialized LLM instances
+# llm_instances = [
+#     ChatGoogleGenerativeAI(model=model_name, api_key=key)
+#     for key in google_api_keys
+# ]
+# current_instance_index = 0  # Track current instance being used
+# def is_retryable_error(error: Exception) -> bool:
+#     """Check if the error should trigger a retry with next instance"""
+#     error_str = str(error).lower()
+#     retry_conditions = [
+#         # Rate limiting and quota errors
+#         '429' in error_str,
+#         'quota' in error_str,
+#         'rate limit' in error_str,
+#         'resource exhausted' in error_str,
+#         'exceeded' in error_str,
+#         'limit reached' in error_str,
+#         # Authentication and permission errors
+#         'permission denied' in error_str,
+#         'invalid api key' in error_str,
+#         'authentication' in error_str,
+#         # Server errors
+#         '500' in error_str,
+#         '503' in error_str,
+#         'service unavailable' in error_str,
+#         # Connection issues
+#         'timeout' in error_str,
+#         'connection' in error_str,
+#         # Content policy
+#         'content policy' in error_str,
+#         'safety' in error_str,
+#         'blocked' in error_str
+#     ]
+#     return any(retry_conditions)
+# def create_agent(llm, data, tools):
+#     """Create agent with tool names"""
+#     return create_pandas_dataframe_agent(
+#         llm,
+#         data,
+#         agent_type="tool-calling",
+#         verbose=True,
+#         allow_dangerous_code=True,
+#         extra_tools=tools,
+#         return_intermediate_steps=True
+#     )
+# def _prompt_generator(question: str, chart_required: bool):
+#     chat_prompt = f"""You are a senior data analyst working with CSV data. Adhere strictly to the following guidelines:
+#                      1. **Data Verification:** Always inspect the data with `.sample(5).to_dict()` before performing any analysis.
+#                      2. **Data Integrity:** Ensure proper handling of null values to maintain accuracy and reliability.
+#                      3. **Communication:** Provide concise, professional, and well-structured responses.
+#                      4. Avoid including any internal processing details or references to the methods used to generate your response (ex: based on the tool call, using the function -> These types of phrases.)
+#                      **Query:** {question}
+#                  """
+#     chart_prompt = f"""You are a senior data analyst working with CSV data. Follow these rules STRICTLY:
+#                    1. Generate ONE unique identifier FIRST using: unique_id = uuid.uuid4().hex
+#                    2. Visualization requirements:
+#                    - Adjust font sizes, rotate labels (45° if needed), truncate for readability
+#                    - Figure size: (12, 6)
+#                    - Descriptive titles (fontsize=14)
+#                    - Colorblind-friendly palettes
+#                    3. File handling rules:
+#                    - Create MAXIMUM 2 charts if absolutely necessary
+#                    - For multiple charts:
+#                        * Arrange in grid format (2x1 vertical layout preferred)
+#                        * Use SAME unique_id with suffixes:
+#                        - f"{{unique_id}}_1.png"
+#                        - f"{{unique_id}}_2.png"
+#                    - Save EXCLUSIVELY to "generated_charts" folder
+#                    - File naming: f"chart_{{unique_id}}.png" (for single chart)
+#                    4. FINAL OUTPUT MUST BE:
+#                    - For single chart: f"generated_charts/chart_{{unique_id}}.png"
+#                    - For multiple charts: f"generated_charts/chart_{{unique_id}}.png" (combined grid image)
+#                    - **ONLY return this full path string, nothing else**
+#                    **Query:** {question}
+#                    IMPORTANT:
+#                    - Generate the unique_id FIRST before any operations
+#                    - Use THE SAME unique_id throughout entire process
+#                    - NEVER generate new UUIDs after initial creation
+#                    - Return EXACT filepath string of the final saved chart
+#                    """
+#     if chart_required:
+#         return ChatPromptTemplate.from_template(chart_prompt)
+#     else:
+#         return ChatPromptTemplate.from_template(chat_prompt)
+# def langchain_gemini_csv_handler(csv_url: str, question: str, chart_required: bool):
+#     global current_instance_index
+#     data = pd.read_csv(csv_url)
+#     # Track first error in case all instances fail
+#     first_error = None
+#     while current_instance_index < len(llm_instances):
+#         try:
+#             llm = llm_instances[current_instance_index]
+#             print(f"Attempting with LLM instance {current_instance_index + 1}/{len(llm_instances)}")
+#             # Create tool with validated name
+#             tool = PythonAstREPLTool(
+#                 locals={
+#                     "df": data,
+#                     "pd": pd,
+#                     "np": np,
+#                     "plt": plt,
+#                     "sns": sns,
+#                     "matplotlib": matplotlib,
+#                     "uuid": uuid,
+#                     "dt": dt
+#                 },
+#             )
+#             agent = create_agent(llm, data, [tool])
+#             prompt = _prompt_generator(question, chart_required)
+#             result = agent.invoke({"input": prompt})
+#             output = result.get("output")
+#             if output is None:
+#                 raise ValueError("Received None response from agent")
+#             if isinstance(output, str) and any(err in output.lower() for err in ['quota', 'limit', 'exhausted']):
+#                 raise ValueError(f"API limitation detected in response: {output}")
+#             return output
+#         except Exception as e:
+#             error_msg = f"Error with instance {current_instance_index}: {str(e)}"
+#             print(error_msg)
+#             # Store first error if not set
+#             if first_error is None:
+#                 first_error = error_msg
+#             # Check if we should try next instance
+#             if is_retryable_error(e):
+#                 current_instance_index += 1
+#                 continue
+#             else:
+#                 # Non-retryable error - return immediately
+#                 return {
+#                     "error": "Non-retryable error occurred",
+#                     "details": str(e),
+#                     "instance": current_instance_index
+#                 }
+#     # All instances exhausted
+#     error_response = {
+#         "error": "All API instances failed",
+#         "details": first_error or "Unknown error",
+#         "attempted_instances": current_instance_index
+#     }
+#     print(error_response)
+#     return error_response

gemini_report_generator.py CHANGED Viewed

@@ -364,3 +364,413 @@ async def generate_csv_report(csv_url: str, query: str) -> FileBoxProps:
 #     result = gemini_llm_chat("./documents/enterprise_sales_data.csv",
 #                            "Generate a detailed sales report of the last 6 months from all the aspects and include a bar chart showing the sales by region.")
 #     print(json.dumps(result, indent=2))

 #     result = gemini_llm_chat("./documents/enterprise_sales_data.csv",
 #                            "Generate a detailed sales report of the last 6 months from all the aspects and include a bar chart showing the sales by region.")
 #     print(json.dumps(result, indent=2))
+# import json
+# import numpy as np
+# import pandas as pd
+# import re
+# import os
+# import uuid
+# import logging
+# from io import StringIO
+# import sys
+# import traceback
+# from typing import Optional, Dict, Any, List, Tuple
+# from pydantic import BaseModel, Field
+# from google.api_core import exceptions as google_exceptions
+# from google.generativeai import GenerativeModel, configure
+# from dotenv import load_dotenv
+# import seaborn as sns
+# import datetime as dt
+# from supabase_service import upload_file_to_supabase
+# pd.set_option('display.max_columns', None)
+# pd.set_option('display.max_rows', None)
+# pd.set_option('display.max_colwidth', None)
+# load_dotenv()
+# API_KEYS = os.getenv("GEMINI_API_KEYS", "").split(",")
+# MODEL_NAME = 'gemini-2.0-flash'
+# class FileProps(BaseModel):
+#     fileName: str
+#     filePath: str
+#     fileType: str  # 'csv' | 'image'
+# class Files(BaseModel):
+#     csv_files: List[FileProps]
+#     image_files: List[FileProps]
+# class FileBoxProps(BaseModel):
+#     files: Files
+# os.environ['MPLBACKEND'] = 'agg'
+# import matplotlib.pyplot as plt
+# plt.show = lambda: None
+# logging.basicConfig(
+#     level=logging.INFO,
+#     format='%(asctime)s - %(levelname)s - %(message)s'
+# )
+# logger = logging.getLogger(__name__)
+# class GeminiInstance:
+#     """Wrapper for a single Gemini API instance"""
+#     def __init__(self, api_key: str):
+#         self.api_key = api_key
+#         self.model = None
+#         self.active = False
+#         self.failure_count = 0
+#         self.last_error = None
+#     def initialize(self) -> bool:
+#         try:
+#             configure(api_key=self.api_key)
+#             self.model = GenerativeModel(MODEL_NAME)
+#             self.active = True
+#             logger.info(f"Initialized Gemini instance with key: {self._mask_key()}")
+#             return True
+#         except Exception as e:
+#             self.last_error = str(e)
+#             self.failure_count += 1
+#             logger.error(f"Failed to initialize Gemini instance: {self._mask_key()}. Error: {str(e)}")
+#             return False
+#     def _mask_key(self) -> str:
+#         return f"{self.api_key[:8]}...{self.api_key[-4:]}" if self.api_key else "None"
+#     def generate_content(self, prompt: str) -> Tuple[Optional[str], Optional[Exception]]:
+#         try:
+#             response = self.model.generate_content(prompt)
+#             return response.text, None
+#         except Exception as e:
+#             self.last_error = str(e)
+#             self.failure_count += 1
+#             return None, e
+# class GeminiPool:
+#     """Pool of Gemini API instances with automatic failover"""
+#     def __init__(self, api_keys: List[str]):
+#         self.instances = [GeminiInstance(key) for key in api_keys]
+#         self.current_index = 0
+#         self.total_attempts = 0
+#     def get_active_instance(self) -> Optional[GeminiInstance]:
+#         """Get next available instance with automatic rotation"""
+#         if not self.instances:
+#             return None
+#         for _ in range(len(self.instances)):
+#             instance = self.instances[self.current_index]
+#             self.current_index = (self.current_index + 1) % len(self.instances)
+#             self.total_attempts += 1
+#             if instance.active or instance.initialize():
+#                 return instance
+#         return None
+#     def should_retry(self, error: Exception) -> bool:
+#         """Determine if the error is retryable"""
+#         if isinstance(error, google_exceptions.ResourceExhausted):
+#             return True
+#         if isinstance(error, google_exceptions.TooManyRequests):
+#             return True
+#         if isinstance(error, google_exceptions.ServiceUnavailable):
+#             return True
+#         error_str = str(error).lower()
+#         retry_phrases = [
+#             'quota',
+#             'limit',
+#             'exhausted',
+#             'retry',
+#             'unavailable',
+#             'overloaded',
+#             '429',
+#             '503'
+#         ]
+#         return any(phrase in error_str for phrase in retry_phrases)
+# class PythonREPL:
+#     """Secure Python REPL with file generation tracking"""
+#     def __init__(self, df: pd.DataFrame):
+#         self.df = df
+#         self.output_dir = os.path.abspath(f'generated_outputs/{uuid.uuid4()}')
+#         os.makedirs(self.output_dir, exist_ok=True)
+#         self.local_env = {
+#             "pd": pd,
+#             "df": self.df.copy(),
+#             "plt": plt,
+#             "os": os,
+#             "uuid": uuid,
+#             "sns": sns,
+#             "json": json,
+#             "dt": dt,
+#             "output_dir": self.output_dir
+#         }
+#     def execute(self, code: str) -> Dict[str, Any]:
+#         old_stdout = sys.stdout
+#         sys.stdout = mystdout = StringIO()
+#         file_tracker = {
+#             'csv_files': set(),
+#             'image_files': set()
+#         }
+#         try:
+#             code = f"""
+# import matplotlib.pyplot as plt
+# plt.switch_backend('agg')
+# {code}
+# plt.close('all')
+#             """
+#             exec(code, self.local_env)
+#             self.df = self.local_env.get('df', self.df)
+#             # Track generated files
+#             for fname in os.listdir(self.output_dir):
+#                 if fname.endswith('.csv'):
+#                     file_tracker['csv_files'].add(fname)
+#                 elif fname.lower().endswith(('.png', '.jpg', '.jpeg')):
+#                     file_tracker['image_files'].add(fname)
+#             error = False
+#             error_msg = None
+#         except Exception as e:
+#             error_msg = traceback.format_exc()
+#             error = True
+#         finally:
+#             sys.stdout = old_stdout
+#         return {
+#             "output": mystdout.getvalue(),
+#             "error": error,
+#             "error_message": error_msg if error else None,
+#             "df": self.local_env.get('df', self.df),
+#             "output_dir": self.output_dir,
+#             "files": {
+#                 "csv": [os.path.join(self.output_dir, f) for f in file_tracker['csv_files']],
+#                 "images": [os.path.join(self.output_dir, f) for f in file_tracker['image_files']]
+#             }
+#         }
+# class RethinkAgent(BaseModel):
+#     df: pd.DataFrame
+#     max_retries: int = Field(default=5, ge=1)
+#     current_retry: int = Field(default=0, ge=0)
+#     repl: Optional[PythonREPL] = None
+#     gemini_pool: Optional[GeminiPool] = None
+#     class Config:
+#         arbitrary_types_allowed = True
+#     def _extract_code(self, response: str) -> str:
+#         code_match = re.search(r'```python(.*?)```', response, re.DOTALL)
+#         return code_match.group(1).strip() if code_match else response.strip()
+#     def _generate_initial_prompt(self, query: str) -> str:
+#         return f"""Generate DIRECT EXECUTION CODE (no functions, no explanations) following STRICT RULES:
+#         MANDATORY REQUIREMENTS:
+#         1. Operate directly on existing 'df' variable
+#         2. Save ALL final DataFrames to CSV using: df.to_csv(f'{{output_dir}}/descriptive_name.csv')
+#         3. For visualizations: plt.savefig(f'{{output_dir}}/chart_name.png')
+#         4. Use EXACTLY this structure:
+#            # Data processing
+#            df_processed = df[...]  # filtering/grouping
+#            # Save results
+#            df_processed.to_csv(f'{{output_dir}}/result.csv')
+#            # Visualizations (if needed)
+#            plt.figure()
+#            ... plotting code ...
+#            plt.savefig(f'{{output_dir}}/chart.png')
+#            plt.close()
+#         FORBIDDEN:
+#         - Function definitions
+#         - Dummy data creation
+#         - Any code blocks besides pandas operations and matplotlib
+#         - Print statements showing dataframes
+#         DATAFRAME COLUMNS: {', '.join(self.df.columns)}
+#         DATAFRAME'S FIRST FIVE ROWS: {self.df.head().to_dict('records')}
+#         USER QUERY: {query}
+#         EXAMPLE RESPONSE FOR "Sales by region":
+#         # Data processing
+#         sales_by_region = df.groupby('region')['sales'].sum().reset_index()
+#         # Save results
+#         sales_by_region.to_csv(f'{{output_dir}}/sales_by_region.csv')
+#         """
+#     def _generate_retry_prompt(self, query: str, error: str, code: str) -> str:
+#         return f"""FIX THIS CODE (failed with: {error}) by STRICTLY FOLLOWING:
+#         1. REMOVE ALL FUNCTION DEFINITIONS
+#         2. ENSURE DIRECT DF OPERATIONS
+#         3. USE EXPLICIT output_dir PATHS
+#         4. ADD NECESSARY IMPORTS IF MISSING
+#         5. VALIDATE COLUMN NAMES EXIST
+#         BAD CODE:
+#         {code}
+#         CORRECTED CODE:"""
+#     def initialize_pool(self) -> bool:
+#         self.gemini_pool = GeminiPool(API_KEYS)
+#         return True
+#     def generate_code(self, query: str, error: Optional[str] = None, previous_code: Optional[str] = None) -> str:
+#         prompt = self._generate_retry_prompt(query, error, previous_code) if error else self._generate_initial_prompt(query)
+#         instance = self.gemini_pool.get_active_instance()
+#         if not instance:
+#             raise RuntimeError("No available Gemini instances")
+#         response_text, error = instance.generate_content(prompt)
+#         if error:
+#             if self.gemini_pool.should_retry(error):
+#                 logger.warning(f"Retryable error from Gemini: {str(error)}")
+#                 return self.generate_code(query, error, previous_code)
+#             raise error
+#         return self._extract_code(response_text)
+#     def execute_query(self, query: str) -> Dict[str, Any]:
+#         self.repl = PythonREPL(self.df)
+#         result = None
+#         while self.current_retry < self.max_retries:
+#             try:
+#                 code = self.generate_code(query,
+#                                          result["error_message"] if result else None,
+#                                          result["code"] if result else None)
+#                 execution_result = self.repl.execute(code)
+#                 if execution_result["error"]:
+#                     self.current_retry += 1
+#                     result = {
+#                         "error_message": execution_result["error_message"],
+#                         "code": code
+#                     }
+#                 else:
+#                     return {
+#                         "text": execution_result["output"],
+#                         "csv_files": execution_result["files"]["csv"],
+#                         "image_files": execution_result["files"]["images"]
+#                     }
+#             except Exception as e:
+#                 return {
+#                     "error": f"Critical failure: {str(e)}",
+#                     "csv_files": [],
+#                     "image_files": []
+#                 }
+#         return {
+#             "error": f"Failed after {self.max_retries} retries",
+#             "csv_files": [],
+#             "image_files": []
+#         }
+# def gemini_llm_chat(csv_url: str, query: str) -> Dict[str, Any]:
+#     try:
+#         df = pd.read_csv(csv_url)
+#         agent = RethinkAgent(df=df)
+#         if not agent.initialize_pool():
+#             return {"error": "API pool initialization failed"}
+#         result = agent.execute_query(query)
+#         if "error" in result:
+#             return result
+#         return {
+#             "message": result["text"],
+#             "csv_files": result["csv_files"],
+#             "image_files": result["image_files"]
+#         }
+#     except Exception as e:
+#         logger.error(f"Processing failed: {str(e)}", exc_info=True)
+#         return {
+#             "error": f"Processing error: {str(e)}",
+#             "csv_files": [],
+#             "image_files": []
+#         }
+# async def generate_csv_report(csv_url: str, query: str) -> FileBoxProps:
+#     try:
+#         result = gemini_llm_chat(csv_url, query)
+#         logger.info(f"Raw result from gemini_llm_chat: {result}")
+#         csv_files = []
+#         image_files = []
+#         if isinstance(result, dict) and 'csv_files' in result and 'image_files' in result:
+#             # Process CSV files
+#             for csv_path in result['csv_files']:
+#                 if os.path.exists(csv_path):
+#                     file_name = os.path.basename(csv_path)
+#                     try:
+#                         unique_file_name = f"{uuid.uuid4()}_{file_name}"
+#                         public_url = await upload_file_to_supabase(
+#                             file_path=csv_path,
+#                             file_name=unique_file_name
+#                         )
+#                         csv_files.append(FileProps(
+#                             fileName=file_name,
+#                             filePath=public_url,
+#                             fileType="csv"
+#                         ))
+#                         os.remove(csv_path)
+#                     except Exception as upload_error:
+#                         logger.error(f"Failed to upload CSV {file_name}: {str(upload_error)}")
+#                         continue
+#             # Process image files
+#             for img_path in result['image_files']:
+#                 if os.path.exists(img_path):
+#                     file_name = os.path.basename(img_path)
+#                     try:
+#                         unique_file_name = f"{uuid.uuid4()}_{file_name}"
+#                         public_url = await upload_file_to_supabase(
+#                             file_path=img_path,
+#                             file_name=unique_file_name
+#                         )
+#                         image_files.append(FileProps(
+#                             fileName=file_name,
+#                             filePath=public_url,
+#                             fileType="image"
+#                         ))
+#                         os.remove(img_path)
+#                     except Exception as upload_error:
+#                         logger.error(f"Failed to upload image {file_name}: {str(upload_error)}")
+#                         continue
+#             return FileBoxProps(
+#                 files=Files(
+#                     csv_files=csv_files,
+#                     image_files=image_files
+#                 )
+#             )
+#         else:
+#             raise ValueError("Unexpected response format from gemini_llm_chat")
+#     except Exception as e:
+#         logger.error(f"Report generation failed: {str(e)}", exc_info=True)
+#         return FileBoxProps(
+#             files=Files(
+#                 csv_files=[],
+#                 image_files=[]
+#             )
+#         )

groq_chart.py ADDED Viewed

	@@ -0,0 +1,101 @@

+from util_service import process_answer
+import os
+import threading
+import uuid
+from dotenv import load_dotenv
+from langchain_groq import ChatGroq
+import pandas as pd
+from pandasai import SmartDataframe
+import numpy as np
+import logging
+from csv_service import clean_data
+from util_service import handle_out_of_range_float
+# Populate the process environment from a .env file (python-dotenv) before the
+# os.getenv reads below.
+load_dotenv()
+# NOTE(review): this index/lock pair is not referenced by model() or groq_chart()
+# in this file; it looks like a leftover copied from the langchain chat module.
+# Confirm nothing imports it before removing.
+current_langchain_key_index = 0
+current_langchain_key_lock = threading.Lock()
+# Comma-separated Groq API keys and the model id, both read from the environment.
+# os.getenv returns None for an unset variable, so a missing GROQ_API_KEYS makes
+# the .split call raise AttributeError while this module is being imported.
+groq_api_keys = os.getenv("GROQ_API_KEYS").split(",")
+model_name = os.getenv("GROQ_LLM_MODEL")
+# Configure root logging at INFO and create this module's logger.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Extra prompt text that groq_chart() appends to every question it sends.
+instructions = """
+- Please ensure that each value is clearly visible, You may need to adjust the font size, rotate the labels, or use truncation to improve readability (if needed).
+- For multiple charts, arrange them in a grid format (2x2, 3x3, etc.)
+- Use colorblind-friendly palette
+- Read above instructions and follow them.
+"""
+# Key-rotation state for chart requests: the index into groq_api_keys of the key
+# currently in use, and the lock that guards reads and updates of that index.
+current_groq_chart_key_index = 0
+current_groq_chart_lock = threading.Lock()
+def model():
+    """Return a ChatGroq client bound to the currently selected chart API key.
+
+    The shared key index is read while holding the chart lock.
+
+    Raises:
+        Exception: if the shared index points past the end of groq_api_keys.
+    """
+    # Module state is only read here, so no `global` declaration is needed.
+    with current_groq_chart_lock:
+        key_position = current_groq_chart_key_index
+        if not key_position < len(groq_api_keys):
+            raise Exception("All API keys exhausted for chart generation")
+        selected_key = groq_api_keys[key_position]
+    return ChatGroq(model=model_name, api_key=selected_key)
+def groq_chart(csv_url: str, question: str):
+    """Ask a Groq-backed PandasAI SmartDataframe to draw a chart for a CSV.
+
+    At most one attempt is made per configured API key. When an attempt fails
+    with an error whose text contains "429", the shared key index is moved to
+    the next key (wrapping around) and the request is retried; any other
+    error ends the call immediately.
+
+    Args:
+        csv_url: Location of the CSV, passed unchanged to clean_data().
+        question: User request; the module-level `instructions` text is
+            appended before it is sent to the model.
+
+    Returns:
+        The value returned by SmartDataframe.chat on success, the string
+        "Chart not generated" when process_answer() returns a truthy value
+        for that answer, {"error": <message>} for a non-429 failure, or None
+        when every attempt hit a 429.
+    """
+    global current_groq_chart_key_index
+    key_count = len(groq_api_keys)
+    for _ in range(key_count):
+        # Snapshot which key this attempt uses, so that a 429 rotates past
+        # exactly that key and not past one another thread already selected.
+        with current_groq_chart_lock:
+            key_index = current_groq_chart_key_index % key_count
+            current_api_key = groq_api_keys[key_index]
+        try:
+            # Remove the on-disk cache DB before processing (presumably the
+            # PandasAI response cache - TODO confirm). Best effort only.
+            cache_db_path = "/workspace/cache/cache_db_0.11.db"
+            if os.path.exists(cache_db_path):
+                try:
+                    os.remove(cache_db_path)
+                except OSError as cleanup_error:
+                    logger.info(f"Cache cleanup error: {cleanup_error}")
+            data = clean_data(csv_url)
+            llm = ChatGroq(model=model_name, api_key=current_api_key)
+            # Generate unique filename using UUID
+            chart_filename = f"chart_{uuid.uuid4()}.png"
+            chart_path = os.path.join("generated_charts", chart_filename)
+            # Configure SmartDataframe with chart settings
+            df = SmartDataframe(
+                data,
+                config={
+                    'llm': llm,
+                    'save_charts': True,  # Enable chart saving
+                    'open_charts': False,
+                    'save_charts_path': os.path.dirname(chart_path),  # Directory to save
+                    'custom_chart_filename': chart_filename  # Unique filename
+                }
+            )
+            answer = df.chat(question + instructions)
+            if process_answer(answer):
+                return "Chart not generated"
+            return answer
+        except Exception as e:
+            error = str(e)
+            if "429" not in error:
+                logger.info(f"Chart generation error: {error}")
+                return {"error": error}
+            with current_groq_chart_lock:
+                # Advance only if the shared index still points at the key that
+                # just failed. Otherwise a concurrent caller has already rotated,
+                # and rotating again would skip an untried key (or, with two
+                # keys, land back on the rate-limited one).
+                if current_groq_chart_key_index % key_count == key_index:
+                    current_groq_chart_key_index = (key_index + 1) % key_count
+    logger.info("All API keys exhausted for chart generation")
+    return None

groq_chat.py ADDED Viewed

	@@ -0,0 +1,89 @@

+import os
+import threading
+import uuid
+from dotenv import load_dotenv
+from langchain_groq import ChatGroq
+import pandas as pd
+from pandasai import SmartDataframe
+import numpy as np
+import logging
+from csv_service import clean_data
+from util_service import handle_out_of_range_float
+# Populate the process environment from a .env file (python-dotenv) before the
+# os.getenv reads below.
+load_dotenv()
+# Key-rotation state for groq_chat(): the index into groq_api_keys of the key to
+# use, and the lock that guards reads and updates of that index.
+current_groq_key_index = 0
+current_groq_key_lock = threading.Lock()
+# Comma-separated Groq API keys and the model id, both read from the environment.
+# os.getenv returns None for an unset variable, so a missing GROQ_API_KEYS makes
+# the .split call raise AttributeError while this module is being imported.
+groq_api_keys = os.getenv("GROQ_API_KEYS").split(",")
+model_name = os.getenv("GROQ_LLM_MODEL")
+# Configure root logging at INFO and create this module's logger.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+def groq_chat(csv_url: str, question: str):
+    """Answer a question about a CSV using a Groq-backed PandasAI SmartDataframe.
+
+    At most one attempt is made per configured API key. When an attempt fails
+    with an error whose text contains "429", the shared key index is moved to
+    the next key (wrapping around) and the request is retried. The index wraps
+    instead of running off the end of the list, so a burst of 429s no longer
+    leaves the module unable to serve later calls.
+
+    Args:
+        csv_url: Location of the CSV, passed unchanged to clean_data().
+        question: The user's question, sent to SmartDataframe.chat as is.
+
+    Returns:
+        The answer converted for serialization: a list of row dicts for a
+        DataFrame, a dict for a Series or dict, a list for a list, and
+        {"answer": <str>} for anything else. Returns None when a non-429
+        error occurs or when every attempt hit a 429.
+    """
+    global current_groq_key_index
+    key_count = len(groq_api_keys)
+    for _ in range(key_count):
+        # Snapshot which key this attempt uses, so that a 429 rotates past
+        # exactly that key and not past one another thread already selected.
+        with current_groq_key_lock:
+            key_index = current_groq_key_index % key_count
+            current_api_key = groq_api_keys[key_index]
+        try:
+            # Remove the on-disk cache DB before processing (presumably the
+            # PandasAI response cache - TODO confirm). Best effort only.
+            cache_db_path = "/workspace/cache/cache_db_0.11.db"
+            if os.path.exists(cache_db_path):
+                try:
+                    os.remove(cache_db_path)
+                except OSError as cleanup_error:
+                    logger.info(f"Error deleting cache DB file: {cleanup_error}")
+            data = clean_data(csv_url)
+            llm = ChatGroq(model=model_name, api_key=current_api_key)
+            # Generate unique filename using UUID
+            chart_filename = f"chart_{uuid.uuid4()}.png"
+            chart_path = os.path.join("generated_charts", chart_filename)
+            # Configure SmartDataframe with chart settings
+            df = SmartDataframe(
+                data,
+                config={
+                    'llm': llm,
+                    'save_charts': True,  # Enable chart saving
+                    'open_charts': False,
+                    'save_charts_path': os.path.dirname(chart_path),  # Directory to save
+                    'custom_chart_filename': chart_filename  # Unique filename
+                }
+            )
+            answer = df.chat(question)
+            # Convert each supported answer type, passing values through
+            # handle_out_of_range_float along the way.
+            if isinstance(answer, pd.DataFrame):
+                processed = answer.apply(handle_out_of_range_float).to_dict(orient="records")
+            elif isinstance(answer, pd.Series):
+                processed = answer.apply(handle_out_of_range_float).to_dict()
+            elif isinstance(answer, list):
+                processed = [handle_out_of_range_float(item) for item in answer]
+            elif isinstance(answer, dict):
+                processed = {k: handle_out_of_range_float(v) for k, v in answer.items()}
+            else:
+                processed = {"answer": str(handle_out_of_range_float(answer))}
+            return processed
+        except Exception as e:
+            error_message = str(e)
+            if "429" not in error_message:
+                # Report the index this attempt actually used, not the shared
+                # one, which another thread may have changed in the meantime.
+                logger.info(f"Error with API key index {key_index}: {error_message}")
+                return None
+            with current_groq_key_lock:
+                # Advance only if the shared index still points at the key that
+                # just failed; a concurrent caller may already have rotated.
+                if current_groq_key_index % key_count == key_index:
+                    current_groq_key_index = (key_index + 1) % key_count
+    logger.info("All API keys exhausted.")
+    return None

lc_groq_chart.py ADDED Viewed

	@@ -0,0 +1,82 @@

+import logging
+import os
+import threading
+import uuid
+from dotenv import load_dotenv
+from langchain_groq import ChatGroq
+from matplotlib import pyplot as plt
+import matplotlib
+import numpy as np
+import pandas as pd
+from csv_service import clean_data, extract_chart_filenames
+from langchain_experimental.tools import PythonAstREPLTool
+from langchain_experimental.agents import create_pandas_dataframe_agent
+from util_service import _prompt_generator
+import seaborn as sns
+# Populate the process environment from a .env file (python-dotenv) before the
+# os.getenv reads below.
+load_dotenv()
+# NOTE(review): this index/lock pair is not referenced by langchain_csv_chart()
+# in this file; it looks like a leftover copied from the langchain chat module.
+# Confirm nothing imports it before removing.
+current_langchain_key_index = 0
+current_langchain_key_lock = threading.Lock()
+# Comma-separated Groq API keys and the model id, both read from the environment.
+# os.getenv returns None for an unset variable, so a missing GROQ_API_KEYS makes
+# the .split call raise AttributeError while this module is being imported.
+groq_api_keys = os.getenv("GROQ_API_KEYS").split(",")
+model_name = os.getenv("GROQ_LLM_MODEL")
+# Configure root logging at INFO and create this module's logger.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Round-robin state used by langchain_csv_chart(): the index of the next key to
+# hand out, and the lock that guards reads and updates of that index.
+current_langchain_chart_key_index = 0
+current_langchain_chart_lock = threading.Lock()
+def langchain_csv_chart(csv_url: str, question: str, chart_required: bool):
+    """Generate chart files for a CSV with a Groq-backed LangChain pandas agent.
+
+    The CSV is loaded once via clean_data(); then up to len(groq_api_keys)
+    attempts are made, each taking the next key in round-robin order. An
+    attempt succeeds when extract_chart_filenames() finds at least one
+    filename in the agent's output.
+
+    Args:
+        csv_url: Location of the CSV; also embedded in the prompt text.
+        question: The user's charting request.
+        chart_required: Accepted for interface compatibility; the prompt is
+            always generated with the chart flag set to True.
+
+    Returns:
+        The non-empty result of extract_chart_filenames(), or None when no
+        attempt produced a chart file.
+    """
+    global current_langchain_chart_key_index
+    data = clean_data(csv_url)
+    total_keys = len(groq_api_keys)
+    final_attempt = total_keys - 1
+    for attempt in range(total_keys):
+        try:
+            # Take the current key and advance the shared round-robin pointer.
+            with current_langchain_chart_lock:
+                api_key = groq_api_keys[current_langchain_chart_key_index]
+                key_slot = current_langchain_chart_key_index
+                current_langchain_chart_key_index = (key_slot + 1) % len(groq_api_keys)
+            llm = ChatGroq(model=model_name, api_key=api_key)
+            # Names made available to the code the agent executes.
+            repl_namespace = {
+                "df": data,
+                "pd": pd,
+                "np": np,
+                "plt": plt,
+                "sns": sns,
+                "matplotlib": matplotlib,
+                "uuid": uuid,
+            }
+            agent = create_pandas_dataframe_agent(
+                llm,
+                data,
+                agent_type="tool-calling",
+                verbose=True,
+                allow_dangerous_code=True,
+                extra_tools=[PythonAstREPLTool(locals=repl_namespace)],
+                return_intermediate_steps=True,
+            )
+            prompt = _prompt_generator(
+                f"{question} and use this csv_url: {csv_url} to read the csv file",
+                True,
+            )
+            output = agent.invoke({"input": prompt}).get("output", "")
+            # Success means the output names at least one chart file.
+            chart_files = extract_chart_filenames(output)
+            if len(chart_files) != 0:
+                return chart_files
+            # A fruitless attempt is logged unless it was the last one.
+            if attempt != final_attempt:
+                logger.info(f"Langchain chart error (key {key_slot}): {output}")
+        except Exception as e:
+            logger.info(f"Langchain chart error (key {key_slot}): {str(e)}")
+    logger.info("All API keys exhausted for chart generation")
+    return None

lc_groq_chat.py ADDED Viewed

	@@ -0,0 +1,75 @@

+import logging
+import os
+import threading
+from langchain_groq import ChatGroq
+from matplotlib import pyplot as plt
+import matplotlib
+import numpy as np
+import pandas as pd
+from dotenv import load_dotenv
+from csv_service import clean_data
+import seaborn as sns
+from langchain_experimental.tools import PythonAstREPLTool
+from langchain_experimental.agents import create_pandas_dataframe_agent
+from util_service import _prompt_generator
+# Populate the process environment from a .env file (python-dotenv) before the
+# os.getenv reads below.
+load_dotenv()
+# Round-robin state used by langchain_csv_chat(): the index of the next key to
+# hand out, and the lock that guards reads and updates of that index.
+current_langchain_key_index = 0
+current_langchain_key_lock = threading.Lock()
+# Comma-separated Groq API keys and the model id, both read from the environment.
+# os.getenv returns None for an unset variable, so a missing GROQ_API_KEYS makes
+# the .split call raise AttributeError while this module is being imported.
+groq_api_keys = os.getenv("GROQ_API_KEYS").split(",")
+model_name = os.getenv("GROQ_LLM_MODEL")
+# Configure root logging at INFO and create this module's logger.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+def langchain_csv_chat(csv_url: str, question: str, chart_required: bool):
+    """Answer a question about a CSV with a Groq-backed LangChain pandas agent.
+
+    The CSV is loaded once via clean_data(); then up to len(groq_api_keys)
+    attempts are made, each taking the next key in round-robin order. The
+    first attempt that completes without raising supplies the result.
+
+    Args:
+        csv_url: Location of the CSV, passed unchanged to clean_data().
+        question: The user's question.
+        chart_required: Forwarded to _prompt_generator() with the question.
+
+    Returns:
+        The "output" entry of the agent's result, or None when every attempt
+        raised an exception.
+    """
+    global current_langchain_key_index
+    data = clean_data(csv_url)
+    for _ in range(len(groq_api_keys)):
+        # Take the current key (wrapping to the first when the pointer has run
+        # off the end) and advance the shared pointer for the next caller.
+        with current_langchain_key_lock:
+            if not current_langchain_key_index < len(groq_api_keys):
+                current_langchain_key_index = 0
+            key_slot = current_langchain_key_index
+            api_key = groq_api_keys[key_slot]
+            current_langchain_key_index = key_slot + 1
+        try:
+            llm = ChatGroq(model=model_name, api_key=api_key)
+            # Names made available to the code the agent executes.
+            repl_namespace = {
+                "df": data,
+                "pd": pd,
+                "np": np,
+                "plt": plt,
+                "sns": sns,
+                "matplotlib": matplotlib,
+            }
+            agent = create_pandas_dataframe_agent(
+                llm,
+                data,
+                agent_type="tool-calling",
+                verbose=True,
+                allow_dangerous_code=True,
+                extra_tools=[PythonAstREPLTool(locals=repl_namespace)],
+                return_intermediate_steps=True,
+            )
+            agent_input = {"input": _prompt_generator(question, chart_required)}
+            return agent.invoke(agent_input).get("output")
+        except Exception as e:
+            logger.info(f"Error with key index {key_slot}: {str(e)}")
+    # If all keys are exhausted, return None
+    logger.info("All API keys have been exhausted.")
+    return None

orchestrator_agent.py CHANGED Viewed

@@ -142,51 +142,6 @@ def create_agent(csv_url: str, api_key: str, conversation_history: List) -> Agen
 5. Offer next-step suggestions
 """
-    # system_prompt = (
-    # "You are a data analyst. "
-    # "You have all the tools you need to answer any question. "
-    # "If the user asks for multiple answers or charts, break the question into several well-defined questions. "
-    # "Pass the CSV URL or file path along with the questions to the tools to generate the answer. "
-    # "The tools are actually LLMs with Python code execution capabilities. "
-    # "Modify the query if needed to make it simpler for the LLM to understand. "
-    # "Answer in a friendly and helpful manner. "
-    # "**Format images** in Markdown: `![alt_text](direct-image-url)`. "
-    # f"Your CSV URL is {csv_url}. "
-    # f"Your CSV metadata is {csv_metadata}."
-    # )
-    # system_prompt = (
-    #     "You are a data analyst assistant with limited tool capabilities. "
-    #     "Available tools can only handle simple data queries: "
-    #     "- Count rows/columns\n- Calculate basic stats (avg, sum, min/max)\n"
-    #     "- Create simple visualizations (pie charts, bar graphs)\n"
-    #     "- Show column names/types\n\n"
-    #     "Query Handling Rules:\n"
-    #     "1. If query is complex, ambiguous, or exceeds tool capabilities:\n"
-    #     "   - Break into simpler sub-questions\n"
-    #     "   - Ask for clarification\n"
-    #     "   - Rephrase to nearest simple query\n"
-    #     "2. For 'full report' requests:\n"
-    #     "   - Outline possible analysis steps\n"
-    #     "   - Ask user to select one component at a time\n\n"
-    #     "Examples:\n"
-    #     "- Bad query: 'Show me everything'\n"
-    #     "  Response: 'I can show row count (10), columns (5: Name, Age...), "
-    #     "or a pie chart of categories. Which would you like?'\n"
-    #     "- Bad query: 'Analyze trends'\n"
-    #     "  Response: 'For trend analysis, I can show monthly averages or "
-    #     "year-over-year comparisons. Please specify time period and metric.'\n\n"
-    #     "Current CSV Context:\n"
-    #     f"- URL: {csv_url}\n"
-    #     f"- Metadata: {csv_metadata}\n\n"
-    #     "Always format images as: ![Chart Description](direct-image-url)"
-    # )
     return Agent(
         model=initialize_model(api_key),
         deps_type=str,
@@ -216,3 +171,149 @@ def csv_orchestrator_chat(csv_url: str, user_question: str, conversation_history
     # If all keys are exhausted or fail
     print("All API keys have been exhausted or failed.")
     return None

 5. Offer next-step suggestions
 """
     return Agent(
         model=initialize_model(api_key),
         deps_type=str,
     # If all keys are exhausted or fail
     print("All API keys have been exhausted or failed.")
     return None
+# import os
+# from typing import Dict, List, Any
+# from pydantic_ai import Agent
+# from pydantic_ai.models.gemini import GeminiModel
+# from pydantic_ai.providers.google_gla import GoogleGLAProvider
+# from pydantic_ai import RunContext
+# from pydantic import BaseModel
+# from google.api_core.exceptions import ResourceExhausted
+# from csv_service import get_csv_basic_info
+# from orchestrator_functions import csv_chart, csv_chat
+# from dotenv import load_dotenv
+# load_dotenv()
+# # Thread-safe key management
+# current_gemini_key_index = 0
+# GEMINI_API_KEYS = os.getenv("GEMINI_API_KEYS", "").split(",")
+# def initialize_model(api_key: str) -> GeminiModel:
+#     return GeminiModel(
+#         'gemini-2.0-flash',
+#         provider=GoogleGLAProvider(api_key=api_key)
+#     )
+# def is_resource_exhausted_error(result_or_exception) -> bool:
+#     """Check if the error indicates resource exhaustion"""
+#     error_str = str(result_or_exception).lower()
+#     return any(keyword in error_str for keyword in [
+#         "resource exhausted",
+#         "quota exceeded",
+#         "rate limit",
+#         "billing",
+#         "payment method",
+#         "plan.rule"
+#     ])
+# async def generate_csv_answer(csv_url: str, user_questions: List[str]) -> Any:
+#     answers = []
+#     for question in user_questions:
+#         answer = await csv_chat(csv_url, question)
+#         answers.append(dict(question=question, answer=answer))
+#     return answers
+# async def generate_chart(csv_url: str, user_questions: List[str]) -> Any:
+#     charts = []
+#     for question in user_questions:
+#         chart = await csv_chart(csv_url, question)
+#         charts.append(dict(question=question, image_url=chart))
+#     return charts
+# def create_agent(csv_url: str, api_key: str, conversation_history: List) -> Agent:
+#     csv_metadata = get_csv_basic_info(csv_url)
+#     system_prompt = f"""
+# # Role: Expert Data Analysis Assistant
+# # Personality & Origin: You are exclusively the CSV Document Analysis Assistant, created by the chatcsvandpdf team. Your sole purpose is to assist users with CSV-related tasks—analyzing, interpreting, and processing data.
+# ## Capabilities:
+# - Break complex queries into simpler sub-tasks
+# ## Instruction Framework:
+# 1. QUERY PROCESSING:
+#    - If request contains multiple questions:
+#      a) Decompose into logical sub-questions
+#      b) Process sequentially
+#      c) Combine results coherently
+# 2. DATA HANDLING:
+#    - Always verify CSV structure matches the request
+#    - Handle missing/ambiguous data by:
+#      a) Asking clarifying questions OR
+#      b) Making reasonable assumptions (state them clearly)
+# 3. VISUALIZATION STANDARDS:
+#    - Format images as: `![Description](direct-url)`
+#    - Include axis labels and titles
+#    - Use appropriate chart types
+# 4. COMMUNICATION PROTOCOL:
+#    - Friendly, professional tone
+#    - Explain technical terms
+#    - Summarize key findings
+#    - Highlight limitations/caveats
+# 5. TOOL USAGE:
+#    - Can process statistical operations
+#    - Supports visualization libraries
+# ## Current Context:
+# - Working with CSV_URL: {csv_url}
+# - Dataset overview: {csv_metadata}
+# - Your conversation history: {conversation_history}
+# - Output format: Markdown compatible
+# """
+#     return Agent(
+#         model=initialize_model(api_key),
+#         deps_type=str,
+#         tools=[generate_csv_answer, generate_chart],
+#         system_prompt=system_prompt
+#     )
+# def csv_orchestrator_chat(csv_url: str, user_question: str, conversation_history: List) -> str:
+#     global current_gemini_key_index
+#     while current_gemini_key_index < len(GEMINI_API_KEYS):
+#         api_key = GEMINI_API_KEYS[current_gemini_key_index]
+#         try:
+#             print(f"Attempting with API key index {current_gemini_key_index}")
+#             agent = create_agent(csv_url, api_key, conversation_history)
+#             result = agent.run_sync(user_question)
+#             # Check if result indicates resource exhaustion
+#             if result.data and is_resource_exhausted_error(result.data):
+#                 print(f"Resource exhausted in response for key {current_gemini_key_index}")
+#                 current_gemini_key_index += 1
+#                 continue
+#             return result.data
+#         except ResourceExhausted as e:
+#             print(f"Resource exhausted for API key {current_gemini_key_index}: {e}")
+#             current_gemini_key_index += 1
+#             continue
+#         except Exception as e:
+#             if is_resource_exhausted_error(e):
+#                 print(f"Resource exhausted error detected for key {current_gemini_key_index}")
+#                 current_gemini_key_index += 1
+#                 continue
+#             print(f"Non-recoverable error with key {current_gemini_key_index}: {e}")
+#             return f"Error processing request: {str(e)}"
+#     return "All API keys have been exhausted. Please update billing information."

orchestrator_functions.py CHANGED Viewed

@@ -612,7 +612,6 @@ async def csv_chart(csv_url: str, query: str):
         except Exception as openai_error:
             logger.info(f"OpenAI failed ({str(openai_error)}), trying raw Groq...")
-            return 'Sorry, I could not generate a chart...'
             # --- 2. Second Attempt: Raw Groq ---
             try:
                 groq_result = await asyncio.to_thread(groq_chart, csv_url, query)

         except Exception as openai_error:
             logger.info(f"OpenAI failed ({str(openai_error)}), trying raw Groq...")
             # --- 2. Second Attempt: Raw Groq ---
             try:
                 groq_result = await asyncio.to_thread(groq_chart, csv_url, query)