"""Module for querying the Groq API with dataset context."""

import logging
import os

import pandas as pd
from groq import APIStatusError, Groq

from rag.memory import get_dataset

# Configure logging for this module. The format omits the logger name, so
# records emitted through the module logger below look the same as records
# emitted through the root logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Initialize Groq client with API key from environment variable.
# Importing this module fails fast when the key is missing.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    logger.error("GROQ_API_KEY environment variable not set. Please set it in Hugging Face Space Secrets.")
    raise ValueError("GROQ_API_KEY environment variable not set.")

client = Groq(api_key=GROQ_API_KEY)

# Model used when the caller does not pass one explicitly.
# NOTE(review): hosted model IDs get retired over time -- confirm this one is
# still served by Groq before relying on the default.
DEFAULT_MODEL = "llama3-70b-8192"


def _build_context(df: pd.DataFrame, user_query: str) -> str:
    """Render the LLM prompt: dataset overview, summary statistics, sample rows, question."""
    return f"""
    You are an expert Data Analyst. You have been provided with a dataset.

    **Dataset Overview:**
    - **Shape:** {df.shape[0]} rows and {df.shape[1]} columns.
    - **Columns and Data Types:**\n{df.dtypes.to_string()}

    **Summary Statistics:**\n{df.describe(include='all').to_string()}

    **First 5 Rows:**\n{df.head(5).to_string(index=False)}

    **User Question:** {user_query}

    Answer the user's question clearly and accurately based *only* on the provided dataset information.
    """


def query_dataset_with_groq(dataset_name: str, user_query: str, *, model: str = DEFAULT_MODEL) -> str:
    """Queries the Groq API with a user question, providing dataset context.

    The dataset is looked up with ``get_dataset``; its shape, dtypes,
    ``describe(include='all')`` output and first five rows are embedded in the
    prompt together with the question.

    Args:
        dataset_name: The name of the dataset to retrieve from memory.
        user_query: The user's question about the dataset.
        model: Groq model identifier to use for the chat completion.
            Defaults to ``DEFAULT_MODEL``.

    Returns:
        The AI's answer to the question, or a human-readable error message if
        the dataset is missing, cannot be summarized, or the API call fails.
        This function reports failures through its return value rather than
        by raising.
    """
    logger.info("Attempting to query Groq with user question: %s", user_query)

    df = get_dataset(dataset_name)
    if df is None:
        logger.error("Dataset '%s' not found in memory for Groq query.", dataset_name)
        return "No dataset found with that name. Please upload a dataset first."

    # Building the prompt can fail on its own: for example, DataFrame.describe()
    # raises ValueError for a frame without columns. Catch broadly here because
    # this function is the boundary that turns failures into user-facing text.
    try:
        context = _build_context(df, user_query)
    except Exception as e:
        logger.exception("Failed to build dataset context for '%s': %s", dataset_name, e)
        return f"Could not summarize the dataset for the AI query: {e}"

    try:
        logger.info("Sending request to Groq API for chat completion.")
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful data science assistant. Provide concise and accurate answers."},
                {"role": "user", "content": context},
            ],
            temperature=0.1,  # Low temperature for factual and less creative responses
            max_tokens=1024,  # Limit response length
            top_p=1,
            stop=None,
        )
        ai_response_content = response.choices[0].message.content
    except APIStatusError as e:
        logger.exception(
            "Groq API error occurred: Status Code %s, Response: %s",
            e.status_code,
            e.response,
        )
        if e.status_code == 503:
            return "The AI service is currently unavailable due to high demand or maintenance. Please try again later."
        return f"An error occurred with the AI service (Status: {e.status_code}). Please check the logs for more details."
    except Exception as e:
        logger.exception("An unexpected error occurred while querying the AI: %s", e)
        return f"An unexpected error occurred while processing your request: {e}"

    # The message content may be None; keep the documented ``str`` return type.
    if ai_response_content is None:
        logger.warning("Groq API returned a response without message content.")
        return "The AI service returned an empty response. Please try again."

    logger.info("Successfully received response from Groq API.")
    return ai_response_content