# AutoML / rag/rag_query.py
# Author: Al1Abdullah
# Commit 95fb3fe: Add Dockerfile and update API key handling for Hugging Face deployment
"""Module for querying the Groq API with dataset context."""
from groq import Groq, APIStatusError
from rag.memory import get_dataset
import pandas as pd
import logging
import os
# Configure logging for this module
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Initialize Groq client with API key from environment variable
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
logging.error("GROQ_API_KEY environment variable not set. Please set it in Hugging Face Space Secrets.")
raise ValueError("GROQ_API_KEY environment variable not set.")
client = Groq(api_key=GROQ_API_KEY)
def query_dataset_with_groq(dataset_name, user_query):
    """Ask the Groq LLM a question about a previously uploaded dataset.

    Builds a prompt containing the dataset's shape, dtypes, summary
    statistics, and a small sample, then sends it to the Groq chat API.

    Args:
        dataset_name (str): The name of the dataset to retrieve from memory.
        user_query (str): The user's question about the dataset.

    Returns:
        str: The AI's answer to the question, or a human-readable error
            message if the dataset is missing or the API call fails.
    """
    logging.info(f"Attempting to query Groq with user question: {user_query}")

    # Guard clause: nothing to answer if the dataset is not in memory.
    df = get_dataset(dataset_name)
    if df is None:
        logging.error(f"Dataset '{dataset_name}' not found in memory for Groq query.")
        return "No dataset found with that name. Please upload a dataset first."

    rows, cols = df.shape
    # Prompt context: overview, dtypes, describe() output, and a head sample.
    context = f"""
You are an expert Data Analyst. You have been provided with a dataset.
**Dataset Overview:**
- **Shape:** {rows} rows and {cols} columns.
- **Columns and Data Types:**\n{df.dtypes.to_string()}
**Summary Statistics:**\n{df.describe(include='all').to_string()}
**First 5 Rows:**\n{df.head(5).to_string(index=False)}
**User Question:** {user_query}
Answer the user's question clearly and accurately based *only* on the provided dataset information.
"""

    chat_messages = [
        {"role": "system", "content": "You are a helpful data science assistant. Provide concise and accurate answers."},
        {"role": "user", "content": context},
    ]

    try:
        logging.info("Sending request to Groq API for chat completion.")
        completion = client.chat.completions.create(
            model="llama3-70b-8192",  # Using a powerful model for better understanding
            messages=chat_messages,
            temperature=0.1,  # Low temperature for factual and less creative responses
            max_tokens=1024,  # Limit response length
            top_p=1,
            stop=None,
        )
        answer = completion.choices[0].message.content
        logging.info("Successfully received response from Groq API.")
        return answer
    except APIStatusError as e:
        # API-level failure: surface a friendly message, keep details in the log.
        logging.error(f"Groq API error occurred: Status Code {e.status_code}, Response: {e.response}", exc_info=True)
        if e.status_code == 503:
            return "The AI service is currently unavailable due to high demand or maintenance. Please try again later."
        return f"An error occurred with the AI service (Status: {e.status_code}). Please check the logs for more details."
    except Exception as e:
        # Catch-all boundary for anything unexpected (network, parsing, etc.).
        logging.error(f"An unexpected error occurred while querying the AI: {e}", exc_info=True)
        return f"An unexpected error occurred while processing your request: {e}"