Spaces:
Sleeping
Sleeping
File size: 3,389 Bytes
aa68823 95fb3fe aa68823 95fb3fe aa68823 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
"""Module for querying the Groq API with dataset context."""
from groq import Groq, APIStatusError
from rag.memory import get_dataset
import pandas as pd
import logging
import os
# Root-logger setup for this module: INFO and above, rendered as
# "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Build the shared Groq client once, at import time, from the GROQ_API_KEY
# environment variable. A missing or empty key is logged and aborts the
# import with ValueError.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if GROQ_API_KEY:
    client = Groq(api_key=GROQ_API_KEY)
else:
    logging.error("GROQ_API_KEY environment variable not set. Please set it in Hugging Face Space Secrets.")
    raise ValueError("GROQ_API_KEY environment variable not set.")
def _build_dataset_context(df: pd.DataFrame, user_query: str) -> str:
    """Render the prompt text: dataset overview, statistics, sample rows, question."""
    sections = [
        "",  # keeps the leading newline of the prompt
        "You are an expert Data Analyst. You have been provided with a dataset.",
        "**Dataset Overview:**",
        f"- **Shape:** {df.shape[0]} rows and {df.shape[1]} columns.",
        f"- **Columns and Data Types:**\n{df.dtypes.to_string()}",
        f"**Summary Statistics:**\n{df.describe(include='all').to_string()}",
        f"**First 5 Rows:**\n{df.head(5).to_string(index=False)}",
        f"**User Question:** {user_query}",
        "Answer the user's question clearly and accurately based *only* on the provided dataset information.",
        "",  # keeps the trailing newline of the prompt
    ]
    return "\n".join(sections)


def query_dataset_with_groq(dataset_name: str, user_query: str, *, model: str = "llama3-70b-8192") -> str:
    """Queries the Groq API with a user question, providing dataset context.

    The dataset is looked up with ``get_dataset``; its shape, dtypes,
    ``describe(include='all')`` statistics and first five rows are embedded in
    the prompt together with the question.

    Args:
        dataset_name (str): The name of the dataset to retrieve from memory.
        user_query (str): The user's question about the dataset.
        model (str): Groq model ID used for the chat completion. Keyword-only;
            defaults to the previously hard-coded ``"llama3-70b-8192"``.
            NOTE(review): Groq has published a deprecation notice for this
            model ID -- confirm the default against Groq's deprecations page.

    Returns:
        str: The AI's answer to the question, or an error message if the
        dataset is missing, has no columns, or the query fails.
    """
    logging.info("Attempting to query Groq with user question: %s", user_query)

    df = get_dataset(dataset_name)
    if df is None:
        logging.error("Dataset '%s' not found in memory for Groq query.", dataset_name)
        return "No dataset found with that name. Please upload a dataset first."

    # DataFrame.describe() raises ValueError when there are no columns, and the
    # prompt is built outside the API error handling below. Return a message
    # instead of letting that exception escape to the caller.
    if df.shape[1] == 0:
        logging.warning("Dataset '%s' has no columns; skipping Groq query.", dataset_name)
        return "The dataset has no columns to analyze. Please upload a dataset that contains data."

    # Prepare context for the LLM: dataset overview, summary statistics, and a sample.
    context = _build_dataset_context(df, user_query)

    try:
        logging.info("Sending request to Groq API for chat completion.")
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful data science assistant. Provide concise and accurate answers."},
                {"role": "user", "content": context},
            ],
            temperature=0.1,  # Low temperature for factual and less creative responses
            max_tokens=1024,  # Limit response length
            top_p=1,
            stop=None,
        )
        ai_response_content = response.choices[0].message.content
        logging.info("Successfully received response from Groq API.")
        return ai_response_content
    except APIStatusError as e:
        # logging.exception logs at ERROR level and attaches the traceback.
        logging.exception(
            "Groq API error occurred: Status Code %s, Response: %s",
            e.status_code,
            e.response,
        )
        if e.status_code == 503:
            return "The AI service is currently unavailable due to high demand or maintenance. Please try again later."
        return f"An error occurred with the AI service (Status: {e.status_code}). Please check the logs for more details."
    except Exception as e:
        # Top-level boundary: any other failure becomes a user-facing message.
        logging.exception("An unexpected error occurred while querying the AI: %s", e)
        return f"An unexpected error occurred while processing your request: {e}"