Spaces:

charesz
/

csv-redaer-bot

Sleeping

File size: 2,021 Bytes

ec0daac
cbf249a
9fe6fc0
46b0e84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e53c1ba
f9e9ed3
9fe6fc0
 
c39a167
9fe6fc0
ec0daac
 
9fe6fc0
ec0daac
46b0e84
 
 
9fe6fc0
46b0e84
 
ec0daac
46b0e84
9fe6fc0
ec0daac
46b0e84
 
 
 
9fe6fc0
 
46b0e84
 
 
 
 
 
9fe6fc0
f9e9ed3
e53c1ba
9fe6fc0
 
 
 
 
 
 
 
 
 
e53c1ba
9fe6fc0

from huggingface_hub import InferenceClient
import os
import json
import pandas as pd

def compute_dataset_stats(dataset_summary):
    """Return pandas descriptive statistics for the dataset as a dict.

    The input is loaded into a ``DataFrame`` and summarised with
    ``describe(include='all')``; the summary is converted with ``to_dict()``.
    This is best-effort: any failure (for example input that cannot be turned
    into a frame, or a frame without columns) yields an empty dict instead of
    an exception.
    """
    try:
        return pd.DataFrame(dataset_summary).describe(include="all").to_dict()
    except Exception:
        # Statistics are optional context for the prompt, so never raise here.
        return {}

def format_stats(stats):
    """Render a statistics mapping as indented JSON text.

    Args:
        stats: Mapping of statistics, e.g. the dict returned by
            ``compute_dataset_stats``.

    Returns:
        The JSON text (2-space indent), or ``"No statistics available."``
        when ``stats`` is ``None``, an empty dict, or cannot be serialized.
    """
    fallback = "No statistics available."

    # compute_dataset_stats reports failure as an empty dict; show the
    # fallback text rather than a bare "{}" in that case.
    if stats is None or (isinstance(stats, dict) and not stats):
        return fallback

    try:
        return json.dumps(stats, indent=2)
    except (TypeError, ValueError, RecursionError):
        # TypeError: non-serializable value or key; ValueError: circular
        # reference; RecursionError: excessively deep nesting.
        return fallback

def get_hf_client():
    """Create an ``InferenceClient`` from the ``HF_TOKEN`` environment variable.

    Returns ``None`` when the variable is unset or empty.
    """
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        return None
    return InferenceClient(token=hf_token)

def query_agent_from_csv(user_query, dataset_summary, chat_history, model_repo):
    """Ask a hosted chat model a question about a dataset summary.

    Builds a prompt from the dataset summary, its descriptive statistics, the
    last five chat-history entries and the user's question, then sends it to
    ``model_repo`` through the Hugging Face chat-completion API.

    Args:
        user_query: The question to answer.
        dataset_summary: JSON-serializable summary of the dataset; also passed
            to ``compute_dataset_stats``.
        chat_history: Sliceable sequence of previous conversation entries, or
            ``None`` when there is no prior conversation.
        model_repo: Model identifier passed as ``model`` to the client.

    Returns:
        An ``(answer, error)`` tuple. On success ``error`` is ``""``; on any
        failure (missing token, unserializable input, model/API error)
        ``answer`` is ``""`` and ``error`` describes the problem.
    """
    client = get_hf_client()
    if client is None:
        return "", "Missing HF_TOKEN. Please set it in your environment or secrets."

    dataset_stats = compute_dataset_stats(dataset_summary)
    formatted_stats = format_stats(dataset_stats)

    # A first-turn caller may pass None; treat that as "no history" instead of
    # failing on the slice.
    recent_history = [] if chat_history is None else chat_history[-5:]

    # Serialize up front so bad input is reported through the error slot of
    # the return tuple rather than raised to the caller.
    try:
        summary_json = json.dumps(dataset_summary, indent=2)
        history_json = json.dumps(recent_history, indent=2)
    except (TypeError, ValueError) as e:
        return "", f"Error preparing prompt: {e}"

    prompt = f"""
You are a helpful and expert data analyst.
Use ONLY the dataset info and summary statistics below to answer.

📊 Dataset Summary:
{summary_json}

📌 Descriptive Statistics (mean, median, std, etc.):
{formatted_stats}

💬 Previous Conversation:
{history_json}

❓ Current Question: {user_query}

⚠️ Important Rules:
- If a value cannot be derived from dataset, clearly say "The dataset does not provide this information."
- Show calculations briefly if applicable.
- Be concise and factual.
"""

    try:
        response = client.chat_completion(
            model=model_repo,
            messages=[
                {"role": "system", "content": "You answer based ONLY on the dataset provided."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=512
        )
        answer = response.choices[0].message["content"]
        return answer, ""
    except Exception as e:
        # Network, auth and response-shape problems all surface as an error
        # string so the caller can display them.
        return "", f"Error contacting model: {e}"