File size: 2,021 Bytes
ec0daac
cbf249a
9fe6fc0
46b0e84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e53c1ba
f9e9ed3
9fe6fc0
 
c39a167
9fe6fc0
ec0daac
 
9fe6fc0
ec0daac
46b0e84
 
 
9fe6fc0
46b0e84
 
ec0daac
46b0e84
9fe6fc0
ec0daac
46b0e84
 
 
 
9fe6fc0
 
46b0e84
 
 
 
 
 
9fe6fc0
f9e9ed3
e53c1ba
9fe6fc0
 
 
 
 
 
 
 
 
 
e53c1ba
9fe6fc0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from huggingface_hub import InferenceClient
import os
import json
import pandas as pd

def compute_dataset_stats(dataset_summary):
    """Return pandas ``describe`` statistics for *dataset_summary* as a dict.

    The input is loaded into a ``DataFrame`` and summarised with
    ``describe(include='all')``, so every column is covered, not only the
    numeric ones. The result maps each column name to a dict of
    statistic name -> value.

    This is best-effort: if the frame cannot be built or described for any
    reason, an empty dict is returned instead of raising.
    """
    try:
        description = pd.DataFrame(dataset_summary).describe(include='all')
        result = description.to_dict()
    except Exception:
        # Statistics are optional extras, so any failure degrades to "none".
        result = {}
    return result

def format_stats(stats):
    """Render *stats* as indented JSON text for inclusion in a prompt.

    Returns the JSON string produced by ``json.dumps(stats, indent=2)``.
    If *stats* cannot be serialised, the fixed fallback text
    ``"No statistics available."`` is returned instead of raising.
    """
    try:
        return json.dumps(stats, indent=2)
    # Only the failures json.dumps itself produces are treated as "no stats":
    # TypeError (unsupported value or key type), ValueError (circular
    # reference) and RecursionError (excessively deep nesting). A bare
    # ``except`` would also swallow KeyboardInterrupt/SystemExit.
    except (TypeError, ValueError, RecursionError):
        return "No statistics available."

def get_hf_client():
    """Create an ``InferenceClient`` from the ``HF_TOKEN`` environment variable.

    Returns ``None`` when the variable is unset or empty, so callers can
    report the missing credential themselves.
    """
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        return None
    return InferenceClient(token=hf_token)

def query_agent_from_csv(user_query, dataset_summary, chat_history, model_repo):
    """Ask a hosted chat model a question about a summarised dataset.

    Args:
        user_query: The question to answer; inserted verbatim into the prompt.
        dataset_summary: Description of the dataset. It is embedded in the
            prompt as JSON and also passed to ``compute_dataset_stats``.
        chat_history: Earlier conversation turns. Only the last five are
            included in the prompt; ``None`` is treated as no history.
        model_repo: Model identifier passed as ``model`` to
            ``client.chat_completion``.

    Returns:
        An ``(answer, error)`` tuple of strings. On success ``error`` is
        ``""``; on any failure (missing token, prompt that cannot be built,
        model call error) ``answer`` is ``""`` and ``error`` describes the
        problem. The function does not raise for these cases.
    """
    client = get_hf_client()
    if client is None:
        return "", "Missing HF_TOKEN. Please set it in your environment or secrets."

    formatted_stats = format_stats(compute_dataset_stats(dataset_summary))

    # Slicing None would raise; an absent history simply means "no turns yet".
    recent_turns = chat_history[-5:] if chat_history is not None else []

    try:
        # default=str: this JSON is only read by the model as prompt text, so
        # values json cannot encode natively are rendered with str() rather
        # than aborting the whole request.
        summary_json = json.dumps(dataset_summary, indent=2, default=str)
        history_json = json.dumps(recent_turns, indent=2, default=str)
    except (TypeError, ValueError) as e:
        # Unsupported key types and circular references are not rescued by
        # default=str; report them through the normal error channel.
        return "", f"Error preparing prompt: {e}"

    prompt = f"""
You are a helpful and expert data analyst.
Use ONLY the dataset info and summary statistics below to answer.

📊 Dataset Summary:
{summary_json}

📌 Descriptive Statistics (mean, median, std, etc.):
{formatted_stats}

💬 Previous Conversation:
{history_json}

❓ Current Question: {user_query}

⚠️ Important Rules:
- If a value cannot be derived from dataset, clearly say "The dataset does not provide this information."
- Show calculations briefly if applicable.
- Be concise and factual.
"""

    try:
        response = client.chat_completion(
            model=model_repo,
            messages=[
                {"role": "system", "content": "You answer based ONLY on the dataset provided."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=512
        )
        answer = response.choices[0].message["content"]
        return answer, ""
    except Exception as e:
        # Boundary with a remote service: any failure (network, auth, empty
        # choices, unexpected response shape) is reported, not raised.
        return "", f"Error contacting model: {e}"