File size: 3,402 Bytes
94c8489
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92b0aed
94c8489
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c5c0f15
94c8489
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import gradio as gr
import pandas as pd
import plotly.express as px
from groq import Groq
import os

# ---------------- CONFIG ----------------
# Module-wide Groq client; the API key is taken from the GROQ_API_KEY
# environment variable (None is passed through when it is unset).
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)

# ---------------- CORE FUNCTION ----------------
def _upload_path(file):
    """Return the filesystem path behind an uploaded-file value.

    Accepts a plain path (``str`` / ``os.PathLike``) or any object that
    exposes the path as ``.name``.
    NOTE(review): which of the two forms Gradio hands over depends on the
    installed Gradio version and the File component's ``type`` — confirm.
    """
    if isinstance(file, (str, os.PathLike)):
        # Checked first: pathlib paths also have ``.name``, but there it is
        # only the final path component, not the full path.
        return os.fspath(file)
    return file.name


def _pick_x_axis(df, numeric_cols):
    """Choose the x-axis column and parse it as datetimes when possible.

    The first column whose name contains "date" or "time" wins; otherwise the
    first column of the frame is used. ``df`` is modified in place when the
    chosen column parses cleanly as datetimes.

    Returns:
        The name of the chosen x-axis column.
    """
    date_cols = [
        c for c in df.columns
        if "date" in str(c).lower() or "time" in str(c).lower()
    ]
    x_col = date_cols[0] if date_cols else df.columns[0]

    # Numeric columns are left alone: pandas would read plain numbers as
    # nanoseconds since the epoch, turning e.g. "sleep_time" hours into
    # 1970 timestamps.
    if x_col not in numeric_cols:
        try:
            df[x_col] = pd.to_datetime(df[x_col])
        except (ValueError, TypeError, OverflowError):
            # Not date-like: keep the raw values as axis labels. This mirrors
            # the deprecated ``errors="ignore"`` behaviour explicitly.
            pass
    return x_col


def _build_figures(df, x_col, numeric_cols):
    """Build the trend line chart and the correlation heatmap.

    Returns:
        A ``(trend_figure, heatmap_figure)`` tuple of Plotly figures.
    """
    # Never plot the x-axis column against itself.
    y_cols = [c for c in numeric_cols if c != x_col]

    # First chart: every numeric metric as a line over the x-axis column.
    trend_fig = px.line(
        df,
        x=x_col,
        y=y_cols,
        title="Health Metrics Over Time",
        markers=True,
        labels={"value": "Metric Value", "variable": "Health Metric"},
    )

    # Second chart: pairwise correlation of all numeric columns. The caller
    # guarantees at least two numeric columns.
    corr = df[numeric_cols].corr()
    heatmap_fig = px.imshow(
        corr,
        text_auto=True,
        title="Correlation Between Health Metrics",
        color_continuous_scale="Blues",
    )
    return trend_fig, heatmap_fig


def _generate_insights(summary):
    """Ask the chat model for insights on a dataset summary.

    Returns:
        The model's reply with surrounding whitespace stripped, or an
        "Error generating AI insights: ..." message if the request fails.
    """
    prompt = f"""
    You are an expert digital health assistant.
    The user uploaded the following dataset summary:
    {summary}

    Identify 3 interesting insights or patterns (e.g., trends, correlations),
    and 2 practical recommendations that could help improve their health behavior,
    based solely on the numerical data provided.
    """

    try:
        response = client.chat.completions.create(
            model="deepseek-r1-distill-llama-70b",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and the charts should
        # still be shown when the remote call (or its response shape) fails.
        return f"Error generating AI insights: {e}"


def analyze_health_data(file):
    """Analyze an uploaded CSV and return AI insights plus two charts.

    Args:
        file: The uploaded CSV, as a path or an object exposing the path as
            ``.name``; ``None`` when nothing was uploaded.

    Returns:
        A 3-tuple ``(text, trend_figure, heatmap_figure)``. When no file is
        given, the CSV cannot be read, or it has fewer than two numeric
        columns, ``text`` is a user-facing message and both figures are
        ``None``.
    """
    if file is None:
        return "Please upload a CSV file.", None, None

    try:
        df = pd.read_csv(_upload_path(file))
    except Exception as e:
        # Broad on purpose: any read/parse failure is reported to the user
        # instead of crashing the interface.
        return f"Error reading CSV: {e}", None, None

    # Drop completely empty columns before detecting numeric ones; an
    # all-NaN column would otherwise count as numeric.
    df = df.dropna(axis=1, how="all")

    numeric_cols = df.select_dtypes(include=["number"]).columns.tolist()
    if len(numeric_cols) < 2:
        return "Please upload a dataset with at least two numeric columns.", None, None

    x_col = _pick_x_axis(df, numeric_cols)
    fig1, fig2 = _build_figures(df, x_col, numeric_cols)

    summary = df.describe(include="all").to_string()
    insights = _generate_insights(summary)

    return insights, fig1, fig2


# ---------------- GRADIO INTERFACE ----------------
# Heading and description shown at the top of the Gradio page.
# The title's emoji and em dash had been corrupted by a wrong-encoding
# round trip; they are restored to the intended characters here.
title = "🏥 HealthMind AI — Universal Health Data Insight Dashboard"
desc = (
    "Upload any health-related CSV file. "
    "The app will automatically detect numeric columns, visualize trends, "
    "and generate AI-driven insights about your data."
)

# One CSV upload in; the three outputs line up with the
# (insights, trend figure, heatmap figure) tuple that
# analyze_health_data returns.
demo = gr.Interface(
    fn=analyze_health_data,
    inputs=[gr.File(label="Upload Health Data (CSV)", file_types=[".csv"])],
    outputs=[
        gr.Textbox(label="AI Health Insights", lines=10),
        gr.Plot(label="Metric Trends"),
        gr.Plot(label="Correlation Heatmap"),
    ],
    title=title,
    description=desc,
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()