Spaces:
Sleeping
Sleeping
File size: 3,991 Bytes
779bb9b 62e465d 6d6de4b 62e465d 779bb9b 62e465d 779bb9b 62e465d 779bb9b 62e465d 779bb9b 62e465d 779bb9b 62e465d 6d6de4b 50e1eaf 62e465d 779bb9b 62e465d 779bb9b 62e465d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
import os
import time
import pandas as pd
import streamlit as st
from io import StringIO
from dotenv import load_dotenv
from huggingface_hub import InferenceClient, login
# ==========================================================
# Load environment + authenticate
# ==========================================================
# Read variables from a local .env file (if any) into the process environment,
# then pick up the Hugging Face access token from there.
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")

if not HF_TOKEN:
    st.error("❌ Missing Hugging Face token. Please set HF_TOKEN in your .env file.")
    # Halt this script run here. Without a token the clients below cannot
    # authenticate, and continuing would also reach st.set_page_config() after
    # st.error() has already been emitted, which raises on Streamlit versions
    # that require set_page_config() to be the first Streamlit command.
    st.stop()

login(token=HF_TOKEN)

# Create HF clients: one model for CSV cleaning, one for question answering.
cleaner_client = InferenceClient(model="Qwen/Qwen2.5-Coder-14B", token=HF_TOKEN)
analyst_client = InferenceClient(model="Qwen/Qwen2.5-14B-Instruct", token=HF_TOKEN)
# ==========================================================
# App Layout
# ==========================================================
# Page-level configuration (browser tab title, favicon, wide layout) followed
# by the visible page header.
# NOTE(review): the emoji in these strings were mis-decoded in this copy of the
# file. The broom and the em dash are recoverable from the surviving bytes; the
# page icon / title glyph is a best-fit choice -- confirm against the original.
st.set_page_config(page_title="🧹 Smart Data Analysis", page_icon="📊", layout="wide")
st.title("📊 Smart Data Analysis Assistant")
st.caption("Clean messy data, then run AI-powered insights and statistical analysis — all locally with open-source models.")
# ==========================================================
# Upload CSV
# ==========================================================
def _build_cleaning_prompt(csv_text: str) -> str:
    """Return the instruction prompt for the cleaning model, embedding *csv_text*."""
    return (
        "\n"
        "You are a Python data cleaning assistant.\n"
        "Clean this dataset and fix inconsistent column names, missing values, and formatting.\n"
        "Return a clean CSV version that can be loaded into pandas directly.\n"
        "Dataset:\n"
        f"{csv_text}\n"
    )


def _extract_csv_payload(response: str) -> str:
    """Return the CSV text contained in a model reply.

    The reply may be bare CSV or CSV wrapped in a Markdown code fence, with or
    without an info string such as ``csv`` and with or without a closing fence.

    * No fence: the whole stripped reply is returned.
    * Fence present: the content of the first fenced block is returned, minus
      the info-string line.
    * Fenced content empty (e.g. a stray trailing fence): the text before the
      fence is returned instead.
    """
    text = response.strip()
    before, fence, after = text.partition("```")
    if not fence:
        return text

    fenced, _, _ = after.partition("```")
    # Everything up to the first newline after the opening fence is the info
    # string ("csv", "python", or empty), not data.
    _info, newline, body = fenced.partition("\n")
    payload = (body if newline else fenced).strip()
    return payload or before.strip()


# NOTE(review): glyphs in the UI strings below were mis-decoded in this copy of
# the file. Those whose bytes survived are restored; the "Raw Data Preview"
# glyph is a best-fit choice -- confirm against the original.
uploaded_file = st.file_uploader("📤 Upload your CSV dataset", type=["csv"])

if uploaded_file is not None:
    try:
        df_raw = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # Show a readable message instead of a raw traceback for a bad upload.
        st.error(f"⚠️ Could not read the uploaded CSV: {exc}")
        st.stop()

    st.subheader("🔍 Raw Data Preview")
    st.dataframe(df_raw.head())

    # ==========================================================
    # Data Cleaning
    # ==========================================================
    if st.button("🧹 Clean Data using Qwen Coder 14B"):
        with st.spinner("Cleaning data... please wait ⏳"):
            try:
                # Serialize the whole DataFrame to CSV text for the model.
                csv_text = df_raw.to_csv(index=False)
                response = cleaner_client.text_generation(
                    _build_cleaning_prompt(csv_text),
                    temperature=0.2,
                    max_new_tokens=2048,
                )
                # Taking the text after the *last* fence yields an empty
                # string whenever the model closes its code block, so parse
                # the fenced content explicitly.
                cleaned_csv = _extract_csv_payload(response)
                df_cleaned = pd.read_csv(StringIO(cleaned_csv))
                # Keep the result across reruns so the analysis section can use it.
                st.session_state.cleaned_df = df_cleaned
                st.success("✅ Data cleaned successfully!")
                st.dataframe(df_cleaned.head())
            except Exception as e:
                # UI boundary: report any model / parsing failure to the user.
                st.error(f"⚠️ Cleaning failed: {e}")
# ==========================================================
# Data Analysis
# ==========================================================
def _build_analysis_prompt(csv_excerpt: str, user_query: str) -> str:
    """Return the analyst prompt combining a CSV sample with the user's question."""
    return (
        "\n"
        "You are a data analyst. Analyze this dataset and answer the question.\n"
        "Data sample (CSV):\n"
        f"{csv_excerpt}\n"
        "Question:\n"
        f"{user_query}\n"
        "Instructions:\n"
        "- Be accurate and concise.\n"
        "- If numerical analysis is relevant, describe it.\n"
        "- Use markdown for readability.\n"
    )


# NOTE(review): glyphs in the UI strings below were mis-decoded in this copy of
# the file. The brain and warning signs are recoverable from the surviving
# bytes; the subheader and button glyphs are best-fit choices -- confirm
# against the original.
# The section is only shown once a cleaned DataFrame is stored in the session.
if "cleaned_df" in st.session_state:
    df = st.session_state.cleaned_df
    st.divider()
    st.subheader("📈 AI Data Analysis")
    user_query = st.text_area(
        "Ask about your data:",
        placeholder="e.g., What is the correlation between experience and salary?",
    )

    if st.button("🔍 Analyze"):
        if not user_query.strip():
            # Do not spend a model call on a blank question.
            st.warning("⚠️ Please enter a question about your data before analyzing.")
        else:
            with st.spinner("Analyzing with Qwen 14B Instruct..."):
                try:
                    # Only the first 30 rows are sent to the model as a sample.
                    csv_excerpt = df.head(30).to_csv(index=False)
                    response = analyst_client.text_generation(
                        _build_analysis_prompt(csv_excerpt, user_query),
                        temperature=0.5,
                        max_new_tokens=1024,
                    )
                    st.markdown("### 🧠 Analysis Result")
                    st.write(response.strip())
                except Exception as e:
                    # UI boundary: report any model failure to the user.
                    st.error(f"⚠️ Analysis failed: {e}")
|