Spaces:
Sleeping
Sleeping
| import os | |
| import time | |
| import pandas as pd | |
| import streamlit as st | |
| from io import StringIO | |
| from dotenv import load_dotenv | |
| from huggingface_hub import InferenceClient, login | |
# ==========================================================
# Page configuration
# ==========================================================
# Streamlit releases that enforce "set_page_config must be the first
# Streamlit command" raise an exception if any other st.* call runs before
# it. The missing-token branch below calls st.error, so the page is
# configured before anything else touches the UI.
# NOTE(review): the emoji in the UI strings of this file look mis-encoded
# (e.g. the page icon); confirm them against the deployed app.
st.set_page_config(page_title="π§Ή Smart Data Analysis", page_icon="π", layout="wide")

# ==========================================================
# Load environment + authenticate
# ==========================================================
# Read variables from a local .env file (if any) into the process
# environment, then pick up the Hugging Face token from there.
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    # Surface the configuration problem in the UI; the script keeps running
    # so the rest of the page still renders.
    st.error("β Missing Hugging Face token. Please set HF_TOKEN in your .env file.")
else:
    login(token=HF_TOKEN)

# Create HF clients: one model for CSV cleaning, one for answering questions.
# They are created whether or not a token was found, so the names always
# exist for the code further down the script.
cleaner_client = InferenceClient(model="Qwen/Qwen2.5-Coder-14B", token=HF_TOKEN)
analyst_client = InferenceClient(model="Qwen/Qwen2.5-14B-Instruct", token=HF_TOKEN)

# ==========================================================
# App layout
# ==========================================================
st.title("π Smart Data Analysis Assistant")
st.caption("Clean messy data, then run AI-powered insights and statistical analysis β all locally with open-source models.")
# ==========================================================
# Prompt / response helpers
# ==========================================================
def _build_cleaning_prompt(csv_text: str) -> str:
    """Return the prompt asking the cleaning model to tidy ``csv_text``."""
    return (
        "\n"
        "You are a Python data cleaning assistant.\n"
        "Clean this dataset and fix inconsistent column names, missing values, and formatting.\n"
        "Return a clean CSV version that can be loaded into pandas directly.\n"
        "Dataset:\n"
        f"{csv_text}\n"
    )


def _build_analysis_prompt(csv_excerpt: str, question: str) -> str:
    """Return the prompt asking the analyst model ``question`` about ``csv_excerpt``."""
    return (
        "\n"
        "You are a data analyst. Analyze this dataset and answer the question.\n"
        "Data sample (CSV):\n"
        f"{csv_excerpt}\n"
        "Question:\n"
        f"{question}\n"
        "Instructions:\n"
        "- Be accurate and concise.\n"
        "- If numerical analysis is relevant, describe it.\n"
        "- Use markdown for readability.\n"
    )


def _extract_csv_payload(response_text: str) -> str:
    """Return the CSV text contained in a model reply.

    If the reply has no Markdown code fence, the stripped reply is returned
    unchanged. Otherwise the content of the last non-empty fenced block is
    returned, without the fence markers and without the language tag that
    may follow the opening fence (for example ``csv``).

    Taking the text after the final fence marker instead would yield an
    empty string for a reply that ends with its closing fence, or trailing
    commentary when the model keeps writing after the block.
    """
    text = response_text.strip()
    fence = "```"
    if fence not in text:
        return text

    # Splitting on the marker puts fenced content at the odd indexes:
    # [before, inside, between, inside, ..., after].
    fenced_blocks = [part for part in text.split(fence)[1::2] if part.strip()]
    if not fenced_blocks:
        # Only stray markers (e.g. a lone closing fence): drop them and keep
        # whatever text surrounds them.
        return text.replace(fence, "").strip()

    block = fenced_blocks[-1]
    first_line, newline, remainder = block.partition("\n")
    # The rest of the opening-fence line is a language tag, not data. A line
    # containing a comma is treated as CSV content and kept.
    if newline and "," not in first_line:
        block = remainder
    return block.strip()


# ==========================================================
# Upload CSV
# ==========================================================
uploaded_file = st.file_uploader("π€ Upload your CSV dataset", type=["csv"])
if uploaded_file:
    df_raw = pd.read_csv(uploaded_file)
    st.subheader("π Raw Data Preview")
    st.dataframe(df_raw.head())

    # ==========================================================
    # Data Cleaning
    # ==========================================================
    if st.button("π§Ή Clean Data using Qwen Coder 14B"):
        with st.spinner("Cleaning data... please wait β³"):
            try:
                # The whole dataset is sent as CSV text and the reply is
                # capped at 2048 new tokens.
                # NOTE(review): large uploads may not fit in the prompt or
                # in the capped reply; confirm the intended size limits.
                prompt = _build_cleaning_prompt(df_raw.to_csv(index=False))
                response = cleaner_client.text_generation(
                    prompt,
                    temperature=0.2,
                    max_new_tokens=2048,
                )
                cleaned_csv = _extract_csv_payload(response)
                df_cleaned = pd.read_csv(StringIO(cleaned_csv))
                # Keep the result across reruns so the analysis section
                # below can use it after the button click has passed.
                st.session_state.cleaned_df = df_cleaned
                st.success("β Data cleaned successfully!")
                st.dataframe(df_cleaned.head())
            except Exception as e:
                # UI boundary: report any request or parsing failure to the
                # user instead of showing a traceback.
                st.error(f"β οΈ Cleaning failed: {e}")

    # ==========================================================
    # Data Analysis
    # ==========================================================
    # NOTE(review): the source this was reconstructed from had lost its
    # indentation; confirm that this section belongs under the upload check.
    if "cleaned_df" in st.session_state:
        df = st.session_state.cleaned_df
        st.divider()
        st.subheader("π AI Data Analysis")
        user_query = st.text_area("Ask about your data:", placeholder="e.g., What is the correlation between experience and salary?")
        if st.button("π Analyze"):
            if not user_query.strip():
                # Do not spend a model call on an empty question.
                st.warning("Please enter a question about your data before analyzing.")
            else:
                with st.spinner("Analyzing with Qwen 14B Instruct..."):
                    try:
                        # Only the first 30 rows are sent to the model.
                        csv_excerpt = df.head(30).to_csv(index=False)
                        analysis_prompt = _build_analysis_prompt(csv_excerpt, user_query)
                        response = analyst_client.text_generation(
                            analysis_prompt,
                            temperature=0.5,
                            max_new_tokens=1024,
                        )
                        st.markdown("### π§ Analysis Result")
                        st.write(response.strip())
                    except Exception as e:
                        # UI boundary: report the failure in the page.
                        st.error(f"β οΈ Analysis failed: {e}")