Spaces:

sree4411
/

Extract_csv

Sleeping

App Files Files Community

sree4411 committed on Jun 3, 2025

Commit

7569eb9

verified ·

1 Parent(s): 646303e

Create app.py

Browse files

Files changed (1) hide show

app.py +99 -0

app.py ADDED Viewed

	@@ -0,0 +1,99 @@

+import os
+import streamlit as st
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from io import StringIO
+from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
+# Set API token
+os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.getenv("hf")
+os.environ['HF_TOKEN'] = os.getenv("hf")
+st.title("📊 DataCraft CSV")
+st.subheader("– Crafting insights from structured data")
+# Session state for chat history
+if "chat_history" not in st.session_state:
+    st.session_state.chat_history = []
+# Upload CSV
+uploaded_file = st.file_uploader("Upload CSV", type=["csv"])
+if uploaded_file:
+    df = pd.read_csv(uploaded_file)
+    st.success("✅ File loaded successfully!")
+    st.subheader("🔍 Quick Summary")
+    st.write("**Shape:**", df.shape)
+    st.write("**Columns:**", df.columns.tolist())
+    st.write("**Missing Values:**")
+    st.dataframe(df.isnull().sum())
+    st.write("**Data Types:**")
+    st.dataframe(df.dtypes)
+    st.subheader("💬 Ask a question about the dataset")
+    user_input = st.text_input("E.g. 'What are the average values?', 'Plot sales over time'")
+    # Hugging Face Model Setup
+    deepseek = HuggingFaceEndpoint(
+        repo_id="deepseek-ai/DeepSeek-R1",
+        provider="nebius",
+        temperature=0.5,
+        max_new_tokens=150,
+        task="conversational"
+    )
+    model = ChatHuggingFace(
+        llm=deepseek,
+        repo_id=deepseek.repo_id,
+        provider=deepseek.provider,
+        temperature=0.5,
+        max_new_tokens=150,
+        task="conversational"
+    )
+    if user_input:
+        df_sample = df.head(50).to_csv(index=False)
+        prompt = f"""
+You are a helpful data analyst. Here's a preview of the dataset and a user question. Provide an answer in plain English. If the question mentions plotting, include the code as well.
+Dataset:
+{df_sample}
+User question: {user_input}
+"""
+        with st.spinner("Thinking..."):
+            try:
+                response = model.invoke([{"role": "user", "content": prompt}])
+                result = response.content if hasattr(response, "content") else response
+                st.session_state.chat_history.append((user_input, result))
+                st.markdown("### 🧠 Answer")
+                st.write(result)
+                # Optional: Execute simple plot command if mentioned
+                if "plot" in user_input.lower():
+                    with st.expander("📈 Try plotting automatically"):
+                        try:
+                            # Try simple detection for column plots
+                            cols = df.select_dtypes(include='number').columns.tolist()
+                            if len(cols) >= 2:
+                                fig, ax = plt.subplots()
+                                sns.lineplot(data=df, x=cols[0], y=cols[1], ax=ax)
+                                st.pyplot(fig)
+                            else:
+                                st.info("Could not find enough numeric columns to plot.")
+                        except Exception as e:
+                            st.error(f"Plotting failed: {e}")
+            except Exception as e:
+                st.error(f"Error: {e}")
+    # Display previous chat history
+    if st.session_state.chat_history:
+        st.subheader("📚 Previous Q&A")
+        for q, a in st.session_state.chat_history:
+            st.markdown(f"**You:** {q}")
+            st.markdown(f"**Bot:** {a}")