Spaces:

Aditya-1911
/

DataScience_Chatbot

Running

Aditya-1911 committed on Apr 27, 2025

Commit

d872ac4

verified ·

1 Parent(s): 3994fc9

Upload 5 files

Files changed (5) hide show

.huggingface.yml ADDED Viewed

README.md CHANGED Viewed

@@ -1,13 +1,21 @@
----
-title: DataScience Chatbot
-emoji: 🔥
-colorFrom: pink
-colorTo: gray
-sdk: streamlit
-sdk_version: 1.44.1
-app_file: app.py
-pinned: false
-short_description: A chat bot which will assist dataanalysis
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+# 📄 CSV Assistant powered by Hugging Face and Starcoder
+This is a Streamlit app that allows you to:
+- Upload any CSV file
+- Ask natural language queries (e.g., "remove nulls", "filter sales > 1000")
+- AI generates Python (Pandas) code
+- Apply it directly to your data
+- Download the updated CSV file
+Powered by Hugging Face `bigcode/starcoder` via Inference API.
+## Setup:
+- Add your Hugging Face Inference API Token as a Secret:
+    - Name: HUGGINGFACEHUB_API_TOKEN
+    - Value: your Hugging Face token
+- Then deploy and enjoy!
+✅ Free
+✅ No heavy compute needed
+✅ No fine-tuning needed

app.py ADDED Viewed

+import streamlit as st
+import pandas as pd
+import os
+from csv_agent import generate_code
+st.set_page_config(page_title="CSV Assistant with Hugging Face", layout="wide")
+st.title("🤖 CSV Assistant: Powered by Starcoder (Hugging Face)")
+# Check for API token
+if os.getenv("HUGGINGFACEHUB_API_TOKEN") is None:
+    st.error("❌ API Token missing! Please set HUGGINGFACEHUB_API_TOKEN in your Space Secrets.")
+    st.stop()
+st.sidebar.title("Upload Your CSV")
+uploaded_file = st.sidebar.file_uploader("Choose a CSV file", type=["csv"])
+if uploaded_file:
+    df = pd.read_csv(uploaded_file)
+    st.write("### Preview of your data:", df.head())
+    prompt = st.text_input("💬 What would you like to do with the data?")
+    if prompt:
+        with st.spinner("Generating Python code using AI..."):
+            code = generate_code(prompt)
+            st.code(code, language="python")
+            try:
+                exec_globals = {"df": df}
+                exec(code, exec_globals)
+                df = exec_globals["df"]
+                st.success("✅ Data updated successfully!")
+                st.write(df.head())
+                # Allow download
+                csv = df.to_csv(index=False).encode('utf-8')
+                st.download_button("⬇️ Download Updated CSV", csv, "updated_data.csv", "text/csv")
+            except Exception as e:
+                st.error(f"⚠️ Error while executing AI-generated code: {e}")

csv_agent.py ADDED Viewed

+import os
+from huggingface_hub import InferenceClient
+client = InferenceClient(
+    model="bigcode/starcoder",
+    token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
+)
+def generate_code(prompt):
+    full_prompt = f"""You are a Python Data Scientist.
+Given the following instruction, write Python pandas code to perform the task on a dataframe named df.
+Instruction: {prompt}
+Output only valid Python code. No explanations."""
+    response = client.text_generation(
+        prompt=full_prompt,
+        max_new_tokens=300,
+        temperature=0.2,
+        top_p=0.95,
+        repetition_penalty=1.1
+    )
+    return response

requirements.txt ADDED Viewed

+streamlit
+pandas
+huggingface_hub