Aditya-1911 committed on
Commit
d872ac4
·
verified ·
1 Parent(s): 3994fc9

Upload 5 files

Browse files
Files changed (5) hide show
  1. .huggingface.yml +2 -0
  2. README.md +21 -13
  3. app.py +38 -0
  4. csv_agent.py +22 -0
  5. requirements.txt +3 -0
.huggingface.yml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ sdk: streamlit
2
+ app_file: app.py
README.md CHANGED
@@ -1,13 +1,21 @@
1
- ---
2
- title: DataScience Chatbot
3
- emoji: 🔥
4
- colorFrom: pink
5
- colorTo: gray
6
- sdk: streamlit
7
- sdk_version: 1.44.1
8
- app_file: app.py
9
- pinned: false
10
- short_description: A chat bot which will assist dataanalysis
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
1
+ # 📄 CSV Assistant powered by Hugging Face and Starcoder
2
+
3
+ This is a Streamlit app that allows you to:
4
+ - Upload any CSV file
5
+ - Ask natural language queries (e.g., "remove nulls", "filter sales > 1000")
6
+ - Have the AI generate Python (pandas) code for each query
7
+ - Apply it directly to your data
8
+ - Download the updated CSV file
9
+
10
+ Powered by Hugging Face `bigcode/starcoder` via Inference API.
11
+
12
+ ## Setup:
13
+
14
+ - Add your Hugging Face Inference API Token as a Secret:
15
+   - Name: `HUGGINGFACEHUB_API_TOKEN`
16
+   - Value: your Hugging Face token
17
+ - Then deploy and enjoy!
18
+
19
+ ✅ Free
20
+ ✅ No heavy compute needed
21
+ ✅ No fine-tuning needed
app.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import os
4
+ from csv_agent import generate_code
5
+
6
# Streamlit entry point: upload a CSV, describe a transformation in plain
# language, let the model turn it into pandas code, run that code against the
# uploaded data and offer the result for download.
st.set_page_config(page_title="CSV Assistant with Hugging Face", layout="wide")
st.title("🤖 CSV Assistant: Powered by Starcoder (Hugging Face)")

# Fail fast when no token is configured. ``not`` (rather than ``is None``)
# also rejects a secret that exists but is empty.
if not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
    st.error("❌ API Token missing! Please set HUGGINGFACEHUB_API_TOKEN in your Space Secrets.")
    st.stop()

st.sidebar.title("Upload Your CSV")
uploaded_file = st.sidebar.file_uploader("Choose a CSV file", type=["csv"])

if uploaded_file:
    # A malformed, empty or non-UTF-8 upload should produce a readable
    # message instead of an unhandled traceback.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as e:
        st.error(f"⚠️ Could not read the uploaded CSV: {e}")
        st.stop()

    st.write("### Preview of your data:", df.head())

    prompt = st.text_input("💬 What would you like to do with the data?")
    if prompt:
        with st.spinner("Generating Python code using AI..."):
            # Top-level UI boundary: any failure of the remote call is
            # reported to the user rather than crashing the page.
            try:
                code = generate_code(prompt)
            except Exception as e:
                st.error(f"⚠️ Error while generating code: {e}")
                st.stop()
        st.code(code, language="python")

        try:
            # SECURITY: this executes model-generated code, steered by
            # free-form user input, inside the app process with no sandbox.
            # The code can read secrets, touch the filesystem or open
            # network connections. Only run this where that is acceptable.
            exec_globals = {"df": df}
            exec(code, exec_globals)

            result = exec_globals.get("df")
            # Generated code can rebind ``df`` to something unusable (for
            # example the ``None`` returned by an ``inplace=True`` call);
            # report that explicitly instead of failing later on ``.head()``.
            if not isinstance(result, (pd.DataFrame, pd.Series)):
                raise TypeError(
                    f"generated code left `df` as {type(result).__name__}, "
                    "expected a pandas DataFrame"
                )
            df = result

            st.success("✅ Data updated successfully!")
            st.write(df.head())

            # Allow download
            csv = df.to_csv(index=False).encode('utf-8')
            st.download_button("⬇️ Download Updated CSV", csv, "updated_data.csv", "text/csv")
        except Exception as e:
            st.error(f"⚠️ Error while executing AI-generated code: {e}")
csv_agent.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from huggingface_hub import InferenceClient
3
+
4
# Shared Inference API client used by generate_code(). The token is read
# from the environment once, when this module is imported.
client = InferenceClient(
    token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
    model="bigcode/starcoder",
)
8
+
9
def _strip_code_fences(text):
    """Return *text* without a Markdown code fence, trimmed of whitespace.

    When *text* contains a fence (three backticks), only the content of the
    first fenced block is returned; an unterminated fence runs to the end of
    the text. Text without any fence is returned stripped but otherwise
    unchanged.
    """
    fence = "```"
    start = text.find(fence)
    if start == -1:
        return text.strip()

    after_fence = text[start + len(fence):]
    # The rest of the opening line is normally a language tag ("python").
    first_line, newline, remainder = after_fence.partition("\n")
    if not newline:
        # Inline fence with no line break: the code follows the backticks.
        remainder = first_line
    return remainder.partition(fence)[0].strip()


def generate_code(prompt):
    """Ask the model for pandas code that carries out *prompt* on ``df``.

    Args:
        prompt: Natural-language instruction describing the change to make
            to a dataframe named ``df``.

    Returns:
        The generated text from the module-level ``client``, with any
        surrounding Markdown code fence and leading/trailing whitespace
        removed so that the caller can pass it to ``exec`` directly.

    Any exception raised by ``client.text_generation`` propagates to the
    caller.
    """
    full_prompt = (
        "You are a Python Data Scientist.\n"
        "Given the following instruction, write Python pandas code to perform the task on a dataframe named df.\n"
        f"Instruction: {prompt}\n"
        "Output only valid Python code. No explanations."
    )

    response = client.text_generation(
        prompt=full_prompt,
        max_new_tokens=300,
        temperature=0.2,
        top_p=0.95,
        repetition_penalty=1.1,
    )
    # Models frequently wrap code in Markdown fences even when told not to;
    # left in place, the fence lines make the caller's exec() raise
    # SyntaxError.
    return _strip_code_fences(response)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ huggingface_hub