Spaces:

Garvitj
/

data

Sleeping

App Files Files Community

data / app.py

Garvitj

Update app.py

1b53616 verified 11 months ago

raw

history blame contribute delete

5.52 kB

	import streamlit as st
	import pandas as pd
	import json
	import subprocess
	try:
	import plotly.express as px
	except ModuleNotFoundError:
	subprocess.run(["pip", "install", "plotly"])
	import plotly.express as px
	import re
	import io
	from transformers import AutoTokenizer, AutoModelForCausalLM
	import torch

	# Load DeepSeek Model
	MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"


	@st.cache_resource(show_spinner=False)
	def _load_deepseek():
	    """
	    Load the DeepSeek tokenizer and model once per server process.

	    Streamlit re-executes this script on every widget interaction; caching
	    the loader keeps the model from being re-instantiated on each rerun.
	    show_spinner=False is intended to keep this call from rendering a
	    Streamlit element before st.set_page_config() runs later in the script.

	    Returns:
	        A (tokenizer, model) tuple.
	    """
	    return (
	        AutoTokenizer.from_pretrained(MODEL_ID),
	        AutoModelForCausalLM.from_pretrained(MODEL_ID),
	    )


	tokenizer, model = _load_deepseek()

	def query_deepseek(prompt, max_new_tokens=150):
	    """
	    Query the DeepSeek model and return only the newly generated text.

	    Args:
	        prompt: Text sent to the model.
	        max_new_tokens: Upper bound on the number of tokens to generate.

	    Returns:
	        The model's continuation, stripped of surrounding whitespace. The
	        prompt itself is not included.
	    """
	    inputs = tokenizer(prompt, return_tensors="pt")
	    prompt_length = inputs["input_ids"].shape[-1]
	    with torch.no_grad():
	        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
	    # For a causal LM, generate() returns the prompt tokens followed by the
	    # continuation. Decoding the whole sequence would echo the prompt back to
	    # callers, which then parse prompt text (e.g. the JSON template) as if it
	    # were the model's answer.
	    new_tokens = outputs[0][prompt_length:]
	    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
	    return response.strip()

	def extract_json(text):
	    """
	    Extract a JSON object from the DeepSeek response.

	    Every "{" in the text is tried as the start of a JSON object. When the
	    text holds several decodable objects, the last one is returned, since a
	    model's final answer normally follows any echoed template or reasoning.

	    Args:
	        text: Raw response text that may contain prose around the JSON.

	    Returns:
	        The decoded dict, or None when no JSON object could be decoded. If
	        braces were present but nothing decoded, an error is also shown via
	        st.error.
	    """
	    decoder = json.JSONDecoder()
	    found = None
	    saw_brace = False
	    pos = text.find("{")
	    while pos != -1:
	        saw_brace = True
	        try:
	            candidate, end = decoder.raw_decode(text, pos)
	        except json.JSONDecodeError:
	            # Not a valid object at this brace; try the next one.
	            pos = text.find("{", pos + 1)
	            continue
	        found = candidate
	        # Resume after the decoded object so nested braces are not rescanned.
	        pos = text.find("{", end)
	    if found is None and saw_brace:
	        st.error("JSON Decode Error!")
	    return found

	def get_visualization_suggestion(data):
	    """
	    Send dataset columns to DeepSeek and get suggestions for visualization.

	    Args:
	        data: Object exposing a `columns` iterable (a DataFrame in this app).

	    Returns:
	        Whatever extract_json() yields for the model response: a dict
	        (expected keys "x", "y", "chart_type") or None.
	    """
	    # Column labels are not guaranteed to be strings (e.g. integer headers),
	    # and str.join raises TypeError on non-string items.
	    column_list = ', '.join(map(str, data.columns))
	    prompt = f"""
	I have the following dataset columns: {column_list}.
	Suggest the best type of visualization for this dataset.
	Return only a valid JSON response in the following format:
	{{
	"x": "column_name",
	"y": "column_name",
	"chart_type": "bar/line/scatter/pie"
	}}
	"""
	    response = query_deepseek(prompt)
	    return extract_json(response)

	def extract_csv_from_response(response):
	    """
	    Pull the CSV-looking lines out of a response string.

	    A line is kept when it contains both a double quote and a comma; kept
	    lines are stripped and joined with newlines.

	    Returns:
	        The joined CSV text, or None when no line qualifies.
	    """
	    kept = []
	    for raw_line in response.splitlines():
	        if '"' not in raw_line or ',' not in raw_line:
	            continue
	        kept.append(raw_line.strip())
	    if not kept:
	        return None
	    return '\n'.join(kept)

	def generate_demo_data_csv(user_input, num_rows=10):
	    """
	    Ask the LLM for demo data and save it as a CSV file.

	    Returns:
	        A (message, file_path) tuple. file_path is "generated_data.csv" on
	        success and None when no CSV lines were found or parsing/saving failed.
	    """
	    prompt = f"""
	Generate a structured dataset with {num_rows} rows based on the following request:
	"{user_input}"
	Ensure the response is in valid CSV format, with column headers and quoted text values.
	"""
	    raw_reply = query_deepseek(prompt).strip()
	    csv_text = extract_csv_from_response(raw_reply)

	    # Bail out early when the reply holds nothing that looks like CSV.
	    if not csv_text:
	        return "Error: No valid CSV data found in the response.", None

	    output_path = "generated_data.csv"
	    try:
	        # Round-trip through pandas so only parseable CSV is written out.
	        frame = pd.read_csv(io.StringIO(csv_text))
	        frame.to_csv(output_path, index=False)
	    except Exception as exc:
	        return f"Error: Invalid CSV format. {str(exc)}", None
	    return "Demo data generated as CSV.", output_path

	def query_sql_generator(user_query):
	    """Turn a plain-English request into a SQL query via the DeepSeek model."""
	    sql_prompt = f"I just want a SQL Query corresponding to: {user_query} and no explanation."
	    generated_sql = query_deepseek(sql_prompt)
	    return generated_sql

	# Streamlit UI
	st.set_page_config(page_title="AI-Powered Dashboard", layout="wide")
	st.title("🤖 AI-Powered Multi-Feature Dashboard")

	# Sidebar for navigation
	st.sidebar.title("Navigation")
	option = st.sidebar.radio("Select Feature", ["📊 Data Visualization", "🧠 SQL Query Generator", "📄 Demo Data Generator"])

	if option == "📊 Data Visualization":
	    uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
	    if uploaded_file is not None:
	        df = pd.read_csv(uploaded_file)
	        st.write("### Preview of Data")
	        st.dataframe(df.head())

	        with st.spinner("Getting visualization suggestions from DeepSeek..."):
	            suggestion = get_visualization_suggestion(df)

	        if suggestion:
	            x_col, y_col = suggestion.get("x"), suggestion.get("y")
	            # The model may omit the key or vary the casing ("Bar", " line ");
	            # normalise so a missing value cannot crash .capitalize() and the
	            # lookup below is case-insensitive.
	            chart_type = str(suggestion.get("chart_type") or "").strip().lower()

	            if x_col not in df.columns or y_col not in df.columns:
	                st.error("DeepSeek suggested invalid column names.")
	            else:
	                chart_map = {
	                    "bar": px.bar,
	                    "line": px.line,
	                    "scatter": px.scatter,
	                    # px.pie takes names/values instead of x/y. Extra keywords
	                    # (the title passed below) must be forwarded, otherwise
	                    # the call raises TypeError for every pie chart.
	                    "pie": lambda frame, x, y, **kwargs: px.pie(frame, names=x, values=y, **kwargs),
	                }
	                if chart_type in chart_map:
	                    st.write(f"### Suggested Chart: {chart_type.capitalize()} Chart")
	                    fig = chart_map[chart_type](df, x=x_col, y=y_col, title=f"{x_col} vs {y_col}")
	                    st.plotly_chart(fig)
	                else:
	                    st.error("Unsupported chart type suggested.")

	elif option == "🧠 SQL Query Generator":
	    text_input = st.text_area("Enter your Query here in Plain English:")
	    if st.button("Generate SQL Query"):
	        with st.spinner("Generating SQL Query..."):
	            st.write(query_sql_generator(text_input))

	elif option == "📄 Demo Data Generator":
	    user_input = st.text_area("Describe the dataset you want:")
	    num_rows = st.number_input("Number of rows", min_value=1, max_value=1000, value=10)
	    if st.button("Generate Dataset"):
	        with st.spinner("Generating Demo Data..."):
	            message, file_path = generate_demo_data_csv(user_input, num_rows)
	        st.write(message)
	        if file_path:
	            # Read the bytes inside a context manager so the file handle is
	            # closed instead of being left open for the life of the process.
	            with open(file_path, "rb") as csv_file:
	                csv_bytes = csv_file.read()
	            st.download_button("Download CSV", csv_bytes, file_name="generated_data.csv", mime="text/csv")