shvy committed on
Commit
ac2b2fb
·
verified ·
1 Parent(s): 63d9676
Files changed (1) hide show
  1. app.py +55 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import io
import os
import tempfile

import gradio as gr
import matplotlib.pyplot as plt
import openai  # For LLM integration
import pandas as pd
import seaborn as sns
7
+
8
# OpenAI API key: read from the OPENAI_API_KEY environment variable so the
# secret never has to be written into this file. The placeholder is kept only
# as a fallback for when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")
10
+
11
def analyze_dataset(file):
    """Summarise an uploaded CSV, ask the LLM for insights, and plot correlations.

    Args:
        file: The upload passed in by the Gradio ``File`` input. Either an
            object exposing the on-disk path as ``.name`` or a plain path
            string is accepted; ``None`` means nothing was uploaded.

    Returns:
        A ``(insights, heatmap)`` tuple. ``insights`` is the text returned by
        the chat model. ``heatmap`` is the path of a PNG file holding the
        correlation heatmap, or ``None`` when there is no upload or the
        dataset has no numeric columns to correlate.
    """
    if file is None:
        return "Please upload a CSV file to analyze.", None

    # Load dataset. Fall back to the value itself when it has no ``.name``,
    # so a plain path string works as well as a file wrapper.
    csv_path = getattr(file, "name", file)
    df = pd.read_csv(csv_path)

    # Generate summary statistics
    summary = df.describe().to_string()
    missing_values = df.isnull().sum().to_string()
    duplicates = df.duplicated().sum()

    # Prompt LLM for insights
    prompt = f"""
    Given the following dataset summary:
    {summary}
    Missing Values:
    {missing_values}
    Duplicate Entries: {duplicates}

    Provide a structured analysis, visualization suggestions, and cleaning strategies.
    """
    # NOTE(review): ``openai.ChatCompletion`` is the pre-1.0 SDK interface;
    # confirm the pinned ``openai`` version still provides it.
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[{"role": "system", "content": "You are a data science expert."},
                  {"role": "user", "content": prompt}]
    )
    insights = response["choices"][0]["message"]["content"]

    # Correlate numeric columns only: text columns cannot be correlated and
    # make ``DataFrame.corr`` raise on recent pandas versions.
    correlations = df.select_dtypes(include="number").corr()
    if correlations.empty:
        return insights, None

    # Generate visualizations
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        sns.heatmap(correlations, annot=True, cmap='coolwarm', ax=ax)
        # Hand the image over as a file path, which the Image output accepts;
        # a raw in-memory buffer is not a supported value for it.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            fig.savefig(tmp, format='png')
            heatmap_path = tmp.name
    finally:
        # Close the figure explicitly so repeated requests do not pile up
        # open figures in the server process.
        plt.close(fig)

    return insights, heatmap_path
45
+
46
# Gradio UI: a single file upload feeds analyze_dataset; its two results are
# shown as the analysis text and the correlation heatmap image.
_dataset_input = gr.File(type="file")
_result_outputs = [
    gr.Textbox(label="Analysis"),
    gr.Image(label="Correlation Heatmap"),
]

demo = gr.Interface(
    fn=analyze_dataset,
    inputs=_dataset_input,
    outputs=_result_outputs,
    title="LLM-Powered Data Analyzer",
    description="Upload a dataset and get automatic insights, visualizations, and cleaning suggestions.",
)

# Start the web server for the interface.
demo.launch()