triflix committed on
Commit
a420762
·
verified ·
1 Parent(s): 4fb4e58

Create backend/app.py

Browse files
Files changed (1) hide show
  1. backend/app.py +153 -0
backend/app.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -----------------------------
2
+ # Imports
3
+ # -----------------------------
4
import contextlib
import io
import json
import os

import pandas as pd
from google import genai
from google.colab import files
from google.genai import types
10
+
11
+ # -----------------------------
12
+ # Initialize Gemini client (global)
13
+ # -----------------------------
14
# The API key is read from the GEMINI_API_KEY environment variable instead of
# being hard-coded, so the credential never lands in version control.
# NOTE(review): any key that was previously committed here should be treated
# as leaked and revoked.
client = genai.Client(
    api_key=os.environ["GEMINI_API_KEY"]
)
17
+
18
+ # -----------------------------
19
+ # Upload Excel file
20
+ # -----------------------------
21
# files.upload() returns a mapping keyed by uploaded file name.
uploaded = files.upload()
if not uploaded:
    # Fail with a clear message instead of an IndexError on an empty upload.
    raise ValueError("No file was uploaded; an Excel file is required.")

# Only the first uploaded file is used.
file_name = next(iter(uploaded))

# The file is read relative to the current working directory, so derive the
# absolute path from that same location rather than assuming "/content/".
# file_path is later embedded in the prompt for the generated code.
file_path = os.path.abspath(file_name)
df = pd.read_excel(file_path)
25
+
26
+ # -----------------------------
27
+ # Extract Metadata
28
+ # -----------------------------
29
def get_metadata(df: pd.DataFrame, sample_size: int = 3) -> dict:
    """Summarize the structure of *df* as a plain dictionary.

    Args:
        df: The DataFrame to describe.
        sample_size: Number of leading rows to include as example records.
            Defaults to 3.

    Returns:
        A dict with the keys:
        - "columns": list of column labels.
        - "dtypes": column label -> dtype rendered as a string.
        - "null_counts": column label -> number of null values.
        - "unique_counts": column label -> number of distinct values
          (as reported by ``DataFrame.nunique``).
        - "sample_rows": the first ``sample_size`` rows as a list of
          per-row dicts.
    """
    return {
        "columns": list(df.columns),
        "dtypes": df.dtypes.apply(str).to_dict(),
        "null_counts": df.isnull().sum().to_dict(),
        "unique_counts": df.nunique().to_dict(),
        "sample_rows": df.head(sample_size).to_dict(orient="records"),
    }
37
+
38
# Describe the uploaded sheet once and echo the result for inspection.
metadata = get_metadata(df)
print("Metadata extracted:", metadata, sep="\n")
41
+
42
+ # -----------------------------
43
+ # Generate JSON summary and suggestions from metadata
44
+ # -----------------------------
45
# System prompt for the metadata-analysis request; kept at module level so the
# function body stays readable.
_METADATA_SYSTEM_PROMPT = """
You are a structured data analysis AI.

1️⃣ Summary:
Provide a concise description of:
- What kind of data this is
- What it likely represents
- Its domain or use-case
Indicate assumptions if needed.

2️⃣ Suggestions:
Provide up to three actionable analyses and visualizations based on the metadata, specifying columns and insight type.

Output must be strict JSON:
{
"Summary": "<short summary>",
"Suggestion": ["<analysis #1>", "<analysis #2>", "<analysis #3>"]
}
"""


def generate_metadata_analysis(metadata):
    """Ask the Gemini model for a summary and analysis suggestions.

    Args:
        metadata: The metadata object to analyze; it is rendered with
            ``str()`` and embedded in the user prompt.

    Returns:
        The concatenated text of the streamed response. The request asks for
        an ``application/json`` response, but the text is returned unparsed;
        the caller is responsible for ``json.loads``.
    """
    metadata_text = str(metadata)
    model = "gemini-2.5-flash-lite"

    contents = [
        types.Content(
            role="user",
            parts=[types.Part.from_text(
                text=f"Analyze the following structured data metadata:\n{metadata_text}"
            )],
        ),
    ]

    generate_content_config = types.GenerateContentConfig(
        thinking_config=types.ThinkingConfig(thinking_budget=0),
        response_mime_type="application/json",
        system_instruction=[types.Part.from_text(text=_METADATA_SYSTEM_PROMPT)],
    )

    stream = client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    )
    # A streamed chunk may carry no text (chunk.text is None), so substitute
    # an empty string rather than failing on str + None.
    return "".join(chunk.text or "" for chunk in stream)
91
+
92
# Call metadata analysis
agent1summary_json = generate_metadata_analysis(metadata)
agent1summary = json.loads(agent1summary_json)
print("\nMetadata analysis JSON:")
print(agent1summary)

# -----------------------------
# User selects one suggestion
# -----------------------------
suggestions = agent1summary["Suggestion"]
if not suggestions:
    raise ValueError("The model returned no suggestions to choose from.")

# The model is asked for *up to* three suggestions, so the valid range is
# derived from the actual list length instead of being assumed to be 1-3.
print(f"\nSelect one of the following suggestions (type a number from 1 to {len(suggestions)}):")
for i, suggestion in enumerate(suggestions, 1):
    print(f"{i}. {suggestion}")

# Re-prompt until the input is an integer inside the valid range. Without the
# range check, 0 would silently select the last suggestion (index -1) and
# other values would raise IndexError or ValueError.
while True:
    try:
        selected_index = int(input("Your selection: "))
    except ValueError:
        selected_index = 0
    if 1 <= selected_index <= len(suggestions):
        break
    print(f"Please enter a whole number between 1 and {len(suggestions)}.")

command = suggestions[selected_index - 1]
print("\nSelected command:")
print(command)
109
+
110
+ # -----------------------------
111
+ # Strict JSON output generator for visualization
112
+ # -----------------------------
113
MODEL = "gemini-2.5-flash-lite"

# System prompt for the code-generation request. The data file path is
# embedded with !r so it is always a correctly quoted and escaped Python
# string literal, even if the file name contains quotes or backslashes.
system_prompt_text = f"""
You are a Python assistant that MUST return output strictly in JSON format and NOTHING else.
The top-level JSON MUST contain exactly three keys in this order: "type", "code", "explanation".
Requirements:
- "type": visualization type ("bar", "pie", "line", etc.)
- "code": Python code as a string that prints numeric JSON to stdout. Use this for data access: df = pd.read_excel({file_path!r})
- "explanation": one-sentence description
"""
123
+
124
def generate_visualization():
    """Request visualization code for the selected suggestion.

    Reads the module-level ``command`` (user prompt), ``system_prompt_text``
    (system instruction), ``MODEL`` and ``client``.

    Returns:
        The concatenated text of the streamed response. The request asks for
        an ``application/json`` response, but the text is returned unparsed.
    """
    contents = [types.Content(role="user", parts=[types.Part.from_text(text=command)])]

    generate_content_config = types.GenerateContentConfig(
        thinking_config=types.ThinkingConfig(thinking_budget=0),
        response_mime_type="application/json",
        system_instruction=[types.Part.from_text(text=system_prompt_text)],
    )

    stream = client.models.generate_content_stream(
        model=MODEL,
        contents=contents,
        config=generate_content_config,
    )
    # A streamed chunk may carry no text (chunk.text is None), so substitute
    # an empty string rather than failing on str + None.
    return "".join(chunk.text or "" for chunk in stream)
142
+
143
# Call visualization generator
agent2code = generate_visualization()
print("\nStrict JSON for visualization:")
print(agent2code)

# -----------------------------
# Execute generated visualization code
# -----------------------------
agent2code_json = json.loads(agent2code)
code_to_run = agent2code_json.get("code", "")

# SECURITY: this executes model-generated code with the full privileges of
# this process. Only run it in a disposable or sandboxed environment.
#
# exec() always returns None, so the generated code's result is obtained by
# capturing what it prints to stdout (the prompt asks it to print JSON).
stdout_buffer = io.StringIO()
with contextlib.redirect_stdout(stdout_buffer):
    exec(code_to_run)
final_frontend_output = stdout_buffer.getvalue()

# Echo the captured output so it still appears on the console as before.
print(final_frontend_output, end="")