asrarbw commited on
Commit
e15f4cd
·
verified ·
1 Parent(s): ffc4dcc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +230 -50
app.py CHANGED
@@ -1,70 +1,250 @@
1
  import gradio as gr
 
 
 
2
  from huggingface_hub import InferenceClient
3
 
 
 
 
 
4
 
5
def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
):
    """Stream a chat completion for *message* given the running *history*.

    For more information on `huggingface_hub` Inference API support, please
    check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference

    Yields the cumulative response text so the UI re-renders as tokens arrive.
    """
    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")

    # OpenAI-style message list: system prompt, prior turns, then the new turn.
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""
    # BUG FIX: the original looped `for message in client.chat_completion(...)`,
    # shadowing the `message` parameter (the user's input) with stream chunks.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        choices = chunk.choices
        token = ""
        if len(choices) and choices[0].delta.content:
            token = choices[0].delta.content
        response += token
        yield response
41
 
 
 
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  """
46
- chatbot = gr.ChatInterface(
47
- respond,
48
- type="messages",
49
- additional_inputs=[
50
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
51
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
52
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
53
- gr.Slider(
54
- minimum=0.1,
55
- maximum=1.0,
56
- value=0.95,
57
- step=0.05,
58
- label="Top-p (nucleus sampling)",
59
- ),
60
- ],
61
- )
62
-
63
- with gr.Blocks() as demo:
64
- with gr.Sidebar():
65
- gr.LoginButton()
66
- chatbot.render()
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  if __name__ == "__main__":
70
  demo.launch()
 
1
  import gradio as gr
2
+ import pandas as pd
3
+ import os
4
+ import json
5
  from huggingface_hub import InferenceClient
6
 
7
# ===============================
# LLM CLIENT
# ===============================
# Inference token is supplied via the "HF" environment variable / Space secret.
HF_TOKEN = os.getenv("HF")

# client = InferenceClient(
#     model="Qwen/Qwen2.5-7B-Instruct",
#     token=HF_TOKEN
# )
client = InferenceClient(
    model="meta-llama/Meta-Llama-3-8B-Instruct",
    token=HF_TOKEN,
)

# ===============================
# SAFETY
# ===============================
# Substrings that must never appear in LLM-generated code before we exec() it.
BLOCKED_KEYWORDS = [
    "import os",
    "import sys",
    "subprocess",
    "open(",
    "eval(",
    "exec(",
    "__",
    "socket",
    "requests",
]
26
 
27
# ===============================
# AGENT: DECIDE MODE
# ===============================
def decide_mode(user_question, df):
    """Ask the LLM whether *user_question* needs exact computation.

    Returns the model's decision as a JSON string, e.g.
    '{"mode": "insight_only", "needs_code_visible": false}', ready for
    json.loads() by the caller.
    """
    prompt = f"""
You are an expert data analysis agent.

Dataset columns:
{list(df.columns)}

Decide how to answer the user's question.

Choose ONLY ONE mode:
- code_and_insight → requires exact computation
- insight_only → qualitative reasoning only

Respond ONLY in valid JSON:
{{
"mode": "code_and_insight | insight_only",
"needs_code_visible": true | false
}}

Set needs_code_visible = true ONLY if the user explicitly asks for code.

User question:
{user_question}
"""
    response = ""
    for chunk in client.chat_completion(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=150,
        temperature=0,
        stream=True,
    ):
        if chunk.choices and chunk.choices[0].delta.content:
            response += chunk.choices[0].delta.content

    # ROBUSTNESS FIX: models often wrap JSON in prose or ``` fences even when
    # told not to, which made json.loads() in the caller fail.  Extract the
    # outermost {...} span when present; fall back to the raw text otherwise.
    start = response.find("{")
    end = response.rfind("}")
    if start != -1 and end > start:
        return response[start : end + 1]
    return response
64
+
65
+
66
# ===============================
# AGENT: CODE GEN + EXEC
# ===============================
def generate_and_run_code(user_question, df, retries=1):
    """Generate pandas code for *user_question*, execute it on *df*, and
    return a ``(code, result, error)`` tuple.

    On success ``error`` is None; on failure ``code`` and ``result`` are None
    and ``error`` describes the last problem.  Up to *retries* extra attempts
    are made, feeding the previous error back so the model can repair itself.
    """
    column_info = {col: str(dtype) for col, dtype in df.dtypes.items()}
    last_error = None

    for attempt in range(retries + 1):
        planner_prompt = f"""
You are a Python data analyst.

Dataset columns and types:
{column_info}

Rules:
- Use pandas only
- Dataframe name: df
- Store final output in variable named: result
- No explanations
- No markdown
- No imports

User question:
{user_question}
"""

        if attempt > 0:
            planner_prompt += f"\nPrevious error:\n{last_error}\nFix the code."

        code = ""
        for chunk in client.chat_completion(
            messages=[{"role": "user", "content": planner_prompt}],
            max_tokens=400,
            temperature=0.2,
            stream=True,
        ):
            if chunk.choices and chunk.choices[0].delta.content:
                code += chunk.choices[0].delta.content

        # ROBUSTNESS FIX: models frequently ignore "No markdown" and wrap the
        # code in ``` fences; strip them before the safety scan and exec.
        code = code.strip()
        if code.startswith("```"):
            code = code.strip("`").strip()
            if code.startswith("python"):
                code = code[len("python"):].strip()

        if any(bad in code for bad in BLOCKED_KEYWORDS):
            return None, None, "Unsafe code detected"

        # BUG FIX: the original exec(code, {}, local_env) used empty globals
        # with a separate locals dict, so comprehensions in the generated
        # code could not see `df` (comprehension scopes resolve free names
        # against globals).  A single namespace dict fixes that; `pd` is also
        # exposed since the prompt mandates pandas but forbids imports.
        env = {"df": df, "pd": pd, "result": None}
        try:
            # NOTE(review): exec of LLM output is inherently risky; the
            # keyword blocklist above is a best-effort mitigation only.
            exec(code, env)
            return code, env.get("result"), None
        except Exception as e:
            last_error = str(e)

    return None, None, last_error
116
+
117
+
118
# ===============================
# CORE CHATBOT
# ===============================
def analyze_excel(message, history, file):
    """Chat handler: answer *message* about the uploaded Excel *file*.

    Yields progressively longer strings so Gradio streams the answer.
    """
    if file is None:
        yield "⚠️ Please upload an Excel file first."
        return

    # ChatInterface(type="messages") passes a dict; be tolerant of plain str.
    user_question = message["content"] if isinstance(message, dict) else message

    try:
        # ROBUSTNESS FIX: gr.File may hand us a tempfile-like object or a
        # plain path string depending on configuration; the original
        # `file.name` crashed on the latter.
        path = getattr(file, "name", file)
        df = pd.read_excel(path, engine="openpyxl")

        # 🧠 Decide mode
        decision_raw = decide_mode(user_question, df)

        try:
            decision = json.loads(decision_raw)
        except Exception:
            yield "❌ Unable to interpret the request. Please rephrase."
            return

        # ROBUSTNESS FIX: the model may omit keys despite the prompt schema;
        # the original decision["mode"] / decision["needs_code_visible"]
        # raised KeyError.  Default to the cheaper insight-only path.
        mode = decision.get("mode", "insight_only")
        show_code = bool(decision.get("needs_code_visible", False))

        # ===============================
        # CODE + INSIGHT MODE
        # ===============================
        if mode == "code_and_insight":
            yield "🧠 Running analysis…"

            code, result, error = generate_and_run_code(
                user_question=user_question,
                df=df,
                retries=1
            )

            if error:
                yield f"❌ Computation failed: {error}"
                return

            # Build insight prompt
            insight_prompt = f"""
You are a senior data analyst.

User question:
{user_question}

Computed result:
{result}

Explain the insight clearly in natural language.
Focus on meaning and implications.
"""

            response = ""
            if show_code:
                response += f"🧾 Generated Python code:\n\n```python\n{code}\n```\n\n"

            for chunk in client.chat_completion(
                messages=[{"role": "user", "content": insight_prompt}],
                max_tokens=350,
                temperature=0.4,
                stream=True,
            ):
                if chunk.choices and chunk.choices[0].delta.content:
                    response += chunk.choices[0].delta.content
                    yield response
            return

        # ===============================
        # INSIGHT ONLY MODE
        # ===============================
        summary = f"""
Rows: {len(df)}
Columns: {list(df.columns)}
Missing values:
{df.isnull().sum().to_string()}
"""

        insight_prompt = f"""
Dataset summary:
{summary}

User question:
{user_question}

Provide high-level analytical insights.
Do not compute exact numbers.
Do not generate code.
"""

        response = ""
        for chunk in client.chat_completion(
            messages=[{"role": "user", "content": insight_prompt}],
            max_tokens=400,
            temperature=0.4,
            stream=True,
        ):
            if chunk.choices and chunk.choices[0].delta.content:
                response += chunk.choices[0].delta.content
                yield response

    except Exception as e:
        yield f"❌ Error: {str(e)}"
224
+
225
+
226
# ===============================
# UI
# ===============================
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📊 Excel Analysis Agent (Code + Insight)")

    # Upload widget is wired into the chat handler as an additional input.
    excel_file = gr.File(label="Upload Excel File", file_types=[".xlsx"])

    # Second element of each example is the (empty) file slot.
    example_questions = [
        ["Which provider has the highest average claim amount?", None],
        ["How many unique members are there?", None],
        ["Explain trends in processing time across regions.", None],
        ["Show python code to compute correlation between ClaimAmount and ProcessingCost.", None],
    ]

    gr.ChatInterface(
        fn=analyze_excel,
        additional_inputs=[excel_file],
        type="messages",
        examples=example_questions,
    )

if __name__ == "__main__":
    demo.launch()