Eyadddddddd committed on
Commit
5b726d6
·
verified ·
1 Parent(s): 5ecf5f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +241 -86
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import base64
3
  import tempfile
 
4
 
5
  import gradio as gr
6
  from groq import Groq
@@ -8,24 +9,32 @@ from gradio_client import Client
8
  from pdf2image import convert_from_path
9
  from PIL import Image
10
 
11
- # =========================
12
- # Groq setup
13
- # =========================
 
 
 
 
 
14
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
15
  if not GROQ_API_KEY:
16
  raise ValueError("GROQ_API_KEY environment variable is not set.")
17
 
18
- client = Groq(api_key=GROQ_API_KEY)
 
19
  TEXT_MODEL = "llama-3.1-8b-instant"
20
 
21
- # =========================
22
- # Qwen3-VL Demo Space
23
- # =========================
 
 
24
  qwen_client = Client("Qwen/Qwen3-VL-Demo")
25
 
26
- # =========================
27
- # Modes and prompts
28
- # =========================
29
  MODE_PROMPTS = {
30
  "Normal Chat": (
31
  "You are NeoHelper, Eyad’s branded assistant. "
@@ -45,28 +54,74 @@ MODE_PROMPTS = {
45
  ),
46
  }
47
 
48
- # =========================
49
- # Helpers
50
- # =========================
 
 
51
  def encode_image(path: str) -> str:
 
 
 
 
 
 
 
52
  with open(path, "rb") as f:
53
  return base64.b64encode(f.read()).decode("utf-8")
54
 
55
 
56
- def extract_images_from_pdf(pdf_path: str):
 
 
 
 
 
 
 
57
  try:
58
  pages = convert_from_path(pdf_path)
59
- image_paths = []
60
- for page in pages:
 
61
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
62
  page.save(tmp.name, "PNG")
63
  image_paths.append(tmp.name)
 
64
  return image_paths
65
  except Exception:
66
  return None
67
 
68
 
69
- def call_qwen3vl(image_path, prompt):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  try:
71
  result = qwen_client.predict(
72
  "/add_message",
@@ -75,111 +130,211 @@ def call_qwen3vl(image_path, prompt):
75
  "files": [image_path],
76
  },
77
  )
 
78
  chatbot_state = result[1]
79
- return chatbot_state[-1][1] if chatbot_state else "No response."
 
 
 
80
  except Exception as e:
81
  return f"⚠️ Vision model error: {str(e)}"
82
 
83
 
84
- def call_groq_text(message, system_prompt):
85
- try:
86
- resp = client.chat.completions.create(
87
- model=TEXT_MODEL,
88
- messages=[
89
- {"role": "system", "content": system_prompt},
90
- {"role": "user", "content": message},
91
- ],
92
- max_tokens=700,
93
- )
94
- return resp.choices[0].message.content
95
- except Exception as e:
96
- return f"⚠️ Text model error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
- # =========================
99
- # Main chat function
100
- # =========================
101
  def chat_fn(message, history, file, mode):
102
- system_prompt = MODE_PROMPTS.get(mode, MODE_PROMPTS["Normal Chat"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
 
104
  if isinstance(file, list):
105
  file = file[0] if file else None
106
 
 
 
 
107
  if file is None:
 
 
108
  return call_groq_text(message, system_prompt)
109
 
 
 
 
110
  file_path = file.name
111
- file_ext = file_path.lower()
 
 
112
 
113
- # Image
114
- if file_ext.endswith((".png", ".jpg", ".jpeg", ".webp")):
 
 
115
  try:
116
- prompt = message or "Explain this image."
117
  return call_qwen3vl(file_path, prompt)
118
  except Exception as e:
119
  return f"⚠️ Error analyzing image: {str(e)}"
120
 
121
- # PDF
 
 
122
  if file_ext.endswith(".pdf"):
123
  try:
124
  image_paths = extract_images_from_pdf(file_path)
125
  if not image_paths:
126
- return "This PDF contains no images I can analyze."
127
 
128
- all_analyses = []
129
- for img_path in image_paths:
130
- prompt = message or "Explain this PDF page."
131
- result = call_qwen3vl(img_path, prompt)
132
- all_analyses.append(result)
 
 
 
133
 
134
- summary_prompt = (
135
- "Summarize these PDF page analyses into one clear explanation "
136
- "for a student:\n\n" + "\n\n---\n\n".join(all_analyses)
137
- )
138
-
139
- return call_groq_text(summary_prompt, system_prompt)
140
 
141
  except Exception as e:
142
  return f"⚠️ Error processing PDF: {str(e)}"
143
 
144
- return "Unsupported file type. Please upload an image or PDF."
 
 
 
145
 
146
- # =========================
147
- # Gradio UI
148
- # =========================
149
- with gr.Blocks(title="NeoHelper") as demo:
150
- gr.Markdown("## 🧠 NeoHelper — Text + Image + PDF", elem_id="title")
151
 
152
- mode_dd = gr.Dropdown(
153
- choices=list(MODE_PROMPTS.keys()),
154
- value="Normal Chat",
155
- label="Mode",
156
- )
157
 
158
- file_input = gr.File(
159
- label="Upload image or PDF (optional)",
160
- file_types=["image", ".pdf"],
161
- every=True,
162
- )
163
 
164
- chat = gr.ChatInterface(
165
- fn=chat_fn,
166
- additional_inputs=[file_input, mode_dd],
167
- chatbot=gr.Chatbot(show_label=False),
168
- title="NeoHelper",
169
- description=None,
170
- examples=None,
171
- api_name="neohelper_chat",
172
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
- demo.launch(
175
- theme=gr.themes.Base().set(
176
- body_background_fill="#1c1c1c",
177
- body_text_color="#f0f0f0",
178
- block_background_fill="#2a2a2a",
179
- block_border_color="#444",
180
- block_label_text_color="#f0f0f0",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  button_primary_background_fill="#4a90e2",
182
  button_primary_text_color="#ffffff",
 
 
 
 
183
  )
184
- )
185
- demo.launch()
 
 
 
 
 
1
  import os
2
  import base64
3
  import tempfile
4
+ from typing import List, Optional, Tuple
5
 
6
  import gradio as gr
7
  from groq import Groq
 
9
  from pdf2image import convert_from_path
10
  from PIL import Image
11
 
12
+
13
+ # ============================================================
14
+ # CONFIG & GLOBALS
15
+ # ============================================================
16
+
17
+ # ----------------------------
18
+ # 1. Groq setup (text model)
19
+ # ----------------------------
20
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
21
  if not GROQ_API_KEY:
22
  raise ValueError("GROQ_API_KEY environment variable is not set.")
23
 
24
+ groq_client = Groq(api_key=GROQ_API_KEY)
25
+
26
  TEXT_MODEL = "llama-3.1-8b-instant"
27
 
28
+ # ----------------------------
29
+ # 2. Qwen3-VL Demo (vision)
30
+ # ----------------------------
31
+ # Uses the public Hugging Face Space:
32
+ # https://huggingface.co/spaces/Qwen/Qwen3-VL-Demo
33
  qwen_client = Client("Qwen/Qwen3-VL-Demo")
34
 
35
+ # ----------------------------
36
+ # 3. Modes and system prompts
37
+ # ----------------------------
38
  MODE_PROMPTS = {
39
  "Normal Chat": (
40
  "You are NeoHelper, Eyad’s branded assistant. "
 
54
  ),
55
  }
56
 
57
+
58
+ # ============================================================
59
+ # HELPER FUNCTIONS
60
+ # ============================================================
61
+
62
def encode_image(path: str) -> str:
    """
    Return the contents of the file at *path* as a base64-encoded string.

    Kept for possible future use (e.g. a vision API that wants base64
    payloads); the current Qwen3-VL demo integration sends file paths
    directly rather than base64.
    """
    with open(path, "rb") as handle:
        raw = handle.read()
    return base64.b64encode(raw).decode("utf-8")
72
 
73
 
74
def extract_images_from_pdf(pdf_path: str) -> Optional[List[str]]:
    """
    Convert every page of a PDF into a PNG image on disk.

    Args:
        pdf_path: Local path to the PDF file.

    Returns:
        A list of temporary PNG file paths (one per page) on success,
        or None if conversion fails (e.g. corrupt PDF, or poppler not
        installed for pdf2image).

    NOTE(review): the temp files are created with delete=False and are
    never cleaned up here — callers (or the OS temp reaper) own their
    lifetime. Confirm this is acceptable for long-running deployments.
    """
    try:
        pages = convert_from_path(pdf_path)
        image_paths: List[str] = []
        # The page index was previously taken via enumerate() but never
        # used, so iterate over the pages directly.
        for page in pages:
            tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
            page.save(tmp.name, "PNG")
            image_paths.append(tmp.name)
        return image_paths
    except Exception:
        # Best-effort: signal failure with None so the chat handler can
        # reply gracefully instead of crashing.
        return None
94
 
95
 
96
def call_groq_text(message: str, system_prompt: str) -> str:
    """
    Ask Groq's Llama 3.1 8B Instant model a plain-text question.

    Returns the model's reply, or a user-facing error string if the
    API call fails for any reason.
    """
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": message},
    ]
    try:
        completion = groq_client.chat.completions.create(
            model=TEXT_MODEL,
            messages=chat_messages,
            max_tokens=900,
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"⚠️ Text model error: {str(e)}"
112
+
113
+
114
+ def call_qwen3vl(image_path: str, prompt: str) -> str:
115
+ """
116
+ Call the Qwen3-VL demo Space with an image + text prompt.
117
+
118
+ Args:
119
+ image_path: local path to an image file
120
+ prompt: text instruction/question
121
+
122
+ Returns:
123
+ String answer from the vision model.
124
+ """
125
  try:
126
  result = qwen_client.predict(
127
  "/add_message",
 
130
  "files": [image_path],
131
  },
132
  )
133
+ # result = [multimodal_input_state, chatbot_state]
134
  chatbot_state = result[1]
135
+ if not chatbot_state:
136
+ return "No response from vision model."
137
+ # chatbot_state is like [[user_msg, assistant_msg], ...]
138
+ return chatbot_state[-1][1]
139
  except Exception as e:
140
  return f"⚠️ Vision model error: {str(e)}"
141
 
142
 
143
def build_system_prompt(mode: str) -> str:
    """Return the system prompt for *mode*, falling back to "Normal Chat"."""
    try:
        return MODE_PROMPTS[mode]
    except KeyError:
        return MODE_PROMPTS["Normal Chat"]
148
+
149
+
150
def summarize_pdf_analyses(
    analyses: List[str],
    system_prompt: str,
) -> str:
    """
    Merge several page-level analyses into one student-friendly
    explanation, using the Groq text model for the final pass.
    """
    separator = "\n\n--- PAGE BREAK ---\n\n"
    summary_prompt = (
        "Summarize these PDF page analyses into one clear explanation for a student. "
        "Be structured, simple, and concise:\n\n"
        f"{separator.join(analyses)}"
    )
    return call_groq_text(summary_prompt, system_prompt)
165
+
166
+
167
+ # ============================================================
168
+ # CORE BACKEND: chat_fn
169
+ # ============================================================
170
 
 
 
 
171
def chat_fn(message, history, file, mode):
    """
    Main NeoHelper backend function.

    Signature is compatible with gr.ChatInterface:

        fn(message: str, history: list, *additional_inputs)

    Routing:
    - no file  -> Groq text model with the selected mode
    - image    -> Qwen3-VL demo Space (vision)
    - PDF      -> per-page Qwen3-VL analysis, then a Groq summary
    - other    -> "unsupported" message
    """
    system_prompt = build_system_prompt(mode)

    # Normalize message to a stripped string.
    if message is None:
        message = ""
    message = message.strip()

    # Normalize file input (Gradio sometimes passes a one-element list).
    if isinstance(file, list):
        file = file[0] if file else None

    # -----------------------------------------
    # TEXT-ONLY MODE (Groq)
    # -----------------------------------------
    if file is None:
        if not message:
            return "Please type a question or upload a file."
        return call_groq_text(message, system_prompt)

    # -----------------------------------------
    # FILE MODE (image or PDF)
    # -----------------------------------------
    # BUGFIX: gr.File may hand us a tempfile wrapper (with a .name
    # attribute) or, on newer Gradio versions, a plain filepath string.
    # `file.name` alone crashed on the string case — support both.
    file_path = getattr(file, "name", file)
    # Prefer the user-facing original name (when present) for extension
    # sniffing, since the temp path may have a generated suffix.
    orig_name = getattr(file, "orig_name", file_path)
    file_ext = orig_name.lower()

    # -----------------------------------------
    # IMAGE MODE (Qwen3-VL)
    # -----------------------------------------
    if file_ext.endswith((".png", ".jpg", ".jpeg", ".webp", ".bmp")):
        try:
            prompt = message or "Explain this image in a clear way."
            return call_qwen3vl(file_path, prompt)
        except Exception as e:
            return f"⚠️ Error analyzing image: {str(e)}"

    # -----------------------------------------
    # PDF MODE (Qwen3-VL per page + Groq summary)
    # -----------------------------------------
    if file_ext.endswith(".pdf"):
        try:
            image_paths = extract_images_from_pdf(file_path)
            if not image_paths:
                return "This PDF contains no pages I can analyze."

            page_analyses: List[str] = []
            for idx, img_path in enumerate(image_paths, start=1):
                prompt = (
                    message
                    or f"Explain page {idx} of this PDF in a simple way for a student."
                )
                page_answer = call_qwen3vl(img_path, prompt)
                page_analyses.append(f"Page {idx}:\n{page_answer}")

            # Summarize across all pages with Groq.
            return summarize_pdf_analyses(page_analyses, system_prompt)

        except Exception as e:
            return f"⚠️ Error processing PDF: {str(e)}"

    # -----------------------------------------
    # UNSUPPORTED FILE TYPE
    # -----------------------------------------
    return "Unsupported file type. Please upload an image or a PDF."
254
 
 
 
 
 
 
255
 
256
+ # ============================================================
257
+ # UI (Gradio ChatInterface, Dark Theme, NeoHelper Branding)
258
+ # ============================================================
 
 
259
 
260
def build_ui() -> gr.Blocks:
    """
    Build the full NeoHelper UI with:

    - NeoHelper branding header (no logo)
    - No suggestions/examples
    - Mode dropdown
    - Optional file upload (image/PDF)
    - ChatInterface layout wired to chat_fn
    """
    with gr.Blocks(title="NeoHelper") as demo:
        # Header
        gr.Markdown(
            """
            # 🧠 NeoHelper
            **Multimodal assistant with:**
            - Groq Llama 3.1 8B (text)
            - Qwen3-VL Demo (images + PDFs)
            """,
            elem_id="title",
        )

        with gr.Row():
            with gr.Column(scale=1):
                mode_dd = gr.Dropdown(
                    choices=list(MODE_PROMPTS.keys()),
                    value="Normal Chat",
                    label="Mode",
                )

                # BUGFIX: dropped `every=True`. The `every` parameter
                # schedules a *callable* value for periodic re-evaluation;
                # on a static component it is invalid and errors at build
                # time.
                file_input = gr.File(
                    label="Upload image or PDF (optional)",
                    file_types=["image", ".pdf"],
                )

            with gr.Column(scale=3):
                # Core ChatInterface
                gr.ChatInterface(
                    fn=chat_fn,
                    additional_inputs=[file_input, mode_dd],
                    chatbot=gr.Chatbot(
                        show_label=False,
                    ),
                    title="NeoHelper",
                    description=None,  # no suggestions
                    examples=None,  # no examples
                    # NOTE(review): confirm the installed Gradio version
                    # accepts `api_name` on ChatInterface; older releases
                    # do not expose it here.
                    api_name="neohelper_chat",
                )

    return demo
313
+
314
+
315
+ # ============================================================
316
+ # MAIN
317
+ # ============================================================
318
+
319
if __name__ == "__main__":
    app = build_ui()

    # Dark NeoHelper palette.
    #
    # BUGFIX: `Blocks.launch()` has no `theme` keyword — passing one raised
    # TypeError on startup. Gradio themes must be supplied at construction
    # time via `gr.Blocks(theme=...)`.
    # TODO: thread `dark_theme` into build_ui()'s gr.Blocks(...) call so the
    # dark styling is actually applied.
    dark_theme = gr.themes.Base().set(
        body_background_fill="#121212",
        body_text_color="#f5f5f5",
        block_background_fill="#1e1e1e",
        block_border_color="#333333",
        block_label_text_color="#f5f5f5",
        button_primary_background_fill="#4a90e2",
        button_primary_text_color="#ffffff",
        button_secondary_background_fill="#333333",
        button_secondary_text_color="#f5f5f5",
        input_background_fill="#1a1a1a",
        input_border_color="#444444",
    )

    app.launch(
        show_api=False,  # hide the "Use via API" docs panel
        share=False,
    )