ChatBotsTA commited on
Commit
01c52bf
·
verified ·
1 Parent(s): abd9dfa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +586 -112
app.py CHANGED
@@ -1,117 +1,591 @@
 
 
 
 
 
 
 
 
1
  import os
2
- import io
3
- import pdfplumber
4
- from huggingface_hub import InferenceClient
5
- from PIL import Image
 
 
 
6
  import gradio as gr
7
- import base64
8
-
9
- # ---------------- CONFIG ----------------
10
- LLAMA_MODEL = "Groq/Llama-3-Groq-8B-Tool-Use"
11
- TTS_MODEL = "espnet/kan-bayashi_ljspeech_vits"
12
- SDXL_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"
13
-
14
- HF_TOKEN = os.environ.get("HF_TOKEN")
15
- GROQ_TOKEN = os.environ.get("GROQ_TOKEN")
16
-
17
- client = None
18
- if GROQ_TOKEN:
19
- client = InferenceClient(provider="groq", api_key=GROQ_TOKEN)
20
- elif HF_TOKEN:
21
- client = InferenceClient(api_key=HF_TOKEN)
22
-
23
- # ---------------- HELPERS ----------------
24
- def pdf_to_text(file):
25
- text_chunks = []
26
- pages = 0
27
- with pdfplumber.open(file) as pdf:
28
- pages = len(pdf.pages)
29
- for page in pdf.pages:
30
- ptext = page.extract_text() or ""
31
- text_chunks.append(ptext)
32
- return "\n\n".join(text_chunks), pages
33
-
34
- def llama_summarize(text):
35
- messages = [
36
- {"role": "system", "content": "You are a concise summarizer. Give 6 short bullet points."},
37
- {"role": "user", "content": f"Summarize this document in 6 concise bullet points:\n\n{text}"}
38
- ]
39
- resp = client.chat.completions.create(model=LLAMA_MODEL, messages=messages)
40
- return resp.choices[0].message["content"]
41
-
42
- def llama_chat(history, question):
43
- messages = history + [{"role": "user", "content": question}]
44
- resp = client.chat.completions.create(model=LLAMA_MODEL, messages=messages)
45
- return resp.choices[0].message["content"]
46
-
47
- def tts_synthesize(text):
48
- audio_bytes = client.text_to_speech(model=TTS_MODEL, inputs=text)
49
- return audio_bytes
50
-
51
- def generate_image(prompt_text):
52
- img_bytes = client.text_to_image(prompt_text, model=SDXL_MODEL)
53
- return Image.open(io.BytesIO(img_bytes))
54
-
55
- def ask_question_and_maybe_diagram(chat_text, question, history):
56
- if not history:
57
- history = [{"role": "system", "content": f"Document context:\n{chat_text[:4000]}"}]
58
- ans = llama_chat(history, question)
59
- history.append({"role": "user", "content": question})
60
- history.append({"role": "assistant", "content": ans})
61
-
62
- diagram_img = None
63
- if question.strip().lower().startswith("!diagram"):
64
- prompt = question[len("!diagram"):].strip()
65
- if prompt:
66
- diagram_img = generate_image(prompt)
67
- return ans, diagram_img, history
68
-
69
- # ---------------- GRADIO INTERFACE ----------------
70
- with gr.Blocks() as demo:
71
- gr.Markdown("## 📄 PDF Buddy — Summarize • Speak • Chat • Draw")
72
-
73
- with gr.Row():
74
- pdf_file = gr.File(label="Upload PDF", type="file")
75
- extract_status = gr.Textbox(label="Status")
76
-
77
- extracted_text = gr.Textbox(label="Document Preview", lines=10)
78
-
79
- with gr.Row():
80
- summarize_btn = gr.Button("📝 Summarize")
81
- summary_output = gr.Textbox(label="Summary", lines=6)
82
-
83
- tts_btn = gr.Button("🔊 Synthesize Summary to Audio")
84
- audio_out = gr.Audio(label="Audio", type="filepath")
85
-
86
- chat_question = gr.Textbox(label="Ask a question (use !diagram for image)")
87
- chat_btn = gr.Button("❓ Ask")
88
- chat_output = gr.Textbox(label="Answer")
89
- diagram_out = gr.Image(label="Diagram (optional)")
90
- chat_history_state = gr.State()
91
-
92
- # ---------------- CALLBACKS ----------------
93
- pdf_file.change(
94
- lambda f: pdf_to_text(f) if f else ("No file uploaded", "", None),
95
- inputs=pdf_file,
96
- outputs=[extract_status, extracted_text]
97
- )
98
-
99
- summarize_btn.click(
100
- lambda text: llama_summarize(text[:30000]) if text else "No text to summarize",
101
- inputs=extracted_text,
102
- outputs=summary_output
103
- )
104
-
105
- tts_btn.click(
106
- lambda summary: tts_synthesize(summary) if summary else None,
107
- inputs=summary_output,
108
- outputs=audio_out
109
  )
110
-
111
- chat_btn.click(
112
- ask_question_and_maybe_diagram,
113
- inputs=[extracted_text, chat_question, chat_history_state],
114
- outputs=[chat_output, diagram_out, chat_history_state]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
- demo.launch()
 
 
 
 
 
 
 
1
+ """
2
+ PDF → Summary → Audio → Talk to PDF → Diagram
3
+ - Summarization: Groq (LLaMA 3)
4
+ - TTS: Deepgram (aura-asteria-en)
5
+ - Talk to PDF: Groq chat completions
6
+ - Diagram Generator: Stable Diffusion XL (Hugging Face Inference API)
7
+ """
8
+
9
  import os
10
+ import tempfile
11
+ import traceback
12
+ import time
13
+ from typing import List
14
+
15
+ import requests
16
+ import fitz # PyMuPDF
17
  import gradio as gr
18
+ from groq import Groq
19
+
20
# ================== Load API Keys ==================
# When running inside Google Colab, copy secrets from the Colab userdata
# store into environment variables, never overwriting values already set.
# Outside Colab the import fails and the whole step is silently skipped
# (same best-effort behavior as before, without the triplicated code).
try:
    from google.colab import userdata

    for _key_name in ("LLAMA", "DEEPGRAM", "HF_TOKEN"):
        if not os.environ.get(_key_name):
            _key_val = userdata.get(_key_name)
            if _key_val:
                os.environ[_key_name] = _key_val.strip()
except Exception:
    pass
34
+
35
# ================== Config ==================
CHUNK_CHARS = 20000  # max characters sent to one Groq summarization call
DEFAULT_GROQ_MODEL = "llama-3.1-8b-instant"  # default Groq chat model
DEEPGRAM_TTS_MODEL = "aura-asteria-en"  # Deepgram voice used for TTS
DEEPGRAM_ENCODING = "mp3"  # audio encoding (also used as the temp-file suffix)
HF_IMAGE_MODEL = "runwayml/stable-diffusion-v1-5"  # primary HF image model

# Global variable to store PDF text for Q&A
# "text": full extracted PDF text; "processed": True once a PDF was ingested.
pdf_text_storage = {"text": "", "processed": False}
44
+
45
+ # ================== Utils ==================
46
def extract_text_from_pdf(file_path: str) -> str:
    """Extract plain text from every page of the PDF at *file_path* (PyMuPDF).

    Pages are joined with blank lines so downstream chunking can break at
    page boundaries. Returns the stripped text ("" for image-only PDFs).
    """
    doc = fitz.open(file_path)
    try:
        text = "\n\n".join(page.get_text("text") for page in doc)
    finally:
        # Fix: the original leaked the document handle if extraction raised.
        doc.close()
    return text.strip()
51
+
52
def chunk_text(text: str, max_chars: int) -> List[str]:
    """Split *text* into pieces of at most *max_chars* characters.

    Prefers to cut at the last newline (then the last space) inside each
    window so words and lines stay intact; each piece is stripped.
    """
    chunks: List[str] = []
    pos, total = 0, len(text)
    while pos < total:
        cut = min(pos + max_chars, total)
        if cut < total:
            # Look for a natural break point: newline first, then space.
            brk = -1
            for sep in ("\n", " "):
                brk = text.rfind(sep, pos, cut)
                if brk != -1:
                    break
            if brk > pos:
                cut = brk
        chunks.append(text[pos:cut].strip())
        pos = cut
    return chunks
67
+
68
+ # ================== Groq Summarization ==================
69
def summarize_chunk_via_groq(chunk_text: str, groq_client: Groq, model: str) -> str:
    """Condense one chunk of document text into a short paragraph via Groq."""
    request_messages = [{
        "role": "user",
        "content": f"Summarize this text into a concise paragraph (~180 words max):\n\n{chunk_text}",
    }]
    response = groq_client.chat.completions.create(
        model=model,
        messages=request_messages,
        temperature=0.2,
        max_tokens=800,
    )
    return response.choices[0].message.content.strip()
78
+
79
def summarize_document(extracted_text: str, groq_api_key: str, groq_model: str = DEFAULT_GROQ_MODEL) -> str:
    """Summarize a whole document, map-reducing over chunks when it is long.

    Short inputs (<= CHUNK_CHARS) are summarized in one call; longer inputs
    are chunked, each chunk summarized independently (failures recorded
    inline), then the partial summaries are merged by a final call.
    """
    groq_client = Groq(api_key=groq_api_key)

    # Short documents fit in a single summarization request.
    if len(extracted_text) <= CHUNK_CHARS:
        return summarize_chunk_via_groq(extracted_text, groq_client, groq_model)

    # Map phase: summarize each chunk on its own.
    partial_summaries = []
    for piece in chunk_text(extracted_text, CHUNK_CHARS):
        try:
            partial_summaries.append(summarize_chunk_via_groq(piece, groq_client, groq_model))
        except Exception as e:
            partial_summaries.append(f"(error summarizing chunk: {str(e)})")

    # Reduce phase: merge partial summaries into one final summary.
    final_prompt = (
        "Combine and refine the following summaries into a single clear summary (200-300 words):\n\n"
        + " ".join(partial_summaries)
    )
    merge_response = groq_client.chat.completions.create(
        model=groq_model,
        messages=[{"role": "user", "content": final_prompt}],
        temperature=0.2,
        max_tokens=900,
    )
    return merge_response.choices[0].message.content.strip()
98
+
99
+ # ================== Deepgram TTS ==================
100
def deepgram_tts(summary_text: str, deepgram_api_key: str, model: str = DEEPGRAM_TTS_MODEL, encoding: str = DEEPGRAM_ENCODING) -> str:
    """Synthesize *summary_text* with Deepgram and return a temp audio file path.

    Raises RuntimeError when Deepgram answers with an HTTP error status.
    """
    endpoint = f"https://api.deepgram.com/v1/speak?model={model}&encoding={encoding}"
    response = requests.post(
        endpoint,
        headers={"Authorization": f"Token {deepgram_api_key}"},
        json={"text": summary_text},
        timeout=120,
    )
    if response.status_code >= 400:
        raise RuntimeError(f"Deepgram TTS failed ({response.status_code}): {response.text}")

    # Persist the audio bytes so Gradio can serve the file by path.
    audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=f".{encoding}")
    audio_file.write(response.content)
    audio_file.close()
    return audio_file.name
111
+
112
+ # ================== Talk to PDF (Separate Function) ==================
113
def ask_pdf_question(question: str, groq_key: str, model: str = DEFAULT_GROQ_MODEL) -> str:
    """Answer *question* from the text stored by process_pdf_pipeline().

    Returns a user-facing string: the model's answer prefixed with 🤖, or an
    ❌ message explaining what is missing. Never raises — API errors are
    folded into the returned string.
    """
    if not pdf_text_storage["processed"]:
        return "❌ Please process a PDF first before asking questions!"

    # Fix: Gradio may pass None for cleared fields; guard before .strip()
    # so we return the friendly message instead of raising AttributeError.
    if not (question or "").strip():
        return "❌ Please enter a question!"

    if not (groq_key or "").strip():
        return "❌ Please provide your Groq API key!"

    try:
        client = Groq(api_key=groq_key)
        # Cap the injected context at 15k characters to respect model limits.
        prompt = f"Here is PDF content:\n\n{pdf_text_storage['text'][:15000]}\n\nUser Question: {question}\n\nAnswer strictly based on PDF content. Be concise and specific."
        resp = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
            max_tokens=500,
        )
        return f"🤖 {resp.choices[0].message.content.strip()}"
    except Exception as e:
        return f"❌ Error: {str(e)}"
135
+
136
+ # ================== Diagram via HF (Fixed) ==================
137
def generate_diagram(summary: str, hf_token: str, max_retries: int = 3) -> str:
    """Render an illustrative image for *summary* via the HF Inference API.

    Tries HF_IMAGE_MODEL up to *max_retries* times (waiting out 503
    "model loading" responses), then tries alternative Stable Diffusion
    models once each, and finally falls back to a locally drawn text
    placeholder — so the caller always gets a file path back.
    """
    headers = {"Authorization": f"Bearer {hf_token}"}
    url = f"https://api-inference.huggingface.co/models/{HF_IMAGE_MODEL}"

    prompt = f"detailed technical diagram, infographic style, clean illustration of: {summary[:500]}"
    payload = {"inputs": prompt}

    for attempt in range(max_retries):
        try:
            resp = requests.post(url, headers=headers, json=payload, timeout=60)

            if resp.status_code == 503:
                # Model may still be loading: wait the advertised time, retry.
                try:
                    error_data = resp.json()
                    if "loading" in error_data.get("error", "").lower():
                        time.sleep(error_data.get("estimated_time", 20))
                        continue
                except Exception:  # fix: was a bare except (caught KeyboardInterrupt too)
                    pass

            if resp.status_code == 200:
                saved = _save_image_response(resp)
                if saved:
                    return saved

            # Unusable response: back off before the next attempt.
            if attempt < max_retries - 1:
                time.sleep((attempt + 1) * 10)

        except requests.exceptions.RequestException:
            # Network-level failure: shorter backoff, then retry.
            if attempt < max_retries - 1:
                time.sleep((attempt + 1) * 5)

    # Primary model exhausted — try known alternatives once each.
    alternative_models = [
        "stabilityai/stable-diffusion-xl-base-1.0",
        "CompVis/stable-diffusion-v1-4"
    ]

    for alt_model in alternative_models:
        try:
            alt_url = f"https://api-inference.huggingface.co/models/{alt_model}"
            resp = requests.post(alt_url, headers=headers, json=payload, timeout=60)

            if resp.status_code == 200:
                saved = _save_image_response(resp)
                if saved:
                    return saved
        except Exception:
            continue

    return create_text_diagram_placeholder(summary)


def _save_image_response(resp):
    """Write an HTTP image response to a temp .png and return its path.

    Returns None when the body does not look like an image (wrong
    content-type and suspiciously small payload).
    """
    content_type = resp.headers.get('content-type', '')
    if 'image' in content_type or len(resp.content) > 1000:
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
        tmp.write(resp.content)
        tmp.close()
        return tmp.name
    return None
195
+
196
def create_text_diagram_placeholder(summary: str) -> str:
    """Draw a PNG card showing *summary* text (Pillow), or fall back to a
    .txt file when image rendering is unavailable.

    Always returns the path of a temp file so generate_diagram() can hand
    the result straight to the UI.
    """
    try:
        from PIL import Image, ImageDraw, ImageFont

        width, height = 800, 600
        img = Image.new('RGB', (width, height), color='#0a0a0a')
        draw = ImageDraw.Draw(img)

        try:
            font = ImageFont.truetype("arial.ttf", 16)
            title_font = ImageFont.truetype("arial.ttf", 20)
        except Exception:  # fix: was a bare except; font file may be missing
            font = ImageFont.load_default()
            title_font = ImageFont.load_default()

        draw.text((50, 50), "📊 Document Summary", fill='#00ff88', font=title_font)

        # Greedy word-wrap at ~45 characters per line.
        words = summary.split()
        lines = []
        current_line = []
        max_width = 45

        for word in words:
            if len(' '.join(current_line + [word])) <= max_width:
                current_line.append(word)
            else:
                if current_line:
                    lines.append(' '.join(current_line))
                current_line = [word]
        if current_line:
            lines.append(' '.join(current_line))

        # Cap at 18 lines so the text stays inside the frame.
        y_offset = 100
        for line in lines[:18]:
            draw.text((50, y_offset), line, fill='#ccffcc', font=font)
            y_offset += 25

        draw.rectangle([25, 25, width-25, height-25], outline='#00ff88', width=3)

        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
        img.save(tmp.name, "PNG")
        tmp.close()
        return tmp.name

    except Exception:
        # Pillow unavailable or drawing failed: plain-text placeholder.
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".txt")
        tmp.write(f"Diagram generation failed. Summary: {summary[:200]}...".encode())
        tmp.close()
        return tmp.name
245
+
246
+ # ================== Main Pipeline ==================
247
def process_pdf_pipeline(pdf_file, groq_key, deepgram_key, hf_token, groq_model):
    """Run the full pipeline: validate inputs, extract PDF text, then produce
    summary, TTS audio and a diagram.

    Returns a 4-tuple (summary_or_error, audio_path, diagram_path, status)
    matching the Gradio outputs; on failure the media paths are None and the
    first element carries an ❌ message. Never raises.
    """
    try:
        # Fix: Gradio may hand us None for cleared fields; treat None like
        # an empty key so .strip() cannot raise AttributeError.
        if not (groq_key or "").strip():
            return "❌ Missing Groq API key!", None, None, "Process a PDF first!"
        if not (deepgram_key or "").strip():
            return "❌ Missing Deepgram API key!", None, None, "Process a PDF first!"
        if not (hf_token or "").strip():
            return "❌ Missing HuggingFace token!", None, None, "Process a PDF first!"
        if pdf_file is None:
            return "❌ Please upload a PDF file!", None, None, "Process a PDF first!"

        # Gradio versions differ in whether they pass a wrapper or a path.
        pdf_path = pdf_file.name if hasattr(pdf_file, "name") else str(pdf_file)

        # Extract and store text globally
        text = extract_text_from_pdf(pdf_path)
        if not text.strip():
            return "❌ PDF contains no extractable text!", None, None, "Process a PDF first!"

        # Store text for Q&A
        pdf_text_storage["text"] = text
        pdf_text_storage["processed"] = True

        # Generate summary
        summary = summarize_document(text, groq_api_key=groq_key, groq_model=groq_model or DEFAULT_GROQ_MODEL)

        # Generate audio
        audio_path = deepgram_tts(summary, deepgram_api_key=deepgram_key)

        # Generate diagram
        diagram_path = generate_diagram(summary, hf_token)

        return summary, audio_path, diagram_path, "✅ PDF processed! You can now ask questions below."

    except Exception as e:
        # Invalidate Q&A state so ask_pdf_question won't answer from a stale PDF.
        pdf_text_storage["processed"] = False
        return f"❌ Error: {str(e)}", None, None, "Process a PDF first!"
283
+
284
+ # ================== Gen-Z Dark Theme CSS ==================
285
+ GENZ_CSS = """
286
+ /* Main container styling */
287
+ .gradio-container {
288
+ background: linear-gradient(135deg, #000000 0%, #0a0a0a 100%) !important;
289
+ color: #00ff88 !important;
290
+ font-family: 'Segoe UI', 'Roboto', sans-serif !important;
291
+ }
292
+
293
+ body {
294
+ background: #000000 !important;
295
+ color: #00ff88 !important;
296
+ }
297
+
298
+ /* Input fields styling */
299
+ input, textarea, .gradio-textbox, .gradio-file, select {
300
+ background: linear-gradient(145deg, #111111, #1a1a1a) !important;
301
+ color: #00ff88 !important;
302
+ border: 2px solid #00ff88 !important;
303
+ border-radius: 12px !important;
304
+ box-shadow: 0 4px 15px rgba(0, 255, 136, 0.2) !important;
305
+ transition: all 0.3s ease !important;
306
+ }
307
+
308
+ input:focus, textarea:focus, .gradio-textbox:focus {
309
+ border-color: #00ff00 !important;
310
+ box-shadow: 0 0 25px rgba(0, 255, 136, 0.5) !important;
311
+ transform: translateY(-2px) !important;
312
+ }
313
+
314
+ /* Button styling */
315
+ button {
316
+ background: linear-gradient(145deg, #00ff88, #00cc66) !important;
317
+ color: #000000 !important;
318
+ border: none !important;
319
+ border-radius: 15px !important;
320
+ font-weight: bold !important;
321
+ text-transform: uppercase !important;
322
+ letter-spacing: 1px !important;
323
+ box-shadow: 0 6px 20px rgba(0, 255, 136, 0.3) !important;
324
+ transition: all 0.3s ease !important;
325
+ }
326
+
327
+ button:hover {
328
+ background: linear-gradient(145deg, #00cc66, #00ff88) !important;
329
+ transform: translateY(-3px) !important;
330
+ box-shadow: 0 8px 25px rgba(0, 255, 136, 0.5) !important;
331
+ }
332
+
333
+ button:active {
334
+ transform: translateY(1px) !important;
335
+ }
336
+
337
+ /* Headers and text */
338
+ h1, h2, h3, h4, .gradio-markdown {
339
+ color: #00ff88 !important;
340
+ text-shadow: 0 0 10px rgba(0, 255, 136, 0.3) !important;
341
+ }
342
+
343
+ h1 {
344
+ font-size: 2.5em !important;
345
+ background: linear-gradient(45deg, #00ff88, #00cc66) !important;
346
+ -webkit-background-clip: text !important;
347
+ -webkit-text-fill-color: transparent !important;
348
+ }
349
+
350
+ /* Tabs styling */
351
+ .gradio-tab {
352
+ background: linear-gradient(145deg, #111111, #1a1a1a) !important;
353
+ color: #00ff88 !important;
354
+ border: 2px solid #00ff88 !important;
355
+ border-radius: 10px !important;
356
+ }
357
+
358
+ .gradio-tab.selected {
359
+ background: linear-gradient(145deg, #00ff88, #00cc66) !important;
360
+ color: #000000 !important;
361
+ }
362
+
363
+ /* Slider styling */
364
+ .gradio-slider input[type="range"] {
365
+ background: #00ff88 !important;
366
+ }
367
+
368
+ .gradio-slider .gradio-slider-track {
369
+ background: #333333 !important;
370
+ }
371
+
372
+ .gradio-slider .gradio-slider-thumb {
373
+ background: #00ff88 !important;
374
+ border: 2px solid #00cc66 !important;
375
+ }
376
+
377
+ /* File upload area */
378
+ .gradio-file {
379
+ border: 3px dashed #00ff88 !important;
380
+ background: rgba(0, 255, 136, 0.1) !important;
381
+ border-radius: 15px !important;
382
+ }
383
+
384
+ /* Progress bar */
385
+ .progress-bar {
386
+ background: linear-gradient(90deg, #00ff88, #00cc66) !important;
387
+ border-radius: 10px !important;
388
+ }
389
+
390
+ /* Accordion styling */
391
+ .gradio-accordion {
392
+ background: linear-gradient(145deg, #111111, #1a1a1a) !important;
393
+ border: 2px solid #00ff88 !important;
394
+ border-radius: 12px !important;
395
+ }
396
+
397
+ /* Scrollbar */
398
+ ::-webkit-scrollbar {
399
+ width: 12px !important;
400
+ }
401
+
402
+ ::-webkit-scrollbar-track {
403
+ background: #111111 !important;
404
+ }
405
+
406
+ ::-webkit-scrollbar-thumb {
407
+ background: linear-gradient(145deg, #00ff88, #00cc66) !important;
408
+ border-radius: 6px !important;
409
+ }
410
+
411
+ /* Glowing effects */
412
+ .glow {
413
+ box-shadow: 0 0 20px rgba(0, 255, 136, 0.5) !important;
414
+ }
415
+
416
+ /* Custom animations */
417
+ @keyframes pulse {
418
+ 0% { box-shadow: 0 0 20px rgba(0, 255, 136, 0.3); }
419
+ 50% { box-shadow: 0 0 30px rgba(0, 255, 136, 0.6); }
420
+ 100% { box-shadow: 0 0 20px rgba(0, 255, 136, 0.3); }
421
+ }
422
+
423
+ .pulse-effect {
424
+ animation: pulse 2s infinite !important;
425
+ }
426
+ """
427
+
428
+ # ================== UI Build Function ==================
429
def build_ui():
    """Construct and return the Gradio Blocks app.

    Layout: upload / API keys / model settings in the left column, result
    tabs (summary, audio, diagram) in the right column, followed by a
    chat-with-PDF section, a status bar, and a footer. Wires the process
    button to process_pdf_pipeline() and the Q&A inputs to
    ask_pdf_question().
    """
    # Pre-fill key fields from environment variables when available.
    env_groq = os.environ.get("LLAMA", "")
    env_deepgram = os.environ.get("DEEPGRAM", "")
    env_hf = os.environ.get("HF_TOKEN", "")

    with gr.Blocks(css=GENZ_CSS, title="🔥 PDF AI Pipeline", theme=gr.themes.Base()) as demo:

        # Header - Centered
        gr.Markdown("""
        <div style="text-align: center; margin: 20px 0;">
            <h1 style="font-size: 3.5em; margin-bottom: 10px;">🔥 AI PDF PROCESSOR</h1>
            <h2 style="font-size: 1.8em; margin-bottom: 10px;">Transform PDFs into Audio, Summaries & Interactive Q&A</h2>
            <h3 style="font-size: 1.2em; font-style: italic; opacity: 0.9;"> PEC COHORT 3</h3>
        </div>
        """, elem_classes=["pulse-effect"])

        with gr.Row():
            # Left Column - Upload & API Settings
            with gr.Column(scale=1):
                with gr.Accordion("📁 UPLOAD PDF", open=True):
                    pdf_input = gr.File(
                        label="Drop your PDF here",
                        file_types=[".pdf"],
                        height=150
                    )

                with gr.Accordion("🔑 API KEYS", open=False):
                    gr.Markdown("*Keep your keys secure • Use env vars in production*")
                    groq_key = gr.Textbox(
                        label="🤖 Groq API Key",
                        value=env_groq,
                        type="password",
                        placeholder="sk-..."
                    )
                    deepgram_key = gr.Textbox(
                        label="🎤 Deepgram API Key",
                        value=env_deepgram,
                        type="password",
                        placeholder="Enter Deepgram key"
                    )
                    hf_key = gr.Textbox(
                        label="🤗 HuggingFace Token",
                        value=env_hf,
                        type="password",
                        placeholder="hf_..."
                    )

                with gr.Accordion("⚙️ SETTINGS", open=False):
                    groq_model = gr.Dropdown(
                        label="🧠 AI Model",
                        choices=[
                            "llama-3.1-8b-instant",
                            "llama-3.1-70b-versatile",
                            "mixtral-8x7b-32768",
                            "gemma2-9b-it"
                        ],
                        value=DEFAULT_GROQ_MODEL
                    )

                # Main Process Button
                process_btn = gr.Button(
                    "🚀 PROCESS PDF",
                    variant="primary",
                    size="lg",
                    elem_classes=["pulse-effect"]
                )

            # Right Column - Results
            with gr.Column(scale=2):
                with gr.Tabs():
                    with gr.Tab("📝 SUMMARY"):
                        summary_output = gr.Textbox(
                            label="AI Generated Summary",
                            lines=12,
                            placeholder="Your PDF summary will appear here...",
                            interactive=False
                        )

                    with gr.Tab("🔊 AUDIO"):
                        audio_output = gr.Audio(
                            label="Listen to Summary",
                            type="filepath",
                            interactive=False
                        )

                    with gr.Tab("🎨 DIAGRAM"):
                        diagram_output = gr.Image(
                            label="Visual Representation",
                            interactive=False,
                            height=400
                        )

        # Separate Q&A Section
        gr.Markdown("---")
        gr.Markdown("## 💬 CHAT WITH YOUR PDF")

        with gr.Row():
            with gr.Column(scale=3):
                question_input = gr.Textbox(
                    label="Ask anything about your PDF",
                    placeholder="What are the main findings? • Who are the key people mentioned? • Summarize chapter 2...",
                    lines=2
                )
            with gr.Column(scale=1):
                ask_btn = gr.Button("📨 SEND", variant="secondary", size="lg")

        chat_output = gr.Textbox(
            label="🤖 AI Response",
            lines=8,
            placeholder="Upload and process a PDF first, then ask your questions!",
            interactive=False
        )

        # Status indicator
        status_output = gr.Textbox(
            label="📊 Status",
            value="Ready to process PDF...",
            interactive=False
        )

        # Footer
        gr.Markdown("""
        ---
        **🔥 Pro Tips:**
        • Upload PDFs with extractable text (not image-only)
        • Questions work only after processing
        • Audio generation takes ~30-60 seconds
        • Diagrams may take longer depending on HF API load

        *Built with ❤️ for the AI generation*
        """)

        # Event handlers
        # Pipeline returns (summary, audio_path, diagram_path, status).
        process_btn.click(
            fn=process_pdf_pipeline,
            inputs=[pdf_input, groq_key, deepgram_key, hf_key, groq_model],
            outputs=[summary_output, audio_output, diagram_output, status_output],
            show_progress=True
        )

        ask_btn.click(
            fn=ask_pdf_question,
            inputs=[question_input, groq_key, groq_model],
            outputs=[chat_output],
            show_progress=False
        )

        # Enter key support for questions
        question_input.submit(
            fn=ask_pdf_question,
            inputs=[question_input, groq_key, groq_model],
            outputs=[chat_output]
        )

    return demo
584
 
585
if __name__ == "__main__":
    # Build the UI and launch with a public share link; debug/show_error
    # surface tracebacks in the browser during development.
    demo = build_ui()
    demo.launch(
        share=True,
        debug=True,
        show_error=True
    )