asad9641 committed on
Commit
10f89e3
·
verified ·
1 Parent(s): 7bf8d4b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +322 -172
app.py CHANGED
@@ -1,220 +1,370 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # ------------------ Gradio UI ------------------
2
  with gr.Blocks() as demo:
3
- # --- THEME CSS: Blue -> Purple Neon with readable text ---
4
  gr.HTML(r"""
5
  <style>
6
- /* PAGE BACKGROUND */
7
  body { background: linear-gradient(135deg, #eef2ff 0%, #f5e8ff 100%) !important; }
8
-
9
- /* ---------------- Tabs ---------------- */
10
- div[data-testid="tab-list"] button,
11
- .tabs button {
12
- background: linear-gradient(90deg, #0ea5e9, #7c3aed);
13
- color: white !important;
14
- font-weight: 700;
15
- border-radius: 12px 12px 0 0;
16
- padding: 10px 18px;
17
- margin-right: 6px;
18
- border: none;
19
- box-shadow: 0 6px 18px rgba(124,58,237,0.18);
20
- transition: transform 0.18s ease, box-shadow 0.18s ease;
21
- }
22
- div[data-testid="tab-list"] button:hover,
23
- .tabs button:hover { transform: translateY(-3px); }
24
-
25
- div[data-testid="tab-list"] button[aria-selected="true"],
26
- .tabs button[aria-selected="true"] {
27
- background: linear-gradient(90deg, #60a5fa, #a78bfa);
28
- box-shadow: 0 8px 24px rgba(99,102,241,0.28);
29
- }
30
-
31
- /* ---------------- Global headers / markdown ---------------- */
32
- .gradio-container h2, .gradio-container h3, .gradio-container h4,
33
- .gradio-container .markdown { color: #1e1e1e; }
34
-
35
- /* Custom title styling */
36
- .app-title {
37
- font-family: 'Segoe UI', Roboto, 'Helvetica Neue', Arial;
38
- font-size: 22px;
39
- color: #1f1f1f;
40
- padding: 10px 14px;
41
- border-radius: 10px;
42
- background: linear-gradient(90deg, rgba(14,165,233,0.08), rgba(124,58,237,0.06));
43
- display: inline-block;
44
- }
45
-
46
- /* ---------------- Buttons ---------------- */
47
- .gr-button, .gradio-button, button {
48
- background: linear-gradient(90deg, #06b6d4, #7c3aed) !important;
49
- color: white !important;
50
- border: none !important;
51
- box-shadow: 0 8px 20px rgba(124,58,237,0.18);
52
- border-radius: 10px !important;
53
- padding: 8px 14px !important;
54
- font-weight: 700 !important;
55
- transition: transform 0.12s ease, box-shadow 0.12s ease;
56
- }
57
- .gr-button:hover, .gradio-button:hover, button:hover { transform: translateY(-3px); }
58
-
59
- /* ---------------- Chat bubbles ---------------- */
60
- .chatbot .message.user { background: #e4e7ff !important; color: #1f1f1f !important; border-radius: 16px 16px 16px 4px; padding: 10px 12px; }
61
- .chatbot .message.assistant { background: #f1e4ff !important; color: #1f1f1f !important; border-radius: 16px 16px 4px 16px; padding: 10px 12px; }
62
-
63
- /* Inputs and boxes readable */
64
- input, textarea, .gr-textbox, .gr-textbox textarea {
65
- background: #ffffff !important;
66
- color: #1e1e1e !important;
67
- border: 1px solid #cfcfcf !important;
68
- border-radius: 8px !important;
69
- }
70
-
71
- /* ---------------- Mic button fixes ---------------- */
72
- #mic_box {
73
- position: relative !important;
74
- display: flex !important;
75
- flex-direction: column !important;
76
- align-items: center !important;
77
- justify-content: center !important;
78
- background: #ffffff !important;
79
- border: 2px solid #d3c7ff !important;
80
- border-radius: 16px !important;
81
- padding: 10px !important;
82
- width: 70px;
83
- height: 70px;
84
- }
85
- #mic_box button {
86
- background: #6d28d9 !important;
87
- border-radius: 50% !important;
88
- width: 60px !important;
89
- height: 60px !important;
90
- display: flex !important;
91
- align-items: center !important;
92
- justify-content: center !important;
93
- border: none !important;
94
- box-shadow: 0 0 10px rgba(109,40,217,0.4) !important;
95
- }
96
- #mic_box button svg {
97
- width: 28px !important;
98
- height: 28px !important;
99
- fill: #ffffff !important;
100
- stroke: #ffffff !important;
101
- }
102
- #mic_box button:hover {
103
- background: #8b5cf6 !important;
104
- box-shadow: 0 0 14px rgba(139,92,246,0.6) !important;
105
- }
106
- #mic_box::after {
107
- content: "Tap to Record";
108
- display: block;
109
- text-align: center;
110
- font-size: 12px;
111
- margin-top: 6px;
112
- color: #4b4b4b;
113
- }
114
  </style>
115
  """)
116
 
117
- # Title area with colorful headline
118
- gr.HTML("""
119
- <div style='display:flex;align-items:center;gap:14px'>
120
- <div class='app-title'>
121
- <strong>🛠 Multi-Mode AI Assistant</strong>
122
- <div style='font-size:14px;color:#4b4b4b'>Voice · PDF · Image — Blue·Purple Neon Theme</div>
123
- </div>
124
- </div>
125
- """)
126
 
127
  session_voice = gr.State(str(uuid.uuid4()))
128
  session_pdf = gr.State(str(uuid.uuid4()))
129
  session_image = gr.State(str(uuid.uuid4()))
130
 
 
131
  with gr.Tab("🎤 Voice Chat"):
132
- gr.HTML("""
133
- <div style='margin-bottom:6px;'>
134
- <h3 style='margin:0;padding:0;color:#1f1f1f'>🎤 Voice Chat — Speak naturally, get voice & text responses</h3>
135
- <p style='margin:2px 0 6px;color:#333;font-size:13px'>
136
- Hold and speak, ask general or knowledge-document questions. Enable enhancer for richer answers.
137
- </p>
138
- </div>
139
- """)
140
  chat_voice = gr.Chatbot(height=320)
141
  with gr.Row():
142
- mic = gr.Audio(type="filepath", label="", elem_id="mic_box")
143
  audio_output = gr.Audio(label="Assistant Voice Output", type="filepath", interactive=False)
144
- tts_lang = gr.Dropdown(choices=["en", "ur"], value="en", label="TTS Language")
145
-
146
  with gr.Row():
147
  btn_general = gr.Button("⚡Ask General 🎯")
148
  btn_pdf = gr.Button("⚡Ask PDF 📄")
149
  btn_image = gr.Button("⚡Ask Image 🖼")
150
- enhancer_toggle = gr.Checkbox(label="Enable Response Enhancer", value=False, scale=1)
151
- tone_dropdown = gr.Dropdown(choices=["Helpful", "Formal", "Friendly"], value="Helpful", label="Enhancer Tone", scale=1)
152
  with gr.Row():
153
- btn_reset_logs = gr.Button("♻ Reset LOGs", elem_id='reset_logs')
154
- btn_download_logs = gr.Button("📥 Download Summary", elem_id='download_logs')
155
  Voice_summary_file = gr.File(label="📥Download Summary File", interactive=False)
156
-
157
  answer_voice = gr.Textbox(label="Assistant Answer (text)", lines=2, visible=False)
158
 
159
- btn_general.click(fn=handle_voice_general,
160
- inputs=[mic, session_voice, tts_lang, enhancer_toggle, tone_dropdown],
161
- outputs=[answer_voice, audio_output, chat_voice])
162
- btn_pdf.click(fn=handle_voice_pdf, inputs=[mic, session_pdf, tts_lang], outputs=[answer_voice, audio_output, chat_voice])
163
- btn_image.click(fn=handle_voice_image, inputs=[mic, session_image, tts_lang], outputs=[answer_voice, audio_output, chat_voice])
164
- btn_reset_logs.click(lambda: (str(uuid.uuid4()), [], None, None, ""), outputs=[session_voice, chat_voice, mic, audio_output, answer_voice])
165
- btn_download_logs.click(download_pdf_summary, inputs=[session_voice], outputs=[Voice_summary_file])
166
 
167
- # PDF Tab
168
  with gr.Tab("📄 PDF Summarizer"):
169
- gr.HTML("""
170
- <div style='margin-bottom:6px;'>
171
- <h3 style='margin:0;padding:0;color:#1f1f1f'>📄 PDF Summarizer — Upload a PDF, ask questions</h3>
172
- <p style='margin:2px 0 6px;color:#333;font-size:13px'>
173
- Uploads are chunked and embedded so you can ask targeted questions about the document.
174
- </p>
175
- </div>
176
- """)
177
  pdf_output = gr.Textbox(label="Answer (Text Only)", lines=5)
178
  with gr.Row():
179
- pdf_upload_btn = gr.File(label="Upload PDF", file_types=[".pdf"], scale=1)
180
  pdf_question = gr.Textbox(label="Ask a question about PDF (text)", lines=3)
181
  pdf_upload_msg = gr.Textbox(label="Upload Status", interactive=False)
182
  with gr.Row():
183
  pdf_send_btn = gr.Button("Ask (Questions)")
184
  pdf_reset_btn = gr.Button("♻ Reset LOGs")
185
  with gr.Row():
186
- pdf_summary_file = gr.File(label="📥Download Summary File", interactive=False, scale=1)
187
  pdf_download_btn = gr.Button("📥 Download Summary")
188
- pdf_upload_btn.upload(handle_pdf_upload, inputs=[pdf_upload_btn, session_pdf], outputs=[pdf_upload_msg])
189
- pdf_send_btn.click(handle_text_pdf, inputs=[pdf_question, session_pdf], outputs=[pdf_output])
190
- pdf_reset_btn.click(lambda: (str(uuid.uuid4()), ""), outputs=[session_pdf, pdf_output])
191
- pdf_download_btn.click(download_pdf_summary, inputs=[session_pdf], outputs=[pdf_summary_file])
192
 
193
- # Image Tab
194
  with gr.Tab("🖼 Image OCR"):
195
- gr.HTML("""
196
- <div style='margin-bottom:6px;'>
197
- <h3 style='margin:0;padding:0;color:#1f1f1f'>🖼 Image OCR — Extract text from images</h3>
198
- <p style='margin:2px 0 6px;color:#333;font-size:13px'>
199
- Upload an image, OCR runs, then ask questions about the extracted text.
200
- </p>
201
- </div>
202
- """)
203
  image_output = gr.Textbox(label="Answer (Text Only)", lines=5)
204
  with gr.Row():
205
- image_upload_btn = gr.File(label="Upload Image", file_types=[".png", ".jpg", ".jpeg"], scale=1)
206
  image_question = gr.Textbox(label="Ask question about Image", lines=3)
207
  image_upload_msg = gr.Textbox(label="Upload Status", interactive=False)
208
  with gr.Row():
209
  image_send_btn = gr.Button("Ask (Questions)")
210
  image_reset_btn = gr.Button("♻ Reset LOGs")
211
  with gr.Row():
212
- image_summary_file = gr.File(label="📥Download Summary File", interactive=False, scale=1)
213
  image_download_btn = gr.Button("📥 Download Summary")
214
- image_upload_btn.upload(handle_image_upload, inputs=[image_upload_btn, session_image], outputs=[image_upload_msg, image_output])
215
- image_send_btn.click(handle_text_image, inputs=[image_question, session_image], outputs=[image_output])
216
- image_reset_btn.click(lambda: (str(uuid.uuid4()), ""), outputs=[session_image, image_output])
217
- image_download_btn.click(download_pdf_summary, inputs=[session_image], outputs=[image_summary_file])
218
 
219
- if __name__ == "__main__":
220
- demo.launch()
 
1
+ # app.py
2
+ """
3
+ Multi-Mode AI Assistant (Voice, PDF, Image) with Wow-Factor Features
4
+ - Preserves original functionality
5
+ - UI: Blue → Purple Neon Theme
6
+ - Fixes mic button, readable text, input placeholders
7
+ """
8
+ import os
9
+ import uuid
10
+ import tempfile
11
+ import requests
12
+ from dotenv import load_dotenv
13
+ from gtts import gTTS
14
+ from PyPDF2 import PdfReader
15
+ import gradio as gr
16
+ from sentence_transformers import SentenceTransformer, util
17
+ from fpdf import FPDF
18
+ from datetime import datetime
19
+
20
+ # ------------------ Load API Keys ------------------
21
+ load_dotenv()
22
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY", "").strip()
23
+ OCR_SPACE_API_KEY = os.getenv("OCR_SPACE_API_KEY", "").strip()
24
+
25
+ if not GROQ_API_KEY:
26
+ raise ValueError("❌ GROQ_API_KEY missing. Set it in env / Hugging Face Secrets.")
27
+ if not OCR_SPACE_API_KEY:
28
+ raise ValueError("❌ OCR_SPACE_API_KEY missing. Set it in env / Hugging Face Secrets.")
29
+
30
+ HEADERS = {"Authorization": f"Bearer {GROQ_API_KEY}"}
31
+
32
+ # ------------------ Global State ------------------
33
+ SESSION_HISTORY = {}
34
+ CHAT_DISPLAY = {}
35
+ PDF_CONTENT = {}
36
+ PDF_EMBEDS = {}
37
+ IMAGE_TEXT = {}
38
+ IMAGE_EMBEDS = {}
39
+ CHUNK_SIZE = 1500
40
+
41
+ # Load embedding model
42
+ embed_model = SentenceTransformer("all-MiniLM-L6-v2")
43
+
44
+ # ------------------ Helpers ------------------
45
def _get_path_from_gr_file(gr_file):
    """Resolve a Gradio file value (path string, file-like object, or dict
    payload) to an existing filesystem path, or None if nothing usable."""
    if not gr_file:
        return None
    # Plain string path (Gradio "filepath" mode).
    if isinstance(gr_file, str):
        return gr_file if os.path.exists(gr_file) else None
    # Tempfile-like object exposing a .name attribute.
    try:
        candidate = getattr(gr_file, "name", None)
        if candidate and os.path.exists(candidate):
            return candidate
    except Exception:
        pass
    # Dict payloads emitted by some Gradio versions.
    if isinstance(gr_file, dict):
        for key in ("name", "file_name", "filepath"):
            value = gr_file.get(key)
            if isinstance(value, str) and os.path.exists(value):
                return value
    return None
62
+
63
def chunk_text(text, size=CHUNK_SIZE):
    """Split *text* into consecutive, non-overlapping slices of at most *size* characters."""
    offsets = range(0, len(text), size)
    return [text[start:start + size] for start in offsets]
65
+
66
def synthesize_speech(text, lang="en"):
    """Render *text* to speech with gTTS.

    Returns the path of a temporary MP3 file, or None when *text* is empty
    or synthesis fails (failures are logged, never raised).
    """
    if not text:
        return None
    try:
        out = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
        speech = gTTS(text=text, lang=lang)
        speech.save(out.name)
        return out.name
    except Exception as exc:
        print("TTS error:", exc)
        return None
76
+
77
def select_relevant_chunk(question, chunks, chunk_embeds):
    """Return the chunk whose embedding is most cosine-similar to *question*.

    Returns "" when there are no chunks or no embeddings to search.
    """
    if not chunks or chunk_embeds is None:
        return ""
    question_emb = embed_model.encode(question, convert_to_tensor=True)
    similarities = util.cos_sim(question_emb, chunk_embeds)[0]
    best_index = int(similarities.argmax().item())
    return chunks[best_index]
84
+
85
def _chat_display_to_messages(chat_display):
    """Flatten (user, assistant) pairs into role/content dicts.

    NOTE(review): this emits the "messages" chat format; confirm the Chatbot
    widget is configured to accept dicts for the installed Gradio version.
    """
    messages = []
    for user_text, assistant_text in chat_display:
        messages.extend((
            {"role": "user", "content": user_text},
            {"role": "assistant", "content": assistant_text},
        ))
    return messages
91
+
92
+ # ------------------ Transcription & LLM ------------------
93
# ------------------ Transcription & LLM ------------------
def transcribe_audio(audio_path):
    """Send an audio file to Groq's Whisper endpoint and return the transcript.

    On any failure a human-readable error string is returned instead of
    raising, so callers can surface it directly in the UI.
    """
    if not audio_path or not os.path.exists(audio_path):
        return "Error: audio file missing."
    endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
    try:
        with open(audio_path, "rb") as fh:
            # NOTE(review): content type is always audio/wav even for other
            # recorded formats — the API appears tolerant, but confirm.
            upload = {"file": (os.path.basename(audio_path), fh, "audio/wav")}
            payload = {"model": "whisper-large-v3"}
            response = requests.post(endpoint, headers=HEADERS, files=upload, data=payload, timeout=60)
        response.raise_for_status()
        return response.json().get("text", "") or ""
    except Exception as exc:
        print("transcription error:", exc)
        return f"Error transcribing audio: {exc}"
107
+
108
def groq_chat_completion(messages):
    """Call Groq's OpenAI-compatible chat-completions endpoint.

    Parameters
    ----------
    messages : list[dict]
        Chat turns in ``{"role", "content"}`` form.

    Returns
    -------
    str
        The assistant's reply, or an ``"Error generating response: ..."``
        string on failure (callers display this rather than catch it).
    """
    # FIX: URL host was typo'd as "https://api/groq.com/..." (slash instead
    # of a dot), which made every chat request fail with a DNS error.
    url = "https://api.groq.com/openai/v1/chat/completions"
    body = {"model": "llama-3.1-8b-instant", "messages": messages}
    try:
        resp = requests.post(url, headers=HEADERS, json=body, timeout=60)
        resp.raise_for_status()
        return resp.json()["choices"][0]["message"]["content"]
    except Exception as e:
        print("groq_chat_completion error:", e)
        return f"Error generating response: {e}"
117
+
118
def generate_response(session_id, user_text, enhancer_enabled=False, enhancer_tone="Helpful"):
    """Run one general-chat turn, persisting both sides in SESSION_HISTORY."""
    history = SESSION_HISTORY.setdefault(session_id, [])
    history.append({"role": "user", "content": user_text})

    messages = [{"role": "system", "content": "You are a helpful AI assistant."}]
    messages.extend(history)
    if enhancer_enabled:
        # Extra steering message sent to the model only; deliberately not
        # stored in the persistent history.
        messages.append({"role": "user", "content": f"Enhance response. Tone: {enhancer_tone}. Question: {user_text}"})

    assistant_text = groq_chat_completion(messages)
    history.append({"role": "assistant", "content": assistant_text})
    return assistant_text
131
+
132
+ # ------------------ PDF handling ------------------
133
# ------------------ PDF handling ------------------
def handle_pdf_upload(pdf_file, session_id):
    """Extract, chunk, and embed an uploaded PDF; returns a status string."""
    path = _get_path_from_gr_file(pdf_file)
    if not path:
        return "No file uploaded or file unreadable."
    try:
        reader = PdfReader(path)
        # extract_text() may return None for image-only pages; keep the
        # original per-page "\n" separator.
        text = "".join((page.extract_text() or "") + "\n" for page in reader.pages)
        if not text.strip():
            return "No extractable content found in PDF."
        chunks = chunk_text(text)
        PDF_CONTENT[session_id] = chunks
        PDF_EMBEDS[session_id] = embed_model.encode(chunks, convert_to_tensor=True)
        return f"PDF processed: {len(chunks)} chunks ready."
    except Exception as e:
        print("PDF upload error:", e)
        return f"Error processing PDF: {e}"
151
+
152
def handle_pdf_question(question, session_id):
    """Answer *question* against the session's uploaded PDF chunks.

    Prepends a 200-char snippet of the retrieved chunk to the model's answer
    and records the combined text in SESSION_HISTORY.
    """
    if session_id not in PDF_CONTENT:
        return "Document not found. Upload first."
    chunk = select_relevant_chunk(question, PDF_CONTENT[session_id], PDF_EMBEDS[session_id])
    prompt = [
        {"role": "system", "content": "You are a helpful assistant summarizing PDF content."},
        {"role": "user", "content": f"PDF chunk:\n{chunk}\n\nQuestion: {question}"},
    ]
    raw_answer = groq_chat_completion(prompt)
    assistant_text = f"**Snippet from PDF:**\n{chunk[:200]}...\n\n**Answer:**\n{raw_answer}"
    SESSION_HISTORY.setdefault(session_id, []).append(
        {"role": "assistant", "content": assistant_text}
    )
    return assistant_text
166
+
167
+ # ------------------ Image OCR ------------------
168
# ------------------ Image OCR ------------------
def ocr_space_file(image_path, api_key, language="eng"):
    """OCR an image via the OCR.space REST API.

    Returns the extracted text, or "" on any failure (missing file, HTTP
    error, or an API-side processing error — all logged, never raised).
    """
    if not image_path or not os.path.exists(image_path):
        return ""
    try:
        with open(image_path, "rb") as fh:
            response = requests.post(
                "https://api.ocr.space/parse/image",
                files={"file": fh},
                data={"apikey": api_key, "language": language},
                timeout=60,
            )
            response.raise_for_status()
            result = response.json()
        if result.get("IsErroredOnProcessing"):
            print("OCR.space processing error:", result)
            return ""
        texts = [item.get("ParsedText", "") for item in result.get("ParsedResults", [])]
        return "\n".join(texts)
    except Exception as exc:
        print("ocr_space_file error:", exc)
        return ""
186
+
187
def handle_image_upload(image_file, session_id):
    """OCR an uploaded image, then chunk and embed the extracted text.

    Returns (status_message, "") — the second element clears/feeds the
    answer textbox in the UI.
    """
    path = _get_path_from_gr_file(image_file)
    if not path:
        return "No image uploaded or file unreadable.", ""
    extracted = ocr_space_file(path, OCR_SPACE_API_KEY)
    if not extracted.strip():
        return "No extractable text found in the image.", ""
    chunks = chunk_text(extracted)
    IMAGE_TEXT[session_id] = chunks
    IMAGE_EMBEDS[session_id] = embed_model.encode(chunks, convert_to_tensor=True)
    return f"Image processed: {len(chunks)} chunks ready.", ""
198
+
199
def handle_image_question(question, session_id):
    """Answer *question* against the session's OCR-extracted image text.

    Mirrors handle_pdf_question: retrieve the best chunk, ask the model,
    prepend a snippet, and record the result in SESSION_HISTORY.
    """
    if session_id not in IMAGE_TEXT:
        return "Image not found. Upload first."
    chunk = select_relevant_chunk(question, IMAGE_TEXT[session_id], IMAGE_EMBEDS[session_id])
    prompt = [
        {"role": "system", "content": "You are a helpful assistant summarizing image text."},
        {"role": "user", "content": f"Image chunk:\n{chunk}\n\nQuestion: {question}"},
    ]
    raw_answer = groq_chat_completion(prompt)
    assistant_text = f"**Snippet from Image:**\n{chunk[:200]}...\n\n**Answer:**\n{raw_answer}"
    SESSION_HISTORY.setdefault(session_id, []).append(
        {"role": "assistant", "content": assistant_text}
    )
    return assistant_text
213
+
214
+ # ------------------ PDF Generation ------------------
215
# ------------------ PDF Generation ------------------
def generate_pdf_file(text, filename_prefix="summary"):
    """Write *text* into a timestamped PDF and return the file path.

    Fixes over the previous version:
    - output directory uses tempfile.gettempdir() instead of hard-coded
      "/tmp", which does not exist on Windows hosts;
    - each line is coerced to Latin-1 with replacement characters, because
      FPDF's built-in Arial font only supports Latin-1 and would raise on
      the emoji / non-Latin text that appears in chat summaries.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.set_font("Arial", "B", size=14)
    pdf.multi_cell(0, 8, f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n")
    pdf.set_font("Arial", size=12)
    for line in text.split("\n"):
        safe_line = line.encode("latin-1", "replace").decode("latin-1")
        pdf.multi_cell(0, 6, safe_line)
    file_path = os.path.join(tempfile.gettempdir(), f"{filename_prefix}_{uuid.uuid4()}.pdf")
    pdf.output(file_path)
    return file_path
227
+
228
def download_pdf_summary(session_id):
    """Bundle every assistant reply for *session_id* into a downloadable PDF."""
    assistant_turns = [
        message["content"]
        for message in SESSION_HISTORY.get(session_id, [])
        if message["role"] == "assistant"
    ]
    summary_text = "\n".join(assistant_turns) or "No summary available."
    return generate_pdf_file(summary_text, "summary")
233
+
234
+ # ------------------ Voice & Chat Handlers ------------------
235
# ------------------ Voice & Chat Handlers ------------------
def _append_chat_display(session_id, user_text, assistant_text):
    """Record one (user, assistant) exchange for the chat widget."""
    CHAT_DISPLAY.setdefault(session_id, []).append((user_text, assistant_text))
239
+
240
def handle_voice_general(audio_file, session_id, tts_lang="en", enhancer_enabled=False, enhancer_tone="Helpful"):
    """Voice turn against the general chat model: transcribe -> LLM -> TTS.

    Returns (answer_text, tts_audio_path_or_None, chat_messages).
    """
    path = _get_path_from_gr_file(audio_file)
    if not path:
        return "No audio provided.", None, []
    user_text = transcribe_audio(path)
    assistant_text = generate_response(session_id, user_text, enhancer_enabled, enhancer_tone)
    _append_chat_display(session_id, user_text, assistant_text)
    spoken = synthesize_speech(assistant_text, lang=tts_lang)
    chat = _chat_display_to_messages(CHAT_DISPLAY[session_id])
    return assistant_text, spoken, chat
249
+
250
def handle_voice_pdf(audio_file, session_id, tts_lang="en"):
    """Voice turn against the uploaded PDF: transcribe -> retrieve+answer -> TTS.

    Returns (answer_text, tts_audio_path_or_None, chat_messages).
    """
    path = _get_path_from_gr_file(audio_file)
    if not path:
        return "No audio provided.", None, []
    user_text = transcribe_audio(path)
    assistant_text = handle_pdf_question(user_text, session_id)
    _append_chat_display(session_id, user_text, assistant_text)
    spoken = synthesize_speech(assistant_text, lang=tts_lang)
    chat = _chat_display_to_messages(CHAT_DISPLAY[session_id])
    return assistant_text, spoken, chat
259
+
260
def handle_voice_image(audio_file, session_id, tts_lang="en"):
    """Voice turn against the OCR'd image text: transcribe -> retrieve+answer -> TTS.

    Returns (answer_text, tts_audio_path_or_None, chat_messages).
    """
    path = _get_path_from_gr_file(audio_file)
    if not path:
        return "No audio provided.", None, []
    user_text = transcribe_audio(path)
    assistant_text = handle_image_question(user_text, session_id)
    _append_chat_display(session_id, user_text, assistant_text)
    spoken = synthesize_speech(assistant_text, lang=tts_lang)
    chat = _chat_display_to_messages(CHAT_DISPLAY[session_id])
    return assistant_text, spoken, chat
269
+
270
def handle_text_general(user_text, session_id, enhancer_enabled=False, enhancer_tone="Helpful"):
    """Text-only general chat turn; returns (answer_text, chat_messages)."""
    assistant_text = generate_response(session_id, user_text, enhancer_enabled, enhancer_tone)
    _append_chat_display(session_id, user_text, assistant_text)
    chat = _chat_display_to_messages(CHAT_DISPLAY[session_id])
    return assistant_text, chat
274
+
275
def handle_text_pdf(question, session_id):
    """Text-entry variant of PDF Q&A; shares session state with the voice path."""
    return handle_pdf_question(question, session_id)
277
+
278
def handle_text_image(question, session_id):
    """Text-entry variant of image Q&A; shares session state with the voice path."""
    return handle_image_question(question, session_id)
280
+
281
  # ------------------ Gradio UI ------------------
282
  with gr.Blocks() as demo:
283
+ # ---- Theme CSS ----
284
  gr.HTML(r"""
285
  <style>
 
286
  body { background: linear-gradient(135deg, #eef2ff 0%, #f5e8ff 100%) !important; }
287
+ .app-title, .gradio-container h3, .gradio-container h4, .gradio-container .markdown { color: #1f1f1f !important; }
288
+ #mic_box button { background:#6d28d9; width:60px;height:60px;border-radius:50%; display:flex; align-items:center; justify-content:center; }
289
+ #mic_box button svg { fill:#fff; stroke:#fff; width:28px;height:28px; }
290
+ #mic_box::after { content:"Tap to Record"; display:block; text-align:center; font-size:12px; margin-top:6px; color:#4b4b4b; }
291
+ input, textarea, .gr-textbox { background:#fff; color:#1e1e1e; border:1px solid #cfcfcf; border-radius:8px; }
292
+ .gr-chatbot .message.user { background:#e4e7ff; color:#1f1f1f; }
293
+ .gr-chatbot .message.assistant { background:#f1e4ff; color:#1f1f1f; }
294
+ .gr-button { color:#fff; font-weight:600; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
  </style>
296
  """)
297
 
298
+ # ---- Title ----
299
+ gr.HTML("""<div style='display:flex;align-items:center;gap:14px'>
300
+ <div class='app-title'><strong>🛠 Multi-Mode AI Assistant</strong>
301
+ <div style='font-size:13px;color:#4b4b4b'>Voice · PDF · Image — Blue·Purple Neon Theme</div></div>
302
+ </div>""")
 
 
 
 
303
 
304
  session_voice = gr.State(str(uuid.uuid4()))
305
  session_pdf = gr.State(str(uuid.uuid4()))
306
  session_image = gr.State(str(uuid.uuid4()))
307
 
308
+ # ---- Voice Chat Tab ----
309
  with gr.Tab("🎤 Voice Chat"):
 
 
 
 
 
 
 
 
310
  chat_voice = gr.Chatbot(height=320)
311
  with gr.Row():
312
+ mic = gr.Audio(type="filepath", label="🎤 Record Voice", elem_id="mic_box")
313
  audio_output = gr.Audio(label="Assistant Voice Output", type="filepath", interactive=False)
314
+ tts_lang = gr.Dropdown(choices=["en","ur"], value="en", label="TTS Language")
 
315
  with gr.Row():
316
  btn_general = gr.Button("⚡Ask General 🎯")
317
  btn_pdf = gr.Button("⚡Ask PDF 📄")
318
  btn_image = gr.Button("⚡Ask Image 🖼")
319
+ enhancer_toggle = gr.Checkbox(label="Enable Response Enhancer", value=False)
320
+ tone_dropdown = gr.Dropdown(choices=["Helpful","Formal","Friendly"], value="Helpful", label="Enhancer Tone")
321
  with gr.Row():
322
+ btn_reset_logs = gr.Button("♻ Reset LOGs")
323
+ btn_download_logs = gr.Button("📥 Download Summary")
324
  Voice_summary_file = gr.File(label="📥Download Summary File", interactive=False)
 
325
  answer_voice = gr.Textbox(label="Assistant Answer (text)", lines=2, visible=False)
326
 
327
+ btn_general.click(handle_voice_general, [mic, session_voice, tts_lang, enhancer_toggle, tone_dropdown],
328
+ [answer_voice, audio_output, chat_voice])
329
+ btn_pdf.click(handle_voice_pdf, [mic, session_pdf, tts_lang], [answer_voice, audio_output, chat_voice])
330
+ btn_image.click(handle_voice_image, [mic, session_image, tts_lang], [answer_voice, audio_output, chat_voice])
331
+ btn_reset_logs.click(lambda: (str(uuid.uuid4()), [], None, None, ""), [session_voice, chat_voice, mic, audio_output, answer_voice])
332
+ btn_download_logs.click(download_pdf_summary, [session_voice], [Voice_summary_file])
 
333
 
334
+ # ---- PDF Tab ----
335
  with gr.Tab("📄 PDF Summarizer"):
 
 
 
 
 
 
 
 
336
  pdf_output = gr.Textbox(label="Answer (Text Only)", lines=5)
337
  with gr.Row():
338
+ pdf_upload_btn = gr.File(label="Upload PDF", file_types=[".pdf"])
339
  pdf_question = gr.Textbox(label="Ask a question about PDF (text)", lines=3)
340
  pdf_upload_msg = gr.Textbox(label="Upload Status", interactive=False)
341
  with gr.Row():
342
  pdf_send_btn = gr.Button("Ask (Questions)")
343
  pdf_reset_btn = gr.Button("♻ Reset LOGs")
344
  with gr.Row():
345
+ pdf_summary_file = gr.File(label="📥Download Summary File", interactive=False)
346
  pdf_download_btn = gr.Button("📥 Download Summary")
347
+ pdf_upload_btn.upload(handle_pdf_upload, [pdf_upload_btn, session_pdf], [pdf_upload_msg])
348
+ pdf_send_btn.click(handle_text_pdf, [pdf_question, session_pdf], [pdf_output])
349
+ pdf_reset_btn.click(lambda: (str(uuid.uuid4()), ""), [session_pdf, pdf_output])
350
+ pdf_download_btn.click(download_pdf_summary, [session_pdf], [pdf_summary_file])
351
 
352
+ # ---- Image Tab ----
353
  with gr.Tab("🖼 Image OCR"):
 
 
 
 
 
 
 
 
354
  image_output = gr.Textbox(label="Answer (Text Only)", lines=5)
355
  with gr.Row():
356
+ image_upload_btn = gr.File(label="Upload Image", file_types=[".png",".jpg",".jpeg"])
357
  image_question = gr.Textbox(label="Ask question about Image", lines=3)
358
  image_upload_msg = gr.Textbox(label="Upload Status", interactive=False)
359
  with gr.Row():
360
  image_send_btn = gr.Button("Ask (Questions)")
361
  image_reset_btn = gr.Button("♻ Reset LOGs")
362
  with gr.Row():
363
+ image_summary_file = gr.File(label="📥Download Summary File", interactive=False)
364
  image_download_btn = gr.Button("📥 Download Summary")
365
+ image_upload_btn.upload(handle_image_upload, [image_upload_btn, session_image], [image_upload_msg])
366
+ image_send_btn.click(handle_text_image, [image_question, session_image], [image_output])
367
+ image_reset_btn.click(lambda: (str(uuid.uuid4()), ""), [session_image, image_output])
368
+ image_download_btn.click(download_pdf_summary, [session_image], [image_summary_file])
369
 
370
+ demo.launch()