# app.py
"""
Multi-Mode AI Assistant (Voice, PDF, Image) with Ultra Neon FX
- Preserves all original logic and functionality
- Visual tweaks: Background lightened (~30%), stronger header and labels,
brighter mic icon, increased button glow, neon borders, animations, rounded chat boxes,
soft shadows. NO functional changes.
"""
import os
import uuid
import tempfile
import requests
from dotenv import load_dotenv
from gtts import gTTS
from PyPDF2 import PdfReader
import gradio as gr
from sentence_transformers import SentenceTransformer, util
from fpdf import FPDF
from datetime import datetime
# ------------------ Load API KEYS ------------------
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "").strip()
OCR_SPACE_API_KEY = os.getenv("OCR_SPACE_API_KEY", "").strip()
if not GROQ_API_KEY:
raise ValueError("❌ GROQ_API_KEY missing. Set it in env / Hugging Face Secrets.")
if not OCR_SPACE_API_KEY:
raise ValueError("❌ OCR_SPACE_API_KEY missing. Set it in env / Hugging Face Secrets.")
HEADERS = {"Authorization": f"Bearer {GROQ_API_KEY}"}
# ------------------ Global State ------------------
SESSION_HISTORY = {}
CHAT_DISPLAY = {}
PDF_CONTENT = {}
PDF_EMBEDS = {}
IMAGE_TEXT = {}
IMAGE_EMBEDS = {}
CHUNK_SIZE = 1500
# Load embedding model
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
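# Retrieval scheme: text extracted from a PDF or image is split into CHUNK_SIZE-character
# chunks, each chunk is embedded once with the MiniLM model above, and questions are
# answered against the single most similar chunk (see select_relevant_chunk below).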
# ------------------ Helpers ------------------
def _get_path_from_gr_file(gr_file):
if not gr_file:
return None
if isinstance(gr_file, str) and os.path.exists(gr_file):
return gr_file
try:
if hasattr(gr_file, "name") and os.path.exists(gr_file.name):
return gr_file.name
except Exception:
pass
if isinstance(gr_file, dict):
for key in ("name", "file_name", "filepath"):
if key in gr_file:
candidate = gr_file.get(key)
if isinstance(candidate, str) and os.path.exists(candidate):
return candidate
return None
def chunk_text(text, size=CHUNK_SIZE):
return [text[i:i + size] for i in range(0, len(text), size)]
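# Example: chunk_text("x" * 3200) returns three chunks of 1500, 1500, and 200 characters.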
def synthesize_speech(text, lang="en"):
try:
if not text:
return None
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
gTTS(text=text, lang=lang).save(tmp.name)
return tmp.name
except Exception as e:
print("TTS error:", e)
return None
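# select_relevant_chunk embeds the question and returns the stored chunk with the highest
# cosine similarity to it; only that one chunk is passed on to the LLM.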
def select_relevant_chunk(question, chunks, chunk_embeds):
if not chunks or chunk_embeds is None:
return ""
q_emb = embed_model.encode(question, convert_to_tensor=True)
scores = util.cos_sim(q_emb, chunk_embeds)[0]
top_idx = int(scores.argmax().item())
return chunks[top_idx]
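# _chat_display_to_messages converts the (user, assistant) tuples kept in CHAT_DISPLAY into
# OpenAI-style role/content dicts. This assumes the Chatbot component accepts message dicts
# (type="messages" in recent Gradio releases).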
def _chat_display_to_messages(chat_display):
msgs = []
for user, assistant in chat_display:
msgs.append({"role": "user", "content": user})
msgs.append({"role": "assistant", "content": assistant})
return msgs
# ------------------ Transcription & LLM ------------------
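# transcribe_audio uploads the recorded file to Groq's OpenAI-compatible Whisper endpoint.
# The MIME type is fixed to audio/wav, matching the WAV files the Gradio microphone
# component normally produces.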
def transcribe_audio(audio_path):
if not audio_path or not os.path.exists(audio_path):
return "Error: audio file missing."
try:
url = "https://api.groq.com/openai/v1/audio/transcriptions"
with open(audio_path, "rb") as f:
files = {"file": (os.path.basename(audio_path), f, "audio/wav")}
data = {"model": "whisper-large-v3"}
resp = requests.post(url, headers=HEADERS, files=files, data=data, timeout=60)
resp.raise_for_status()
return resp.json().get("text", "") or ""
except Exception as e:
print("transcription error:", e)
return f"Error transcribing audio: {e}"
def groq_chat_completion(messages):
body = {"model": "llama-3.1-8b-instant", "messages": messages}
try:
resp = requests.post("https://api.groq.com/openai/v1/chat/completions", headers=HEADERS, json=body, timeout=60)
resp.raise_for_status()
return resp.json()["choices"][0]["message"]["content"]
except Exception as e:
print("groq_chat_completion error:", e)
return f"Error generating response: {e}"
def generate_response(session_id, user_text, enhancer_enabled=False, enhancer_tone="Helpful"):
if session_id not in SESSION_HISTORY:
SESSION_HISTORY[session_id] = []
SESSION_HISTORY[session_id].append({"role": "user", "content": user_text})
messages = [{"role": "system", "content": "You are a helpful AI assistant."}] + SESSION_HISTORY[session_id]
if enhancer_enabled:
messages.append({"role": "user", "content": f"Enhance response. Tone: {enhancer_tone}. Question: {user_text}"})
assistant_text = groq_chat_completion(messages)
SESSION_HISTORY[session_id].append({"role": "assistant", "content": assistant_text})
return assistant_text
# ------------------ PDF handling ------------------
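# handle_pdf_upload extracts text with PyPDF2, chunks it, and caches per-session chunk
# embeddings; handle_pdf_question then retrieves the best-matching chunk, asks the LLM
# about it, and prefixes the answer with a 200-character snippet of that chunk.
# Example sketch (hypothetical session id and file path):
#   handle_pdf_upload("/path/to/report.pdf", "demo")   # -> "PDF processed: N chunks ready."
#   handle_pdf_question("What is the report about?", "demo")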
def handle_pdf_upload(pdf_file, session_id):
path = _get_path_from_gr_file(pdf_file)
if not path:
return "No file uploaded or file unreadable."
try:
reader = PdfReader(path)
text = ""
for page in reader.pages:
text += (page.extract_text() or "") + "\n"
if not text.strip():
return "No extractable content found in PDF."
chunks = chunk_text(text)
PDF_CONTENT[session_id] = chunks
PDF_EMBEDS[session_id] = embed_model.encode(chunks, convert_to_tensor=True)
return f"PDF processed: {len(chunks)} chunks ready."
except Exception as e:
print("PDF upload error:", e)
return f"Error processing PDF: {e}"
def handle_pdf_question(question, session_id):
if session_id not in PDF_CONTENT:
return "Document not found. Upload first."
chunk = select_relevant_chunk(question, PDF_CONTENT[session_id], PDF_EMBEDS[session_id])
messages = [
{"role": "system", "content": "You are a helpful assistant summarizing PDF content."},
{"role": "user", "content": f"PDF chunk:\n{chunk}\n\nQuestion: {question}"}
]
assistant_text = groq_chat_completion(messages)
assistant_text = f"**Snippet from PDF:**\n{chunk[:200]}...\n\n**Answer:**\n{assistant_text}"
if session_id not in SESSION_HISTORY:
SESSION_HISTORY[session_id] = []
SESSION_HISTORY[session_id].append({"role": "assistant", "content": assistant_text})
return assistant_text
# ------------------ Image OCR ------------------
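# ocr_space_file posts the image to OCR.space's /parse/image endpoint and joins the
# ParsedText of every ParsedResult; any request or processing error yields an empty
# string, which handle_image_upload reports as "No extractable text found in the image."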
def ocr_space_file(image_path, api_key, language="eng"):
if not image_path or not os.path.exists(image_path):
return ""
try:
with open(image_path, "rb") as f:
payload = {"apikey": api_key, "language": language}
files = {"file": f}
r = requests.post("https://api.ocr.space/parse/image", files=files, data=payload, timeout=60)
r.raise_for_status()
j = r.json()
if j.get("IsErroredOnProcessing"):
print("OCR.space processing error:", j)
return ""
parsed = [pr.get("ParsedText", "") for pr in j.get("ParsedResults", [])]
return "\n".join(parsed)
except Exception as e:
print("ocr_space_file error:", e)
return ""
def handle_image_upload(image_file, session_id):
path = _get_path_from_gr_file(image_file)
if not path:
return "No image uploaded or file unreadable.", ""
parsed = ocr_space_file(path, OCR_SPACE_API_KEY)
if not parsed.strip():
return "No extractable text found in the image.", ""
chunks = chunk_text(parsed)
IMAGE_TEXT[session_id] = chunks
IMAGE_EMBEDS[session_id] = embed_model.encode(chunks, convert_to_tensor=True)
return f"Image processed: {len(chunks)} chunks ready.", ""
def handle_image_question(question, session_id):
if session_id not in IMAGE_TEXT:
return "Image not found. Upload first."
chunk = select_relevant_chunk(question, IMAGE_TEXT[session_id], IMAGE_EMBEDS[session_id])
messages = [
{"role": "system", "content": "You are a helpful assistant summarizing image text."},
{"role": "user", "content": f"Image chunk:\n{chunk}\n\nQuestion: {question}"}
]
assistant_text = groq_chat_completion(messages)
assistant_text = f"**Snippet from Image:**\n{chunk[:200]}...\n\n**Answer:**\n{assistant_text}"
if session_id not in SESSION_HISTORY:
SESSION_HISTORY[session_id] = []
SESSION_HISTORY[session_id].append({"role": "assistant", "content": assistant_text})
return assistant_text
# ------------------ PDF Generation ------------------
def generate_pdf_file(text, filename_prefix="summary"):
pdf = FPDF()
pdf.add_page()
pdf.set_auto_page_break(auto=True, margin=15)
pdf.set_font("Arial", "B", size=14)
pdf.multi_cell(0, 8, f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n")
pdf.set_font("Arial", size=12)
    for line in text.split("\n"):
        # FPDF's built-in fonts only cover Latin-1, so replace unsupported characters
        # instead of letting Unicode model output raise an encoding error.
        pdf.multi_cell(0, 6, line.encode("latin-1", "replace").decode("latin-1"))
file_path = f"/tmp/{filename_prefix}_{uuid.uuid4()}.pdf"
pdf.output(file_path)
return file_path
def download_pdf_summary(session_id):
    summary_text = "\n".join([m["content"] for m in SESSION_HISTORY.get(session_id, []) if m["role"] == "assistant"])
if not summary_text:
summary_text = "No summary available."
return generate_pdf_file(summary_text, "summary")
# ------------------ Voice & Chat Handlers ------------------
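# Each voice handler follows the same pipeline: resolve the uploaded file path, transcribe
# it with Whisper, route the text to the matching answerer (general chat, PDF, or image),
# record the exchange for the chat display, and synthesize the reply with gTTS.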
def _append_chat_display(session_id, user_text, assistant_text):
if session_id not in CHAT_DISPLAY:
CHAT_DISPLAY[session_id] = []
CHAT_DISPLAY[session_id].append((user_text, assistant_text))
def handle_voice_general(audio_file, session_id, tts_lang="en", enhancer_enabled=False, enhancer_tone="Helpful"):
path = _get_path_from_gr_file(audio_file)
if not path:
return "No audio provided.", None, []
user_text = transcribe_audio(path)
assistant_text = generate_response(session_id, user_text, enhancer_enabled, enhancer_tone)
_append_chat_display(session_id, user_text, assistant_text)
audio_path = synthesize_speech(assistant_text, lang=tts_lang)
return assistant_text, audio_path, _chat_display_to_messages(CHAT_DISPLAY[session_id])
def handle_voice_pdf(audio_file, session_id, tts_lang="en"):
path = _get_path_from_gr_file(audio_file)
if not path:
return "No audio provided.", None, []
user_text = transcribe_audio(path)
assistant_text = handle_pdf_question(user_text, session_id)
_append_chat_display(session_id, user_text, assistant_text)
audio_path = synthesize_speech(assistant_text, lang=tts_lang)
return assistant_text, audio_path, _chat_display_to_messages(CHAT_DISPLAY[session_id])
def handle_voice_image(audio_file, session_id, tts_lang="en"):
path = _get_path_from_gr_file(audio_file)
if not path:
return "No audio provided.", None, []
user_text = transcribe_audio(path)
assistant_text = handle_image_question(user_text, session_id)
_append_chat_display(session_id, user_text, assistant_text)
audio_path = synthesize_speech(assistant_text, lang=tts_lang)
return assistant_text, audio_path, _chat_display_to_messages(CHAT_DISPLAY[session_id])
def handle_text_general(user_text, session_id, enhancer_enabled=False, enhancer_tone="Helpful"):
assistant = generate_response(session_id, user_text, enhancer_enabled, enhancer_tone)
_append_chat_display(session_id, user_text, assistant)
return assistant, _chat_display_to_messages(CHAT_DISPLAY[session_id])
def handle_text_pdf(question, session_id):
return handle_pdf_question(question, session_id)
def handle_text_image(question, session_id):
return handle_image_question(question, session_id)
# ------------------ Gradio UI ------------------
with gr.Blocks() as demo:
    # Inject CSS through an HTML block instead of gr.Blocks(css=...) for broader Gradio compatibility
gr.HTML("""
<style>
/* ================= MIC INPUT BOX FIX ================= */
#mic_box {
width: 100% !important;
height: 250px !important;
padding: 10px !important;
margin: 0 !important;
border-radius: 20px !important;
background: linear-gradient(180deg, #f0e0ff, #d8c0ff) !important;
border: 2px solid #aa66ff !important;
box-shadow: 0 8px 30px rgba(0,200,255,0.2) !important;
overflow: visible !important;
}
/* Hide all buttons except the first record and first stop */
#mic_box button[title="Record"]:not(:first-of-type),
#mic_box button[title="Stop"]:not(:first-of-type),
#mic_box button[title="Download"],
#mic_box button[title="Share"],
#mic_box button[title="Edit"] {
display: none !important;
}
/* Make buttons smaller */
#mic_box button {
width: 36px !important;
height: 36px !important;
min-width: 36px !important;
min-height: 36px !important;
padding: 0 !important;
margin: 0 5px !important;
}
/* Center everything */
#mic_box > div {
width: 100% !important;
min-height: 100px !important;
display: flex !important;
flex-direction: column !important;
align-items: flex-start !important;
justify-content: center !important;
gap: 20px !important;
}
/* Button styles */
#mic_box button[title="Record"] {
background: #ff4d4d !important;
border-radius: 50% !important;
}
#mic_box button[title="Stop"] {
background: #4CAF50 !important;
border-radius: 50% !important;
}
/* Hide other elements */
#mic_box .waveform,
#mic_box .time,
#mic_box .duration {
display: none !important;
}
/* Style the "Drop audio here" text */
#mic_box .drag-text {
color: #6a0dad !important;
font-weight: bold !important;
font-size: 14px !important;
margin-top: 5px !important;
text-align: center !important;
}
/* ================= AUDIO PLAYER FIX ================= */
#audio_output_box {
width: 100% !important;
padding: 12px !important;
margin: 0 !important;
border-radius: 20px !important;
background: linear-gradient(180deg, #f0e0ff, #d8c0ff) !important;
border: 2px solid #aa66ff !important;
box-shadow: 0 8px 30px rgba(0,200,255,0.2) !important;
overflow: visible !important;
}
/* Audio player container */
#audio_output_box .gradio-audio {
width: 100% !important;
min-width: 100% !important;
margin: 0 !important;
padding: 0 !important;
position: relative;
}
/* Controls row */
#audio_output_box .controls {
display: flex !important;
align-items: center !important;
justify-content: flex-start !important;
width: 100% !important;
gap: 4px !important;
padding: 4px 0 !important;
margin: 0 !important;
flex-wrap: nowrap !important;
overflow: visible !important;
}
/* Buttons */
#audio_output_box button {
width: 32px !important;
height: 32px !important;
min-width: 32px !important;
min-height: 32px !important;
margin: 0 2px !important;
padding: 0 !important;
border-radius: 50% !important;
background: linear-gradient(90deg, #7fe9ff, #00a1ff) !important;
display: flex !important;
align-items: center !important;
justify-content: center !important;
box-shadow: 0 2px 6px rgba(0,0,0,0.1) !important;
flex-shrink: 0 !important;
}
/* Button icons */
#audio_output_box button svg {
width: 16px !important;
height: 16px !important;
color: #000 !important;
fill: #000 !important;
}
/* Progress bar */
#audio_output_box .progress {
flex: 1 !important;
min-width: 60px !important;
margin: 0 8px !important;
height: 4px !important;
background: rgba(0,0,0,0.1) !important;
border-radius: 2px !important;
}
/* Time display */
#audio_output_box .time {
min-width: 70px !important;
text-align: center !important;
font-size: 12px !important;
color: #000 !important;
font-weight: 600 !important;
padding: 0 4px !important;
flex-shrink: 0 !important;
}
/* Volume control */
#audio_output_box .volume {
min-width: 80px !important;
margin-left: 4px !important;
flex-shrink: 0 !important;
}
/* Hide default audio element */
#audio_output_box audio {
display: none !important;
}
/* Ensure all controls are properly aligned */
#audio_output_box .gradio-audio > div {
display: flex !important;
align-items: center !important;
width: 100% !important;
overflow: visible !important;
}
/* Force button visibility */
#audio_output_box button[title="Mute"] {
margin-right: 4px !important;
}
/* Ensure time display has enough space */
#audio_output_box .time {
min-width: 80px !important;
text-align: center !important;
}
/* Audio player container adjustments */
#audio_output_box .gradio-audio {
min-height: 50px !important;
display: flex !important;
align-items: center !important;
padding: 8px !important;
}
/* --------------------- GLOBAL BODY & CONTAINER --------------------- */
body, .gradio-container {
background: radial-gradient(circle at 8% 12%, #5a6ea0 0%, #7a8fc0 60%, #6f82b5 100%) !important;
color: #eaf6ff !important;
font-family: Inter, Arial, sans-serif;
-webkit-font-smoothing: antialiased;
}
/* --------------------- HEADER & TITLE --------------------- */
.header-box {
text-align: center;
padding: 18px 12px;
margin-bottom: 14px;
border-radius: 14px;
background: linear-gradient(180deg, rgba(255,255,255,0.08), rgba(255,255,255,0.03));
box-shadow: 0 12px 46px rgba(20, 30, 70, 0.35), inset 0 1px 0 rgba(255,255,255,0.03);
}
.app-title {
font-size: 34px;
font-weight: 900;
color: #ffffff !important;
letter-spacing: 1px;
text-shadow: 0 0 18px rgba(180,220,255,0.95), 0 0 48px rgba(140,180,255,0.65);
animation: titleGlow 3s infinite alternate;
}
@keyframes titleGlow {
from { opacity: 0.88; text-shadow: 0 0 16px rgba(120,200,255,0.6); transform: translateY(0); }
to { opacity: 1; text-shadow: 0 0 54px rgba(200,240,255,0.98); transform: translateY(-2px); }
}
.app-sub { color: rgba(235,245,255,0.98); margin-top:6px; }
/* --------------------- CARDS & BOXES --------------------- */
.glow-card, .header-box, .gr-chatbot, .upload-box, .gradio-container > .container {
border-radius: 20px !important;
border: 1px solid rgba(120,180,255,0.22) !important;
box-shadow: 0 16px 50px rgba(10,20,40,0.20), inset 0 0 28px rgba(140,180,255,0.02);
transition: transform 0.28s ease, box-shadow 0.28s ease;
}
.glow-card:hover { transform: translateY(-6px); box-shadow: 0 22px 58px rgba(30,50,90,0.32); }
/* --------------------- BUTTONS --------------------- */
.neon-btn, button {
background: linear-gradient(90deg,#7fe9ff,#00a1ff) !important;
color: #001528 !important;
border-radius: 18px !important;
padding: 10px 18px !important;
font-weight: 800 !important;
border: none !important;
box-shadow: 0 16px 36px rgba(0,150,255,0.28), 0 0 120px rgba(0,190,255,0.24), inset 0 1px 0 rgba(255,255,255,0.06);
transition: transform .18s ease, box-shadow .18s ease;
}
.neon-btn:hover, button:hover {
transform: translateY(-8px) scale(1.06);
box-shadow: 0 24px 58px rgba(0,160,255,0.46), 0 0 140px rgba(0,220,255,0.34);
}
.neon-btn:active, button:active { transform: scale(.98); }
/* --------------------- MIC & AUDIO BOX --------------------- */
@keyframes micPulse {
from { transform: scale(.985); box-shadow: 0 0 28px rgba(0,140,255,0.36); }
to { transform: scale(1.04); box-shadow: 0 0 120px rgba(0,230,255,0.95); }
}
/* --------------------- MIC & AUDIO ICONS FIX --------------------- */
/* Make all audio control buttons visible in Assistant Voice Output */
#audio_output_box .gradio-audio button,
#audio_output_box .gradio-audio button svg,
#audio_output_box .gradio-audio button i {
color: #00f7ff !important; /* bright cyan */
fill: #00f7ff !important;
stroke: #00f7ff !important;
background: transparent !important;
filter: drop-shadow(0 8px 26px rgba(0,255,255,0.7)) !important;
}
#audio_output_box {
border-radius: 20px !important;
padding: 8px !important;
background: linear-gradient(180deg, #f0e0ff, #d8c0ff) !important;
border: 2px solid #aa66ff !important;
box-shadow: 0 8px 30px rgba(0,200,255,0.2), inset 0 0 20px rgba(0,200,255,0.1);
}
#audio_output_box audio {
border-radius: 12px !important;
height: 40px !important;
width: 100% !important;
background: linear-gradient(90deg, rgba(255,255,255,0.02), rgba(255,255,255,0.04));
border: 2px solid rgba(0,180,255,0.12) !important;
}
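/* The rule below wins the cascade over the cyan styling above, so the audio control
   icons actually render black on the light lilac audio box. */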
#audio_output_box .gradio-audio button,
#audio_output_box .gradio-audio button svg,
#audio_output_box .gradio-audio button i {
color: #000000 !important;
fill: #000000 !important;
stroke: #000000 !important;
opacity: 1 !important;
filter: none !important;
}
/* AUDIO ELEMENTS */
#audio_output_box, #mic_box, #pdf_box, #img_box {
border-radius: 20px !important;
padding: 8px !important;
background: linear-gradient(180deg, #f0e0ff, #d8c0ff) !important;
border: 2px solid #aa66ff !important;
box-shadow: 0 8px 30px rgba(0,200,255,0.2), inset 0 0 20px rgba(0,200,255,0.1);
position: relative !important;
overflow: visible !important;
}
/* ====================== PDF Upload Box ====================== */
#pdf_box,
#pdf_box .file-container,
#pdf_box .gr-file,
#pdf_box input[type="file"],
#pdf_box button {
border-radius: 20px !important;
padding: 8px !important;
background: linear-gradient(180deg, #f0e0ff, #d8c0ff) !important; /* same gradient as mic box */
border: 2px solid #aa66ff !important;
box-shadow: 0 8px 30px rgba(0,200,255,0.2), inset 0 0 20px rgba(0,200,255,0.1);
color: #001528 !important;
transition: transform 0.2s, box-shadow 0.2s;
}
/* Hover effect for PDF box */
#pdf_box:hover,
#pdf_box .file-container:hover,
#pdf_box .gr-file:hover,
#pdf_box input[type="file"]:hover,
#pdf_box button:hover {
transform: translateY(-4px);
box-shadow: 0 12px 40px rgba(30,50,90,0.32), inset 0 0 25px rgba(0,220,255,0.08);
}
/* ====================== Image Upload Box ====================== */
#img_box,
#img_box .file-container,
#img_box .gr-file,
#img_box input[type="file"],
#img_box button {
border-radius: 20px !important;
padding: 8px !important;
background: linear-gradient(180deg, #f0e0ff, #d8c0ff) !important; /* same gradient as mic box */
border: 2px solid #aa66ff !important;
box-shadow: 0 8px 30px rgba(0,200,255,0.2), inset 0 0 20px rgba(0,200,255,0.1);
color: #001528 !important;
transition: transform 0.2s, box-shadow 0.2s;
}
/* Hover effect for Image box */
#img_box:hover,
#img_box .file-container:hover,
#img_box .gr-file:hover,
#img_box input[type="file"]:hover,
#img_box button:hover {
transform: translateY(-4px);
box-shadow: 0 12px 40px rgba(30,50,90,0.32), inset 0 0 25px rgba(0,220,255,0.08);
}
/* Adjust mic glow ring to fit new box size */
#mic_box::before {
width: 88px !important;
height: 88px !important;
left: -14px !important;
top: -14px !important;
}
/* --------------------- CHAT BOT --------------------- */
.gr-chatbot {
border-radius: 28px !important;
padding: 12px !important;
background: linear-gradient(180deg, rgba(255,255,255,0.02), rgba(255,255,255,0.01)) !important;
border: 1px solid rgba(120,170,255,0.06) !important;
box-shadow: 0 16px 40px rgba(10,20,40,0.20);
}
.message.user { background: linear-gradient(90deg,#b0e0ff,#60b0ff) !important; color: #001528 !important; border-radius: 20px !important; padding:8px !important; }
.message.bot { background: linear-gradient(90deg,#004cff,#00aaff) !important; color: #ffffff !important; border-radius: 20px !important; padding:8px !important; }
/* --------------------- INPUTS & DROPDOWNS --------------------- */
label, .gradio-textbox label, .gradio-select label, .gradio-dropdown label, .gradio-file label {
color: #001528 !important;
font-weight: 800;
}
select, .gradio-dropdown, .gradio-select, .gradio-file, input, textarea {
color: #00171f !important;
background: rgba(255,255,255,0.96) !important;
border: 1px solid rgba(0,120,255,0.12) !important;
}
.gradio-row .gradio-dropdown, .gradio-row select, .gradio-dropdown select {
background: linear-gradient(180deg,#ffffff,#f0f8ff) !important;
color: #00171f !important;
font-weight: 800;
border-radius: 12px !important;
}
/* --------------------- MARKDOWN & TEXT --------------------- */
.gr-markdown, h1, h2, h3, p {
color: #effbff !important;
}
/* --------------------- ANIMATIONS --------------------- */
.section-slide { animation: slideIn 0.9s cubic-bezier(.2,.9,.3,1) both; }
@keyframes slideIn { 0% { transform: translateY(26px); opacity: 0 } 100% { transform: translateY(0); opacity: 1 } }
.shimmer { background: linear-gradient(90deg, rgba(255,255,255,0.02), rgba(255,255,255,0.06), rgba(255,255,255,0.02)); background-size: 200% 100%; animation: shimmer 2.8s infinite linear; }
@keyframes shimmer { 0% { background-position: 200% 0 } 100% { background-position: -200% 0 } }
/* --------------------- RESPONSIVE --------------------- */
@media (max-width: 800px) { #mic_box::before { left: -12px; top: -12px; width: 96px; height: 96px; } .app-title { font-size: 22px; } }
@media (max-width: 480px) { #mic_box::before { left: -10px; top: -10px; width: 80px; height: 80px; } #mic_box audio { height: 36px !important; } #mic_box svg, #mic_box i { font-size: 20px !important; } }
</style>
""")
# Header (title color adjusted for visibility)
gr.HTML("""
<div class="header-box">
<div class="app-title">⚡ OmniSense AI Bot — Ultra Neon</div>
<div class="app-sub">Voice • PDF • Image — Multi-Modal Intelligence with Neon FX</div>
</div>
""")
# Keep the main markdown visible and readable
gr.Markdown("## 🛠 Multi-Mode AI Assistant (Voice, PDF, Image)")
session_voice = gr.State(str(uuid.uuid4()))
session_pdf = gr.State(str(uuid.uuid4()))
session_image = gr.State(str(uuid.uuid4()))
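    # Each tab gets its own UUID session key, so voice, PDF, and image histories live in
    # separate entries of the module-level state dicts; the "Reset" buttons simply issue a
    # fresh UUID, abandoning the old in-memory history rather than deleting it.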
# ---------------- Voice Tab ----------------
with gr.Tab("🎤 Voice Chat"):
chat_voice = gr.Chatbot(height=320, elem_classes=["section-slide"])
with gr.Row(elem_classes=["section-slide"]):
mic = gr.Audio(type="filepath", label="🎤 Record Voice (hold & speak)", elem_id="mic_box", interactive=True)
audio_output = gr.Audio(label="Assistant Voice Output", type="filepath", interactive=False,elem_id="audio_output_box")
tts_lang = gr.Dropdown(choices=["en","ur"], value="en", label="TTS Language")
with gr.Row(elem_classes=["section-slide"]):
btn_general = gr.Button("⚡Ask General 🎯", elem_classes=["neon-btn"])
btn_pdf = gr.Button("⚡Ask PDF 📄", elem_classes=["neon-btn"])
btn_image = gr.Button("⚡Ask Image 🖼", elem_classes=["neon-btn"])
enhancer_toggle = gr.Checkbox(label="Enable Response Enhancer", value=False, scale=1)
tone_dropdown = gr.Dropdown(choices=["Helpful","Formal","Friendly"], value="Helpful", label="Enhancer Tone", scale=1)
with gr.Row(elem_classes=["section-slide"]):
btn_reset_logs = gr.Button("♻ Reset LOGs", elem_classes=["neon-btn"])
btn_download_logs = gr.Button("📥 Download Summary", elem_classes=["neon-btn"])
Voice_summary_file = gr.File(label="📥Download Summary File", interactive=False, scale=1,elem_id="summary_file_voice")
answer_voice = gr.Textbox(label="Assistant Answer", lines=2, visible=False)
# Bind click handlers (functionality unchanged)
btn_general.click(fn=handle_voice_general, inputs=[mic, session_voice, tts_lang, enhancer_toggle, tone_dropdown], outputs=[answer_voice, audio_output, chat_voice])
btn_pdf.click(fn=handle_voice_pdf, inputs=[mic, session_pdf, tts_lang], outputs=[answer_voice, audio_output, chat_voice])
btn_image.click(fn=handle_voice_image, inputs=[mic, session_image, tts_lang], outputs=[answer_voice, audio_output, chat_voice])
btn_reset_logs.click(lambda: (str(uuid.uuid4()), [], None, None, ""), outputs=[session_voice, chat_voice, mic, audio_output, answer_voice])
btn_download_logs.click(download_pdf_summary, inputs=[session_voice], outputs=[Voice_summary_file])
# ---------------- PDF Tab ----------------
with gr.Tab("📄 PDF Summarizer"):
pdf_output = gr.Textbox(label="Answer (Text Only)", lines=5, elem_classes=["glow-card","section-slide"])
with gr.Row(elem_classes=["section-slide"]):
pdf_upload_btn = gr.File(label="Upload PDF", file_types=[".pdf"], scale=1,elem_id="pdf_box")
pdf_question = gr.Textbox(label="Ask a question about PDF", lines=3)
pdf_upload_msg = gr.Textbox(label="Upload Status", interactive=False)
with gr.Row(elem_classes=["section-slide"]):
pdf_send_btn = gr.Button("Ask (Questions)", elem_classes=["neon-btn"])
pdf_reset_btn = gr.Button("♻ Reset LOGs", elem_classes=["neon-btn"])
with gr.Row(elem_classes=["section-slide"]):
pdf_summary_file = gr.File(label="📥Download Summary File", interactive=False, scale=1)
pdf_download_btn = gr.Button("📥 Download Summary", elem_classes=["neon-btn"])
pdf_upload_btn.upload(handle_pdf_upload, inputs=[pdf_upload_btn, session_pdf], outputs=[pdf_upload_msg])
pdf_send_btn.click(handle_text_pdf, inputs=[pdf_question, session_pdf], outputs=[pdf_output])
pdf_reset_btn.click(lambda: (str(uuid.uuid4()), ""), outputs=[session_pdf, pdf_output])
pdf_download_btn.click(download_pdf_summary, inputs=[session_pdf], outputs=[pdf_summary_file])
# ---------------- IMAGE Tab ----------------
with gr.Tab("🖼 Image OCR"):
image_output = gr.Textbox(label="Answer (Text Only)", lines=5, elem_classes=["glow-card","section-slide"])
with gr.Row(elem_classes=["section-slide"]):
image_upload_btn = gr.File(label="Upload Image", file_types=[".png",".jpg",".jpeg"], scale=1, elem_id="img_box")
image_question = gr.Textbox(label="Ask question about Image", lines=3)
image_upload_msg = gr.Textbox(label="Upload Status", interactive=False)
with gr.Row(elem_classes=["section-slide"]):
image_send_btn = gr.Button("Ask (Questions)", elem_classes=["neon-btn"])
image_reset_btn = gr.Button("♻ Reset LOGs", elem_classes=["neon-btn"])
with gr.Row(elem_classes=["section-slide"]):
image_summary_file = gr.File(label="📥Download Summary File", interactive=False, scale=1)
image_download_btn = gr.Button("📥 Download Summary", elem_classes=["neon-btn"])
image_upload_btn.upload(handle_image_upload, inputs=[image_upload_btn, session_image], outputs=[image_upload_msg, image_output])
image_send_btn.click(handle_text_image, inputs=[image_question, session_image], outputs=[image_output])
image_reset_btn.click(lambda: (str(uuid.uuid4()), ""), outputs=[session_image, image_output])
image_download_btn.click(download_pdf_summary, inputs=[session_image], outputs=[image_summary_file])
if __name__ == "__main__":
demo.launch()