# app.py
"""
Multi-Mode AI Assistant (Voice, PDF, Image) with Ultra Neon FX
- Preserves all original logic and functionality
- Visual tweaks: Background lightened (~30%), stronger header and labels,
  brighter mic icon, increased button glow, neon borders, animations, rounded chat boxes,
  soft shadows. NO functional changes.
"""
# Standard library
import os
import tempfile
import uuid
from datetime import datetime

# Third-party
import gradio as gr
import requests
from dotenv import load_dotenv
from fpdf import FPDF
from gtts import gTTS
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer, util
# ------------------ Load API KEYS ------------------
load_dotenv()
# Both keys are required at startup; fail fast with a clear message otherwise.
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "").strip()
OCR_SPACE_API_KEY = os.getenv("OCR_SPACE_API_KEY", "").strip()
if not GROQ_API_KEY:
    raise ValueError("❌ GROQ_API_KEY missing. Set it in env / Hugging Face Secrets.")
if not OCR_SPACE_API_KEY:
    raise ValueError("❌ OCR_SPACE_API_KEY missing. Set it in env / Hugging Face Secrets.")
# Bearer auth header reused for every Groq API request.
HEADERS = {"Authorization": f"Bearer {GROQ_API_KEY}"}
# ------------------ Global State ------------------
# All state is keyed by a per-tab session id (a uuid4 string created in the UI).
SESSION_HISTORY = {}  # session_id -> list of {"role", "content"} messages
CHAT_DISPLAY = {}     # session_id -> list of (user_text, assistant_text) pairs
PDF_CONTENT = {}      # session_id -> list of text chunks from the uploaded PDF
PDF_EMBEDS = {}       # session_id -> tensor of chunk embeddings for the PDF
IMAGE_TEXT = {}       # session_id -> list of OCR text chunks from the image
IMAGE_EMBEDS = {}     # session_id -> tensor of chunk embeddings for the image
CHUNK_SIZE = 1500     # characters per retrieval chunk
# Load embedding model (used for chunk retrieval; downloads on first run)
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
| # ------------------ Helpers ------------------ | |
| def _get_path_from_gr_file(gr_file): | |
| if not gr_file: | |
| return None | |
| if isinstance(gr_file, str) and os.path.exists(gr_file): | |
| return gr_file | |
| try: | |
| if hasattr(gr_file, "name") and os.path.exists(gr_file.name): | |
| return gr_file.name | |
| except Exception: | |
| pass | |
| if isinstance(gr_file, dict): | |
| for key in ("name", "file_name", "filepath"): | |
| if key in gr_file: | |
| candidate = gr_file.get(key) | |
| if isinstance(candidate, str) and os.path.exists(candidate): | |
| return candidate | |
| return None | |
def chunk_text(text, size=CHUNK_SIZE):
    """Split *text* into consecutive chunks of at most *size* characters."""
    chunks = []
    start = 0
    while start < len(text):
        chunks.append(text[start:start + size])
        start += size
    return chunks
def synthesize_speech(text, lang="en"):
    """Render *text* to speech via gTTS and return the path to a temp MP3.

    Returns None for empty input or on any TTS failure; errors are
    printed, never raised (the UI simply shows no audio).
    """
    if not text:
        return None
    try:
        out = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
        gTTS(text=text, lang=lang).save(out.name)
        return out.name
    except Exception as exc:
        print("TTS error:", exc)
        return None
def select_relevant_chunk(question, chunks, chunk_embeds):
    """Return the chunk whose embedding is most cosine-similar to *question*.

    Returns "" when there are no chunks or no precomputed embeddings.
    """
    if not chunks or chunk_embeds is None:
        return ""
    question_emb = embed_model.encode(question, convert_to_tensor=True)
    similarities = util.cos_sim(question_emb, chunk_embeds)[0]
    best_index = int(similarities.argmax().item())
    return chunks[best_index]
| def _chat_display_to_messages(chat_display): | |
| msgs = [] | |
| for user, assistant in chat_display: | |
| msgs.append({"role": "user", "content": user}) | |
| msgs.append({"role": "assistant", "content": assistant}) | |
| return msgs | |
# ------------------ Transcription & LLM ------------------
def transcribe_audio(audio_path):
    """Transcribe an audio file with Groq's Whisper endpoint.

    Returns the transcript text (possibly ""), or a human-readable error
    string on failure — callers display it rather than catching exceptions.
    """
    if not audio_path or not os.path.exists(audio_path):
        return "Error: audio file missing."
    endpoint = "https://api.groq.com/openai/v1/audio/transcriptions"
    try:
        with open(audio_path, "rb") as audio_fh:
            files = {"file": (os.path.basename(audio_path), audio_fh, "audio/wav")}
            payload = {"model": "whisper-large-v3"}
            resp = requests.post(endpoint, headers=HEADERS, files=files, data=payload, timeout=60)
        resp.raise_for_status()
        return resp.json().get("text", "") or ""
    except Exception as exc:
        print("transcription error:", exc)
        return f"Error transcribing audio: {exc}"
def groq_chat_completion(messages):
    """Send a chat-completion request to Groq and return the reply text.

    On any failure (network, HTTP error, unexpected payload shape) an
    error string is returned instead of raising.
    """
    payload = {"model": "llama-3.1-8b-instant", "messages": messages}
    endpoint = "https://api.groq.com/openai/v1/chat/completions"
    try:
        resp = requests.post(endpoint, headers=HEADERS, json=payload, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        return data["choices"][0]["message"]["content"]
    except Exception as exc:
        print("groq_chat_completion error:", exc)
        return f"Error generating response: {exc}"
def generate_response(session_id, user_text, enhancer_enabled=False, enhancer_tone="Helpful"):
    """Run one general-chat turn for a session and return the assistant reply.

    The user message and the assistant reply are both appended to
    SESSION_HISTORY so later turns keep conversational context.
    """
    history = SESSION_HISTORY.setdefault(session_id, [])
    history.append({"role": "user", "content": user_text})
    messages = [{"role": "system", "content": "You are a helpful AI assistant."}]
    messages.extend(history)
    if enhancer_enabled:
        # The enhancer prompt is sent on top of the already-recorded question
        # and is deliberately NOT stored in the session history.
        messages.append({"role": "user", "content": f"Enhance response. Tone: {enhancer_tone}. Question: {user_text}"})
    assistant_text = groq_chat_completion(messages)
    history.append({"role": "assistant", "content": assistant_text})
    return assistant_text
# ------------------ PDF handling ------------------
def handle_pdf_upload(pdf_file, session_id):
    """Extract text from an uploaded PDF, chunk it, and embed the chunks.

    Stores chunks and embeddings under *session_id* and returns a status
    message for the UI; all errors are reported as text, never raised.
    """
    path = _get_path_from_gr_file(pdf_file)
    if not path:
        return "No file uploaded or file unreadable."
    try:
        reader = PdfReader(path)
        parts = []
        for page in reader.pages:
            # extract_text() can return None for image-only pages.
            parts.append((page.extract_text() or "") + "\n")
        full_text = "".join(parts)
        if not full_text.strip():
            return "No extractable content found in PDF."
        chunks = chunk_text(full_text)
        PDF_CONTENT[session_id] = chunks
        PDF_EMBEDS[session_id] = embed_model.encode(chunks, convert_to_tensor=True)
        return f"PDF processed: {len(chunks)} chunks ready."
    except Exception as exc:
        print("PDF upload error:", exc)
        return f"Error processing PDF: {exc}"
def handle_pdf_question(question, session_id):
    """Answer *question* against the session's uploaded PDF.

    Retrieves the most relevant chunk, asks the LLM, prefixes the answer
    with a short snippet of the source chunk, and records the reply in
    SESSION_HISTORY so it appears in downloadable summaries.
    """
    if session_id not in PDF_CONTENT:
        return "Document not found. Upload first."
    chunk = select_relevant_chunk(question, PDF_CONTENT[session_id], PDF_EMBEDS[session_id])
    prompt = [
        {"role": "system", "content": "You are a helpful assistant summarizing PDF content."},
        {"role": "user", "content": f"PDF chunk:\n{chunk}\n\nQuestion: {question}"},
    ]
    answer = groq_chat_completion(prompt)
    answer = f"**Snippet from PDF:**\n{chunk[:200]}...\n\n**Answer:**\n{answer}"
    SESSION_HISTORY.setdefault(session_id, []).append({"role": "assistant", "content": answer})
    return answer
# ------------------ Image OCR ------------------
def ocr_space_file(image_path, api_key, language="eng"):
    """OCR an image via the OCR.space API and return the parsed text.

    Returns "" when the file is missing, the API reports a processing
    error, or any exception occurs (errors are logged, not raised).
    """
    if not image_path or not os.path.exists(image_path):
        return ""
    try:
        with open(image_path, "rb") as img_fh:
            form = {"apikey": api_key, "language": language}
            files = {"file": img_fh}
            reply = requests.post("https://api.ocr.space/parse/image", files=files, data=form, timeout=60)
        reply.raise_for_status()
        result = reply.json()
        if result.get("IsErroredOnProcessing"):
            print("OCR.space processing error:", result)
            return ""
        return "\n".join(item.get("ParsedText", "") for item in result.get("ParsedResults", []))
    except Exception as exc:
        print("ocr_space_file error:", exc)
        return ""
def handle_image_upload(image_file, session_id):
    """OCR an uploaded image, chunk the text, and embed the chunks.

    Mirrors handle_pdf_upload: stores chunks/embeddings per session and
    returns ``(status_message, "")`` — the second element clears the
    answer textbox in the UI.

    Fix: the original had no try/except, so a failure in chunking or
    embedding propagated uncaught (inconsistent with the PDF handler,
    which reports errors as status text).
    """
    path = _get_path_from_gr_file(image_file)
    if not path:
        return "No image uploaded or file unreadable.", ""
    try:
        parsed = ocr_space_file(path, OCR_SPACE_API_KEY)
        if not parsed.strip():
            return "No extractable text found in the image.", ""
        chunks = chunk_text(parsed)
        IMAGE_TEXT[session_id] = chunks
        IMAGE_EMBEDS[session_id] = embed_model.encode(chunks, convert_to_tensor=True)
        return f"Image processed: {len(chunks)} chunks ready.", ""
    except Exception as e:
        # Report failures the same way handle_pdf_upload does.
        print("Image upload error:", e)
        return f"Error processing image: {e}", ""
def handle_image_question(question, session_id):
    """Answer *question* against the session's OCR'd image text.

    Same flow as handle_pdf_question: retrieve the best chunk, ask the
    LLM, prefix a snippet, and record the reply in SESSION_HISTORY.
    """
    if session_id not in IMAGE_TEXT:
        return "Image not found. Upload first."
    chunk = select_relevant_chunk(question, IMAGE_TEXT[session_id], IMAGE_EMBEDS[session_id])
    prompt = [
        {"role": "system", "content": "You are a helpful assistant summarizing image text."},
        {"role": "user", "content": f"Image chunk:\n{chunk}\n\nQuestion: {question}"},
    ]
    answer = groq_chat_completion(prompt)
    answer = f"**Snippet from Image:**\n{chunk[:200]}...\n\n**Answer:**\n{answer}"
    SESSION_HISTORY.setdefault(session_id, []).append({"role": "assistant", "content": answer})
    return answer
# ------------------ PDF Generation ------------------
def generate_pdf_file(text, filename_prefix="summary"):
    """Write *text* to a PDF in the system temp dir and return its path.

    Fixes:
    - FPDF's built-in fonts are Latin-1 only, so characters outside
      Latin-1 (emoji, smart quotes — common in assistant replies) raised
      UnicodeEncodeError at output time; they are now replaced with '?'.
    - The output path used a hard-coded ``/tmp``; use the platform temp
      dir instead (same location on Linux, portable elsewhere).
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.set_font("Arial", "B", size=14)
    pdf.multi_cell(0, 8, f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n")
    pdf.set_font("Arial", size=12)
    for line in text.split("\n"):
        # Replace non-Latin-1 characters so the core fonts can render.
        safe_line = line.encode("latin-1", "replace").decode("latin-1")
        pdf.multi_cell(0, 6, safe_line)
    file_path = os.path.join(tempfile.gettempdir(), f"{filename_prefix}_{uuid.uuid4()}.pdf")
    pdf.output(file_path)
    return file_path
def download_pdf_summary(session_id):
    """Build a PDF of all assistant replies in a session and return its path."""
    assistant_msgs = [
        msg["content"]
        for msg in SESSION_HISTORY.get(session_id, [])
        if msg["role"] == "assistant"
    ]
    summary_text = "\n".join(assistant_msgs) or "No summary available."
    return generate_pdf_file(summary_text, "summary")
# ------------------ Voice & Chat Handlers ------------------
def _append_chat_display(session_id, user_text, assistant_text):
    """Record one (user, assistant) exchange for the session's chat widget."""
    CHAT_DISPLAY.setdefault(session_id, []).append((user_text, assistant_text))
def handle_voice_general(audio_file, session_id, tts_lang="en", enhancer_enabled=False, enhancer_tone="Helpful"):
    """Voice turn for general chat: transcribe, answer, speak, update chat.

    Returns (answer_text, tts_audio_path_or_None, chat_messages).
    """
    path = _get_path_from_gr_file(audio_file)
    if not path:
        return "No audio provided.", None, []
    question = transcribe_audio(path)
    answer = generate_response(session_id, question, enhancer_enabled, enhancer_tone)
    _append_chat_display(session_id, question, answer)
    spoken = synthesize_speech(answer, lang=tts_lang)
    return answer, spoken, _chat_display_to_messages(CHAT_DISPLAY[session_id])
def handle_voice_pdf(audio_file, session_id, tts_lang="en"):
    """Voice turn against the uploaded PDF: transcribe, answer, speak.

    Returns (answer_text, tts_audio_path_or_None, chat_messages).
    """
    path = _get_path_from_gr_file(audio_file)
    if not path:
        return "No audio provided.", None, []
    question = transcribe_audio(path)
    answer = handle_pdf_question(question, session_id)
    _append_chat_display(session_id, question, answer)
    spoken = synthesize_speech(answer, lang=tts_lang)
    return answer, spoken, _chat_display_to_messages(CHAT_DISPLAY[session_id])
def handle_voice_image(audio_file, session_id, tts_lang="en"):
    """Voice turn against the OCR'd image: transcribe, answer, speak.

    Returns (answer_text, tts_audio_path_or_None, chat_messages).
    """
    path = _get_path_from_gr_file(audio_file)
    if not path:
        return "No audio provided.", None, []
    question = transcribe_audio(path)
    answer = handle_image_question(question, session_id)
    _append_chat_display(session_id, question, answer)
    spoken = synthesize_speech(answer, lang=tts_lang)
    return answer, spoken, _chat_display_to_messages(CHAT_DISPLAY[session_id])
def handle_text_general(user_text, session_id, enhancer_enabled=False, enhancer_tone="Helpful"):
    """Text turn for general chat; returns (answer, chat_messages)."""
    answer = generate_response(session_id, user_text, enhancer_enabled, enhancer_tone)
    _append_chat_display(session_id, user_text, answer)
    return answer, _chat_display_to_messages(CHAT_DISPLAY[session_id])
def handle_text_pdf(question, session_id):
    """Text question against the uploaded PDF (thin wrapper for the UI)."""
    return handle_pdf_question(question, session_id)
def handle_text_image(question, session_id):
    """Text question against the OCR'd image (thin wrapper for the UI)."""
    return handle_image_question(question, session_id)
# ------------------ Gradio UI ------------------
with gr.Blocks() as demo:
    # Inject CSS via HTML to avoid gr.Blocks(css=...) (compatibility)
    # NOTE(review): styling only — these selectors target Gradio's internal
    # DOM and may need updating when the Gradio version changes.
    gr.HTML("""
<style>
/* ================= MIC INPUT BOX FIX ================= */
/* ===== MIC BOX FIX ===== */
#mic_box {
width: 100% !important;
height: 250px !important;
padding: 10px !important;
margin: 0 !important;
border-radius: 20px !important;
background: linear-gradient(180deg, #f0e0ff, #d8c0ff) !important;
border: 2px solid #aa66ff !important;
box-shadow: 0 8px 30px rgba(0,200,255,0.2) !important;
overflow: visible !important;
}
/* Hide all buttons except the first record and first stop */
#mic_box button[title="Record"]:not(:first-of-type),
#mic_box button[title="Stop"]:not(:first-of-type),
#mic_box button[title="Download"],
#mic_box button[title="Share"],
#mic_box button[title="Edit"] {
display: none !important;
}
/* Make buttons smaller */
#mic_box button {
width: 36px !important;
height: 36px !important;
min-width: 36px !important;
min-height: 36px !important;
padding: 0 !important;
margin: 0 5px !important;
}
/* Center everything */
#mic_box > div {
width: 100% !important;
min-height: 100px !important;
display: flex !important;
flex-direction: column !important;
align-items: left !important;
justify-content: center !important;
gap: 20px !important;
}
/* Button styles */
#mic_box button[title="Record"] {
background: #ff4d4d !important;
border-radius: 50% !important;
}
#mic_box button[title="Stop"] {
background: #4CAF50 !important;
border-radius: 50% !important;
}
/* Hide other elements */
#mic_box .waveform,
#mic_box .time,
#mic_box .duration {
display: none !important;
}
/* Style the "Drop audio here" text */
#mic_box .drag-text {
color: #6a0dad !important;
font-weight: bold !important;
font-size: 14px !important;
margin-top: 5px !important;
text-align: center !important;
}
/* ================= AUDIO PLAYER FIX ================= */
#audio_output_box {
width: 100% !important;
padding: 12px !important;
margin: 0 !important;
border-radius: 20px !important;
background: linear-gradient(180deg, #f0e0ff, #d8c0ff) !important;
border: 2px solid #aa66ff !important;
box-shadow: 0 8px 30px rgba(0,200,255,0.2) !important;
overflow: visible !important;
}
/* Audio player container */
#audio_output_box .gradio-audio {
width: 100% !important;
min-width: 100% !important;
margin: 0 !important;
padding: 0 !important;
position: relative;
}
/* Controls row */
#audio_output_box .controls {
display: flex !important;
align-items: center !important;
justify-content: flex-start !important;
width: 100% !important;
gap: 4px !important;
padding: 4px 0 !important;
margin: 0 !important;
flex-wrap: nowrap !important;
overflow: visible !important;
}
/* Buttons */
#audio_output_box button {
width: 32px !important;
height: 32px !important;
min-width: 32px !important;
min-height: 32px !important;
margin: 0 2px !important;
padding: 0 !important;
border-radius: 50% !important;
background: linear-gradient(90deg, #7fe9ff, #00a1ff) !important;
display: flex !important;
align-items: center !important;
justify-content: center !important;
box-shadow: 0 2px 6px rgba(0,0,0,0.1) !important;
flex-shrink: 0 !important;
}
/* Button icons */
#audio_output_box button svg {
width: 16px !important;
height: 16px !important;
color: #000 !important;
fill: #000 !important;
}
/* Progress bar */
#audio_output_box .progress {
flex: 1 !important;
min-width: 60px !important;
margin: 0 8px !important;
height: 4px !important;
background: rgba(0,0,0,0.1) !important;
border-radius: 2px !important;
}
/* Time display */
#audio_output_box .time {
min-width: 70px !important;
text-align: center !important;
font-size: 12px !important;
color: #000 !important;
font-weight: 600 !important;
padding: 0 4px !important;
flex-shrink: 0 !important;
}
/* Volume control */
#audio_output_box .volume {
min-width: 80px !important;
margin-left: 4px !important;
flex-shrink: 0 !important;
}
/* Hide default audio element */
#audio_output_box audio {
display: none !important;
}
/* Ensure all controls are properly aligned */
#audio_output_box .gradio-audio > div {
display: flex !important;
align-items: center !important;
width: 100% !important;
overflow: visible !important;
}
/* Force button visibility */
#audio_output_box button[title="Mute"] {
margin-right: 4px !important;
}
/* Ensure time display has enough space */
#audio_output_box .time {
min-width: 80px !important;
text-align: center !important;
}
/* Audio player container adjustments */
#audio_output_box .gradio-audio {
min-height: 50px !important;
display: flex !important;
align-items: center !important;
padding: 8px !important;
}
/* --------------------- GLOBAL BODY & CONTAINER --------------------- */
body, .gradio-container {
background: radial-gradient(circle at 8% 12%, #5a6ea0 0%, #7a8fc0 60%, #6f82b5 100%) !important;
color: #eaf6ff !important;
font-family: Inter, Arial, sans-serif;
-webkit-font-smoothing: antialiased;
}
/* --------------------- HEADER & TITLE --------------------- */
.header-box {
text-align: center;
padding: 18px 12px;
margin-bottom: 14px;
border-radius: 14px;
background: linear-gradient(180deg, rgba(255,255,255,0.08), rgba(255,255,255,0.03));
box-shadow: 0 12px 46px rgba(20, 30, 70, 0.35), inset 0 1px 0 rgba(255,255,255,0.03);
}
.app-title {
font-size: 34px;
font-weight: 900;
color: #ffffff !important;
letter-spacing: 1px;
text-shadow: 0 0 18px rgba(180,220,255,0.95), 0 0 48px rgba(140,180,255,0.65);
animation: titleGlow 3s infinite alternate;
}
@keyframes titleGlow {
from { opacity: 0.88; text-shadow: 0 0 16px rgba(120,200,255,0.6); transform: translateY(0); }
to { opacity: 1; text-shadow: 0 0 54px rgba(200,240,255,0.98); transform: translateY(-2px); }
}
.app-sub { color: rgba(235,245,255,0.98); margin-top:6px; }
/* --------------------- CARDS & BOXES --------------------- */
.glow-card, .header-box, .gr-chatbot, .upload-box, .gradio-container > .container {
border-radius: 20px !important;
border: 1px solid rgba(120,180,255,0.22) !important;
box-shadow: 0 16px 50px rgba(10,20,40,0.20), inset 0 0 28px rgba(140,180,255,0.02);
transition: transform 0.28s ease, box-shadow 0.28s ease;
}
.glow-card:hover { transform: translateY(-6px); box-shadow: 0 22px 58px rgba(30,50,90,0.32); }
/* --------------------- BUTTONS --------------------- */
.neon-btn, button {
background: linear-gradient(90deg,#7fe9ff,#00a1ff) !important;
color: #001528 !important;
border-radius: 18px !important;
padding: 10px 18px !important;
font-weight: 800 !important;
border: none !important;
box-shadow: 0 16px 36px rgba(0,150,255,0.28), 0 0 120px rgba(0,190,255,0.24), inset 0 1px 0 rgba(255,255,255,0.06);
transition: transform .18s ease, box-shadow .18s ease;
}
.neon-btn:hover, button:hover {
transform: translateY(-8px) scale(1.06);
box-shadow: 0 24px 58px rgba(0,160,255,0.46), 0 0 140px rgba(0,220,255,0.34);
}
.neon-btn:active, button:active { transform: scale(.98); }
/* --------------------- MIC & AUDIO BOX --------------------- */
@keyframes micPulse {
from { transform: scale(.985); box-shadow: 0 0 28px rgba(0,140,255,0.36); }
to { transform: scale(1.04); box-shadow: 0 0 120px rgba(0,230,255,0.95); }
}
/* AUDIO BUTTONS ICONS */
/* --------------------- MIC & AUDIO ICONS FIX --------------------- */
/* Make all audio control buttons visible in Assistant Voice Output */
#audio_output_box .gradio-audio button,
#audio_output_box .gradio-audio button svg,
#audio_output_box .gradio-audio button i {
color: #00f7ff !important; /* bright cyan */
fill: #00f7ff !important;
stroke: #00f7ff !important;
background: transparent !important;
filter: drop-shadow(0 8px 26px rgba(0,255,255,0.7)) !important;
}
#audio_output_box {
border-radius: 20px !important;
padding: 8px !important;
background: linear-gradient(180deg, #f0e0ff, #d8c0ff) !important;
border: 2px solid #aa66ff !important;
box-shadow: 0 8px 30px rgba(0,200,255,0.2), inset 0 0 20px rgba(0,200,255,0.1);
}
#audio_output_box audio {
border-radius: 12px !important;
height: 40px !important;
width: 100% !important;
background: linear-gradient(90deg, rgba(255,255,255,0.02), rgba(255,255,255,0.04));
border: 2px solid rgba(0,180,255,0.12) !important;
}
#audio_output_box .gradio-audio button,
#audio_output_box .gradio-audio button svg,
#audio_output_box .gradio-audio button i {
color: #000000 !important;
fill: #000000 !important;
stroke: #000000 !important;
opacity: 1 !important;
filter: none !important;
}
/* AUDIO ELEMENTS */
#audio_output_box, #mic_box, #pdf_box, #img_box {
border-radius: 20px !important;
padding: 8px !important;
background: linear-gradient(180deg, #f0e0ff, #d8c0ff) !important;
border: 2px solid #aa66ff !important;
box-shadow: 0 8px 30px rgba(0,200,255,0.2), inset 0 0 20px rgba(0,200,255,0.1);
position: relative !important;
overflow: visible !important;
}
/* ====================== PDF Upload Box ====================== */
#pdf_box,
#pdf_box .file-container,
#pdf_box .gr-file,
#pdf_box input[type="file"],
#pdf_box button {
border-radius: 20px !important;
padding: 8px !important;
background: linear-gradient(180deg, #f0e0ff, #d8c0ff) !important; /* same gradient as mic box */
border: 2px solid #aa66ff !important;
box-shadow: 0 8px 30px rgba(0,200,255,0.2), inset 0 0 20px rgba(0,200,255,0.1);
color: #001528 !important;
transition: transform 0.2s, box-shadow 0.2s;
}
/* Hover effect for PDF box */
#pdf_box:hover,
#pdf_box .file-container:hover,
#pdf_box .gr-file:hover,
#pdf_box input[type="file"]:hover,
#pdf_box button:hover {
transform: translateY(-4px);
box-shadow: 0 12px 40px rgba(30,50,90,0.32), inset 0 0 25px rgba(0,220,255,0.08);
}
/* ====================== Image Upload Box ====================== */
#img_box,
#img_box .file-container,
#img_box .gr-file,
#img_box input[type="file"],
#img_box button {
border-radius: 20px !important;
padding: 8px !important;
background: linear-gradient(180deg, #f0e0ff, #d8c0ff) !important; /* same gradient as mic box */
border: 2px solid #aa66ff !important;
box-shadow: 0 8px 30px rgba(0,200,255,0.2), inset 0 0 20px rgba(0,200,255,0.1);
color: #001528 !important;
transition: transform 0.2s, box-shadow 0.2s;
}
/* Hover effect for Image box */
#img_box:hover,
#img_box .file-container:hover,
#img_box .gr-file:hover,
#img_box input[type="file"]:hover,
#img_box button:hover {
transform: translateY(-4px);
box-shadow: 0 12px 40px rgba(30,50,90,0.32), inset 0 0 25px rgba(0,220,255,0.08);
}
/* Adjust mic glow ring to fit new box size */
#mic_box::before {
width: 88px !important;
height: 88px !important;
left: -14px !important;
top: -14px !important;
}
/* --------------------- CHAT BOT --------------------- */
.gr-chatbot {
border-radius: 28px !important;
padding: 12px !important;
background: linear-gradient(180deg, rgba(255,255,255,0.02), rgba(255,255,255,0.01)) !important;
border: 1px solid rgba(120,170,255,0.06) !important;
box-shadow: 0 16px 40px rgba(10,20,40,0.20);
}
.message.user { background: linear-gradient(90deg,#b0e0ff,#60b0ff) !important; color: #001528 !important; border-radius: 20px !important; padding:8px !important; }
.message.bot { background: linear-gradient(90deg,#004cff,#00aaff) !important; color: #ffffff !important; border-radius: 20px !important; padding:8px !important; }
/* --------------------- INPUTS & DROPDOWNS --------------------- */
label, .gradio-textbox label, .gradio-select label, .gradio-dropdown label, .gradio-file label {
color: #001528 !important;
font-weight: 800;
}
select, .gradio-dropdown, .gradio-select, .gradio-file, input, textarea {
color: #00171f !important;
background: rgba(255,255,255,0.96) !important;
border: 1px solid rgba(0,120,255,0.12) !important;
}
.gradio-row .gradio-dropdown, .gradio-row select, .gradio-dropdown select {
background: linear-gradient(180deg,#ffffff,#f0f8ff) !important;
color: #00171f !important;
font-weight: 800;
border-radius: 12px !important;
}
/* --------------------- MARKDOWN & TEXT --------------------- */
.gr-markdown, h1, h2, h3, p {
color: #effbff !important;
}
/* --------------------- ANIMATIONS --------------------- */
.section-slide { animation: slideIn 0.9s cubic-bezier(.2,.9,.3,1) both; }
@keyframes slideIn { 0% { transform: translateY(26px); opacity: 0 } 100% { transform: translateY(0); opacity: 1 } }
.shimmer { background: linear-gradient(90deg, rgba(255,255,255,0.02), rgba(255,255,255,0.06), rgba(255,255,255,0.02)); background-size: 200% 100%; animation: shimmer 2.8s infinite linear; }
@keyframes shimmer { 0% { background-position: 200% 0 } 100% { background-position: -200% 0 } }
/* --------------------- RESPONSIVE --------------------- */
@media (max-width: 800px) { #mic_box::before { left: -12px; top: -12px; width: 96px; height: 96px; } .app-title { font-size: 22px; } }
@media (max-width: 480px) { #mic_box::before { left: -10px; top: -10px; width: 80px; height: 80px; } #mic_box audio { height: 36px !important; } #mic_box svg, #mic_box i { font-size: 20px !important; } }
</style>
""")
    # Header (title color adjusted for visibility)
    gr.HTML("""
<div class="header-box">
<div class="app-title">⚡ OmniSense AI Bot — Ultra Neon</div>
<div class="app-sub">Voice • PDF • Image — Multi-Modal Intelligence with Neon FX</div>
</div>
""")
    # Keep the main markdown visible and readable
    gr.Markdown("## 🛠 Multi-Mode AI Assistant (Voice, PDF, Image)")
    # One independent session id per tab; the reset buttons replace these
    # with a fresh uuid, which effectively clears the server-side state.
    session_voice = gr.State(str(uuid.uuid4()))
    session_pdf = gr.State(str(uuid.uuid4()))
    session_image = gr.State(str(uuid.uuid4()))
    # ---------------- Voice Tab ----------------
    with gr.Tab("🎤 Voice Chat"):
        chat_voice = gr.Chatbot(height=320, elem_classes=["section-slide"])
        with gr.Row(elem_classes=["section-slide"]):
            mic = gr.Audio(type="filepath", label="🎤 Record Voice (hold & speak)", elem_id="mic_box", interactive=True)
            audio_output = gr.Audio(label="Assistant Voice Output", type="filepath", interactive=False, elem_id="audio_output_box")
            tts_lang = gr.Dropdown(choices=["en","ur"], value="en", label="TTS Language")
        with gr.Row(elem_classes=["section-slide"]):
            btn_general = gr.Button("⚡Ask General 🎯", elem_classes=["neon-btn"])
            btn_pdf = gr.Button("⚡Ask PDF 📄", elem_classes=["neon-btn"])
            btn_image = gr.Button("⚡Ask Image 🖼", elem_classes=["neon-btn"])
            enhancer_toggle = gr.Checkbox(label="Enable Response Enhancer", value=False, scale=1)
            tone_dropdown = gr.Dropdown(choices=["Helpful","Formal","Friendly"], value="Helpful", label="Enhancer Tone", scale=1)
        with gr.Row(elem_classes=["section-slide"]):
            btn_reset_logs = gr.Button("♻ Reset LOGs", elem_classes=["neon-btn"])
            btn_download_logs = gr.Button("📥 Download Summary", elem_classes=["neon-btn"])
            Voice_summary_file = gr.File(label="📥Download Summary File", interactive=False, scale=1, elem_id="summary_file_voice")
        # Hidden textbox: keeps the raw text answer as a click output target.
        answer_voice = gr.Textbox(label="Assistant Answer", lines=2, visible=False)
        # Bind click handlers (functionality unchanged)
        btn_general.click(fn=handle_voice_general, inputs=[mic, session_voice, tts_lang, enhancer_toggle, tone_dropdown], outputs=[answer_voice, audio_output, chat_voice])
        # NOTE(review): the PDF/Image buttons pass session_pdf/session_image,
        # so they answer against documents uploaded in the other tabs.
        btn_pdf.click(fn=handle_voice_pdf, inputs=[mic, session_pdf, tts_lang], outputs=[answer_voice, audio_output, chat_voice])
        btn_image.click(fn=handle_voice_image, inputs=[mic, session_image, tts_lang], outputs=[answer_voice, audio_output, chat_voice])
        btn_reset_logs.click(lambda: (str(uuid.uuid4()), [], None, None, ""), outputs=[session_voice, chat_voice, mic, audio_output, answer_voice])
        btn_download_logs.click(download_pdf_summary, inputs=[session_voice], outputs=[Voice_summary_file])
    # ---------------- PDF Tab ----------------
    with gr.Tab("📄 PDF Summarizer"):
        pdf_output = gr.Textbox(label="Answer (Text Only)", lines=5, elem_classes=["glow-card","section-slide"])
        with gr.Row(elem_classes=["section-slide"]):
            pdf_upload_btn = gr.File(label="Upload PDF", file_types=[".pdf"], scale=1, elem_id="pdf_box")
            pdf_question = gr.Textbox(label="Ask a question about PDF", lines=3)
            pdf_upload_msg = gr.Textbox(label="Upload Status", interactive=False)
        with gr.Row(elem_classes=["section-slide"]):
            pdf_send_btn = gr.Button("Ask (Questions)", elem_classes=["neon-btn"])
            pdf_reset_btn = gr.Button("♻ Reset LOGs", elem_classes=["neon-btn"])
        with gr.Row(elem_classes=["section-slide"]):
            pdf_summary_file = gr.File(label="📥Download Summary File", interactive=False, scale=1)
            pdf_download_btn = gr.Button("📥 Download Summary", elem_classes=["neon-btn"])
        # Upload triggers chunking + embedding; send answers from the chunks.
        pdf_upload_btn.upload(handle_pdf_upload, inputs=[pdf_upload_btn, session_pdf], outputs=[pdf_upload_msg])
        pdf_send_btn.click(handle_text_pdf, inputs=[pdf_question, session_pdf], outputs=[pdf_output])
        # Reset swaps in a fresh session id (old chunks stay in memory but unreachable).
        pdf_reset_btn.click(lambda: (str(uuid.uuid4()), ""), outputs=[session_pdf, pdf_output])
        pdf_download_btn.click(download_pdf_summary, inputs=[session_pdf], outputs=[pdf_summary_file])
    # ---------------- IMAGE Tab ----------------
    with gr.Tab("🖼 Image OCR"):
        image_output = gr.Textbox(label="Answer (Text Only)", lines=5, elem_classes=["glow-card","section-slide"])
        with gr.Row(elem_classes=["section-slide"]):
            image_upload_btn = gr.File(label="Upload Image", file_types=[".png",".jpg",".jpeg"], scale=1, elem_id="img_box")
            image_question = gr.Textbox(label="Ask question about Image", lines=3)
            image_upload_msg = gr.Textbox(label="Upload Status", interactive=False)
        with gr.Row(elem_classes=["section-slide"]):
            image_send_btn = gr.Button("Ask (Questions)", elem_classes=["neon-btn"])
            image_reset_btn = gr.Button("♻ Reset LOGs", elem_classes=["neon-btn"])
        with gr.Row(elem_classes=["section-slide"]):
            image_summary_file = gr.File(label="📥Download Summary File", interactive=False, scale=1)
            image_download_btn = gr.Button("📥 Download Summary", elem_classes=["neon-btn"])
        # Upload OCRs + embeds; handle_image_upload's second output clears the answer box.
        image_upload_btn.upload(handle_image_upload, inputs=[image_upload_btn, session_image], outputs=[image_upload_msg, image_output])
        image_send_btn.click(handle_text_image, inputs=[image_question, session_image], outputs=[image_output])
        # Reset swaps in a fresh session id (old chunks stay in memory but unreachable).
        image_reset_btn.click(lambda: (str(uuid.uuid4()), ""), outputs=[session_image, image_output])
        image_download_btn.click(download_pdf_summary, inputs=[session_image], outputs=[image_summary_file])
# Launch the Gradio app only when this file is run directly.
if __name__ == "__main__":
    demo.launch()