Spaces:
Running
Running
# app.py — Clinical Research Dictionary: Gradio UI with glossary autocomplete and no-cache handling.
# Derived from an earlier revision (app (3).py); existing functions were kept intact and only UI + JS enhancements were added.
import html
import re
import time
from datetime import datetime, timezone
from pathlib import Path

import gradio as gr
from fastapi import FastAPI
from fastapi.responses import JSONResponse

from api import summarize_combined_wrapper
| # ----------------------------------------- | |
| # TEMP: Dataset Path Debugger | |
| # ----------------------------------------- | |
| import os, glob | |
# Disclaimer copy; the mobile chat handler returns it in its JSON payload.
DISCLAIMER_TEXT = " ".join(
    [
        "This app is intended for educational and informational purposes only.",
        "It does not provide medical advice, diagnosis, or treatment.",
        "Content is derived from publicly available, authoritative sources",
        "including FDA, ICH, SCDM, CDISC, and similar organizations.",
    ]
)
def safe_listdir(path):
    """Return the entries of *path*, or an empty list if it cannot be listed.

    The diagnostics below run at import time; a missing or unreadable
    directory (e.g. when running outside the hosting container) must not
    prevent the app from starting.
    """
    try:
        return os.listdir(path)
    except OSError:
        return []


# Temporary startup diagnostics: print what is visible under /home and in the
# Hugging Face datasets cache so a missing dataset can be spotted in the logs.
print("\n===== DATASET CHECK =====")
print("HOME DIR:", safe_listdir("/home"))
print("USER DIR:", safe_listdir("/home/user"))
# glob.glob returns [] for paths that do not exist, so these need no guard.
print("HF CACHE:", glob.glob("/home/user/.cache/huggingface/datasets/*"))
print("HF SNAPSHOTS:", glob.glob("/home/user/.cache/huggingface/datasets/**", recursive=True))
print("==========================\n")
| # ----------------------------- | |
| # Chat response streamer (unchanged) | |
| # ----------------------------- | |
def stream_chat_generator(question: str):
    """Yield the answer to *question* as a growing prefix, for streamed display.

    Each yielded value is the full text shown so far (not a delta), so the
    output component can simply be replaced on every step.

    Args:
        question: The user's query. Empty or whitespace-only input yields a
            single prompt asking for a question.

    Yields:
        str: HTML/text to display. If the backend raises, the text is
        ``"Error: <message>"``. An empty answer yields ``""`` once so a
        previously displayed answer is cleared rather than left on screen.
    """
    if not question or not question.strip():
        yield "<i>Please enter a question.</i>"
        return

    try:
        res = summarize_combined_wrapper(question)
    except Exception as e:  # UI boundary: surface the failure instead of breaking the stream
        full = f"Error: {e}"
    else:
        answer = res.get("answer", "") if isinstance(res, dict) else res
        # A missing/None answer must not reach len()/slicing below.
        full = "" if answer is None else str(answer)

    if not full:
        yield ""
        return

    # stream in chunks
    CHUNK = 80
    for i in range(0, len(full), CHUNK):
        yield full[: i + CHUNK]
        time.sleep(0.025)
| # ----------------------------- | |
| # Load Glossary From File | |
| # ----------------------------- | |
# Glossary markup is read once at import time from the working directory;
# when the file is absent a small placeholder is shown instead.
GLOSSARY_FILE = Path("glossary.html")
GLOSSARY_HTML = (
    GLOSSARY_FILE.read_text(encoding="utf-8")
    if GLOSSARY_FILE.exists()
    else "<div id='terms'>(glossary.html not found — please upload)</div>"
)
| # ----------------------------- | |
| # Build autocomplete terms list from glossary.html (dedupe + sort) | |
| # ----------------------------- | |
def _strip_stray_parens(token: str) -> str:
    """Unwrap "(token)" and drop an unmatched "(" or ")" at the token's ends.

    Balanced parentheses inside a term, e.g. "Case Report Form (CRF)", are
    left untouched.
    """
    inner = token[1:-1]
    if (
        token.startswith("(")
        and token.endswith(")")
        and "(" not in inner
        and ")" not in inner
    ):
        return inner
    opens, closes = token.count("("), token.count(")")
    if opens > closes and token.startswith("("):
        return token[1:]
    if closes > opens and token.endswith(")"):
        return token[:-1]
    return token


def extract_terms_from_glossary(html_text: str):
    """Extract a sorted, de-duplicated list of glossary terms from HTML.

    Heuristic extraction:
    - strip tags, decode HTML entities, collapse whitespace
    - keep ";"-separated segments that look like term lists
      (at least 3 commas, or more than 20 words)
    - split those segments on commas and clean each token
    - if nothing qualifies, fall back to splitting the whole text on commas

    Args:
        html_text: Raw glossary markup. Empty or ``None`` input returns ``[]``.

    Returns:
        list[str]: Terms longer than one character, de-duplicated
        case-insensitively (first-seen capitalization wins) and sorted
        case-insensitively.
    """
    if not html_text:
        return []

    # remove HTML tags (simple)
    text = re.sub(r"<[^>]+>", " ", html_text)
    # Decode entities only after tag removal so "&lt;" cannot create a tag, and
    # before splitting so the ";" that ends an entity is not taken as a separator.
    text = html.unescape(text)
    # collapse whitespace (this also removes every newline)
    text = re.sub(r"\s+", " ", text)

    # Newlines are already gone, so ";" is the only segment separator left.
    segments = [
        seg
        for seg in text.split(";")
        if seg.count(",") >= 3 or len(seg.split()) > 20
    ]

    tokens = []
    for seg in segments:
        for part in seg.split(","):
            cleaned = _strip_stray_parens(part.strip()).strip()
            # skip empty and single-character tokens
            if len(cleaned) > 1:
                tokens.append(cleaned)

    # fallback: if tokens empty, try to split entire text by commas
    if not tokens:
        tokens = [p.strip() for p in text.split(",") if len(p.strip()) > 1]

    # dedupe case-insensitively, preserve first-seen capitalization
    seen = {}
    for token in tokens:
        seen.setdefault(token.lower(), token)
    return sorted(seen.values(), key=str.lower)
# Autocomplete vocabulary, extracted once at import time from the loaded glossary markup.
AUTOCOMPLETE_TERMS = extract_terms_from_glossary(GLOSSARY_HTML)
| # Build datalist options string (safe-escaped) | |
def build_options_html(terms):
    """Render *terms* as newline-separated ``<option value="...">`` tags.

    Args:
        terms: Iterable of term strings.

    Returns:
        str: One ``<option>`` tag per term, joined by newlines; ``""`` for an
        empty iterable. Each value is HTML-escaped (``&``, ``<``, ``>`` and
        quotes) so a term cannot break out of the attribute.
    """
    return "\n".join(
        f'<option value="{html.escape(term, quote=True)}">' for term in terms
    )
# Pre-rendered <option> markup that the UI layout interpolates into its <datalist>.
DATALIST_OPTIONS = build_options_html(AUTOCOMPLETE_TERMS)
| # ----------------------------- | |
| # CSS (dark-mode safe + hide HF header) | |
| # ----------------------------- | |
# Stylesheet passed to gr.Blocks(css=...). It hides Hugging Face page chrome by
# selector, forces light colours on the page and inputs, and styles the header,
# logo, and scrollable glossary box.
custom_css = """
/* FULLY HIDE HuggingFace Space Header + Banner + Buttons */
#header,
header,
.svelte-1ipelgc,
.svelte-1ed2p3z,
.prose a[href*='huggingface'],
button[aria-label="Like"],
button[aria-label="Duplicate"],
button[aria-label="Open in Spaces"],
a[href*="huggingface.co/spaces"],
footer,
#space-info,
#space-info-container,
div[id^="space-header"],
div.space-header,
div.space-info,
div#block-landing-page,
div#footer-container {
display: none !important;
visibility: hidden !important;
opacity: 0 !important;
height: 0 !important;
max-height: 0 !important;
padding: 0 !important;
margin: 0 !important;
pointer-events: none !important;
}
/* Remove top gap after hiding HF header */
.gradio-container, body {
padding-top: 0 !important;
margin-top: 0 !important;
}
/* UI Styling */
body, .gradio-container {
background: white !important;
-webkit-font-smoothing: antialiased;
}
*, .prose * {
color: #222 !important;
}
input, textarea {
background: #fff !important;
color: #222 !important;
border: 1px solid #777 !important;
}
input::placeholder, textarea::placeholder {
color: #666 !important;
}
button {
color: white !important;
}
.header {
display: flex;
align-items: flex-start;
gap: 12px;
margin-bottom: 12px;
}
.logo {
width: 48px;
height: 48px;
background: #0ea5a4;
border-radius: 8px;
color: white;
font-size: 20px;
font-weight: bold;
display: flex;
align-items: center;
justify-content: center;
}
.title-text {
font-size: 20px;
font-weight: 600;
}
.glossary-box {
background: #f7f7f7;
padding: 12px;
border-radius: 8px;
max-height: 420px;
overflow-y: auto;
font-size: 14px;
line-height: 1.45;
white-space: normal;
}
/* make datalist suggestions easier to see on mobile */
input[list]::-webkit-calendar-picker-indicator { display: none; }
@media (max-width: 600px) {
.gradio-container { padding: 10px !important; }
button { font-size: 16px !important; }
}
"""
| # ----------------------------- | |
| # Gradio App Layout (with datalist and JS) | |
| # ----------------------------- | |
# Builds the Gradio UI: chrome-hiding script, page header, autocomplete datalist,
# the search row wired to stream_chat_generator, and the full glossary listing.
# NOTE(review): block nesting was reconstructed from a de-indented copy — confirm
# that only the textbox and button belong inside gr.Row().
with gr.Blocks(css=custom_css, title="Clinical Research Dictionary") as demo:
    # No-cache meta tag plus a script that re-hides Hugging Face chrome every 400 ms.
    # NOTE(review): <script> tags delivered through gr.HTML may not execute in some
    # Gradio versions — verify in the deployed version.
    gr.HTML("""
<meta http-equiv="Cache-Control" content="no-store" />
<script>
function hideHF() {
const bad = document.querySelectorAll(
"#header, header, \
.svelte-1ipelgc, .svelte-1ed2p3z, \
a[href*='huggingface.co'], \
button[aria-label='Like'], \
button[aria-label='Duplicate'], \
button[aria-label='Open in Spaces'], \
#space-info, #space-info-container, \
div[id^='space-header'], \
div.space-header, div.space-info, \
footer"
);
bad.forEach(el => {
try {
el.style.display = "none";
el.style.visibility = "hidden";
el.style.opacity = "0";
el.style.height = "0px";
el.style.maxHeight = "0px";
el.style.margin = "0px";
el.style.padding = "0px";
el.style.pointerEvents = "none";
} catch(e) {}
});
}
// Keep hiding header (Gradio re-renders DOM often)
setInterval(hideHF, 400);
setTimeout(hideHF, 50);
</script>
""")
    # Page header: logo tile, title, and a one-line description of the sources.
    gr.HTML("""
<div class='header'>
<div class='logo'>CT</div>
<div>
<div class='title-text'>Clinical Research Dictionary</div>
<div style='font-size:14px; color:#444'>
Search for any clinical research term or acronym —
Answers sourced from official CDISC,SCDM,ICH,FDA documents/websites exactly as they appear in sources.
</div>
</div>
</div>
""")
    # Datalist populated server-side from the glossary terms, plus a script that
    # retries (every 200 ms, up to ~20 times) to attach it to a text <input>.
    # This is an f-string: literal JS braces are doubled ({{ }}).
    # NOTE(review): the script only matches <input> elements — confirm the Textbox
    # below renders as <input> rather than <textarea> in the Gradio version in use.
    gr.HTML(f"""
<!-- AUTOCOMPLETE DATALIST (built server-side from glossary.html) -->
<datalist id="terms-list">
{DATALIST_OPTIONS}
</datalist>
<script>
// Attach datalist to Gradio textbox once the DOM is ready.
// Gradio's textbox gets an input element we target by aria-label attribute.
function attachDatalist() {{
// find input by label text (safe fallback)
const inputs = Array.from(document.querySelectorAll('input[type="text"], input:not([type])'));
let target = null;
for (const el of inputs) {{
const label = el.getAttribute('aria-label') || el.getAttribute('placeholder') || "";
if (label.toLowerCase().includes('term') || label.toLowerCase().includes('question') || label.toLowerCase().includes('your question')) {{
target = el;
break;
}}
}}
// fallback: first text input
if (!target && inputs.length) {{
target = inputs[0];
}}
if (!target) return;
target.setAttribute('list', 'terms-list');
// small UX: show datalist on focus (works in most browsers)
target.addEventListener('focus', (e) => {{
// show suggestions by briefly blurring/focusing -- many browsers show automatically
// nothing fancy here — modern browsers handle datalist filtering
}});
}}
// try attaching repeatedly (Gradio may render after script)
let tries = 0;
const attachInterval = setInterval(() => {{
attachDatalist();
tries++;
if (tries > 20) clearInterval(attachInterval);
}}, 200);
</script>
""")
    # Search row: query textbox (4/5 of the width) and submit button (1/5).
    with gr.Row():
        q = gr.Textbox(
            label="Term/Acronym",
            placeholder="e.g. What is an eCRF?",
            lines=1,
            scale=4,
            elem_id="queryBox"
        )
        submit = gr.Button("Submit", variant="primary", scale=1)
    # Answer area; stream_chat_generator is a generator, so each yielded value
    # replaces the content shown here.
    out = gr.HTML()
    submit.click(stream_chat_generator, inputs=q, outputs=out)
    # Glossary title + content (the Markdown variant below is kept for reference)
    #gr.Markdown("### <span style='color: #FF6600;'>Available Clinical Trial Terms & Acronyms (3000+)</span>")
    gr.HTML("<h3 style='color: orange;'>Available Clinical Trial Terms & Acronyms (3000+)</h3>")
    # The glossary markup is interpolated as-is into a scrollable box.
    gr.HTML(f"""
<div class="glossary-box">
{GLOSSARY_HTML}
</div>
""")
| # ============================================================ | |
| # Mobile API Proxy (FastAPI backend) | |
| # ============================================================ | |
| from fastapi import FastAPI | |
| from fastapi.responses import JSONResponse | |
| import gradio as gr | |
| # Create the proxy FastAPI app | |
# Sub-application for the JSON API; it is mounted under /api on the root app.
proxy_api = FastAPI()


# NOTE(review): no route decorator (e.g. @proxy_api.post(...)) is visible on this
# handler in the reviewed source, so it may never be registered — confirm the
# intended path and method before relying on it.
async def mobile_chat(request: dict):
    """Answer a question sent by an API client and wrap it in a JSON response.

    Args:
        request: Decoded JSON body; the question is read from its
            ``"question"`` key (default ``""``).

    Returns:
        JSONResponse: ``answer``, ``citations``, ``disclaimer`` and
        ``status`` ("success").
    """
    question = request.get("question", "")
    result = summarize_combined_wrapper(question)
    # The chat streamer already tolerates a non-dict result from the wrapper;
    # normalise here too so .get() cannot fail on a plain string.
    if not isinstance(result, dict):
        result = {"answer": str(result)}
    return JSONResponse(content={
        "answer": result.get("answer", ""),
        "citations": result.get("citations", []),
        "disclaimer": DISCLAIMER_TEXT,
        "status": "success"
    })
| # ============================================================ | |
| # CREATE ROOT APP (FastAPI) AND MOUNT BOTH UI + API | |
| # ============================================================ | |
# Top-level ASGI application; the API sub-app and the Gradio UI are mounted onto it below.
root_app = FastAPI()
| from datetime import datetime | |
# NOTE(review): no route decorator (e.g. @root_app.get(...)) is visible on this
# function in the reviewed source, so it may never be registered — confirm.
def health_check():
    """Return a small liveness payload stamped with the current UTC time.

    Returns:
        dict: ``status`` ("ok"), ``service`` ("ct-chat") and ``timestamp``,
        an ISO-8601 string with an explicit ``+00:00`` offset.
    """
    return {
        "status": "ok",
        "service": "ct-chat",
        # datetime.utcnow() is deprecated (Python 3.12+) and returns a naive
        # value; an aware UTC datetime makes the offset explicit.
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
| # Mount /api → FastAPI backend | |
# Mount /api → FastAPI backend. This is registered before the UI mount;
# NOTE(review): the Gradio mount at "/" is a catch-all, so confirm /api must
# stay first for its routes to be reachable.
root_app.mount("/api", proxy_api)

# Mount / → Gradio UI. mount_gradio_app returns the application to serve.
root_app = gr.mount_gradio_app(root_app, demo, path="/")

# ============================================================
# Launch the *root_app*, not demo
# ============================================================
import uvicorn

# Start the server only when this file is executed as a script; importing the
# module (e.g. `uvicorn app:root_app`, tests) must not block on a second server.
if __name__ == "__main__":
    uvicorn.run(root_app, host="0.0.0.0", port=7860)