# CT-Chat-V2 / app.py
# essprasad's picture
# Update app.py
# 004258c verified
# app.py — Updated: autocomplete + no-cache + keep existing functions
# Based on your uploaded app (app (3).py). Kept functions intact and only added UI + JS enhancements.
import gradio as gr
import time
import re
from pathlib import Path
from api import summarize_combined_wrapper
from fastapi import FastAPI
from fastapi.responses import JSONResponse
# -----------------------------------------
# TEMP: Dataset Path Debugger
# -----------------------------------------
import os, glob
# Disclaimer sentence block attached to the mobile API responses.
# The four fragments are joined with single spaces into one paragraph.
DISCLAIMER_TEXT = " ".join(
    (
        "This app is intended for educational and informational purposes only.",
        "It does not provide medical advice, diagnosis, or treatment.",
        "Content is derived from publicly available, authoritative sources",
        "including FDA, ICH, SCDM, CDISC, and similar organizations.",
    )
)
def safe_listdir(path):
    """Return the entries of *path*, or a marker string if it cannot be listed.

    The startup diagnostics below are best-effort: a missing or unreadable
    directory (for example when running outside the hosting container) must
    not prevent the application from starting.
    """
    try:
        return os.listdir(path)
    except OSError as exc:
        return f"<unavailable: {type(exc).__name__}>"


# Temporary startup diagnostics: print what is present under the home
# directory and the Hugging Face datasets cache. glob.glob() already returns
# an empty list for missing paths, so only the listdir calls need guarding.
print("\n===== DATASET CHECK =====")
print("HOME DIR:", safe_listdir("/home"))
print("USER DIR:", safe_listdir("/home/user"))
print("HF CACHE:", glob.glob("/home/user/.cache/huggingface/datasets/*"))
print("HF SNAPSHOTS:", glob.glob("/home/user/.cache/huggingface/datasets/**", recursive=True))
print("==========================\n")
# -----------------------------
# Chat response streamer (unchanged)
# -----------------------------
def stream_chat_generator(question: str):
    """Yield the answer for *question* as progressively longer prefixes.

    A missing or whitespace-only question yields a single prompt message.
    Otherwise the full answer is obtained from ``summarize_combined_wrapper``
    (the ``"answer"`` entry when a dict is returned, else ``str()`` of the
    result); any exception raised while obtaining it is turned into an
    ``"Error: ..."`` text. The text is then emitted cumulatively, 80 more
    characters per step, with a short pause between steps.
    """
    if not (question and question.strip()):
        yield "<i>Please enter a question.</i>"
        return

    try:
        response = summarize_combined_wrapper(question)
        if isinstance(response, dict):
            answer_text = response.get("answer", "")
        else:
            answer_text = str(response)
    except Exception as exc:
        answer_text = f"Error: {exc}"

    # Each step re-sends everything shown so far plus the next slice, so the
    # consumer can simply replace its content with the latest value.
    step = 80
    total = len(answer_text)
    shown = 0
    while shown < total:
        shown += step
        yield answer_text[:shown]
        time.sleep(0.025)
# -----------------------------
# Load Glossary From File
# -----------------------------
# Glossary markup is read once at import time from the working directory.
GLOSSARY_FILE = Path("glossary.html")

# When the file is absent, fall back to a minimal placeholder so the page
# still renders and tells the operator what is missing.
GLOSSARY_HTML = (
    GLOSSARY_FILE.read_text(encoding="utf-8")
    if GLOSSARY_FILE.exists()
    else "<div id='terms'>(glossary.html not found — please upload)</div>"
)
# -----------------------------
# Build autocomplete terms list from glossary.html (dedupe + sort)
# -----------------------------
def _strip_stray_parens(token):
    """Remove an unmatched parenthesis left at either end of *token*.

    Splitting "(a, b, c)" on commas leaves "(a" and "c)"; those stray
    parentheses are dropped. A token wholly wrapped in one pair, such as
    "(note)", is unwrapped. Balanced inner pairs, as in
    "Case Report Form (CRF)", are left untouched.
    """
    inner = token[1:-1]
    if (
        len(token) >= 2
        and token[0] == "("
        and token[-1] == ")"
        and "(" not in inner
        and ")" not in inner
    ):
        return inner.strip()

    opens = token.count("(")
    closes = token.count(")")
    if token.startswith("(") and opens > closes:
        token = token[1:]
    elif token.endswith(")") and closes > opens:
        token = token[:-1]
    return token.strip()


def extract_terms_from_glossary(html_text: str):
    """Heuristically extract a sorted, de-duplicated term list from glossary HTML.

    Steps:
      1. Strip tags, decode HTML entities, and collapse whitespace.
      2. Split the text into segments and keep those that look like term
         lists (at least three commas) or are long (more than 20 words).
      3. Split the kept segments on commas, drop stray parentheses and
         one-character tokens.
      4. If nothing was found, fall back to splitting the whole text on
         commas.
      5. De-duplicate case-insensitively (first-seen capitalization wins)
         and sort case-insensitively.

    Args:
        html_text: Raw glossary markup.

    Returns:
        A list of term strings sorted case-insensitively.
    """
    import html  # local import keeps this block self-contained

    # Remove tags first so that decoded "&lt;...&gt;" text is not mistaken
    # for markup, then decode entities so "&amp;" does not inject a ';'
    # separator or leak into the suggestions.
    text = re.sub(r"<[^>]+>", " ", html_text)
    text = html.unescape(text)
    text = re.sub(r"\s+", " ", text)

    # NOTE: whitespace (including newlines) is already collapsed above, so in
    # practice only ';' separates segments here.
    candidates = [
        seg
        for seg in re.split(r"[;\n\r]", text)
        if seg.count(",") >= 3 or len(seg.split()) > 20
    ]

    tokens = []
    for seg in candidates:
        for part in seg.split(","):
            cleaned = _strip_stray_parens(part.strip())
            # Skip empty and single-character tokens.
            if len(cleaned) > 1:
                tokens.append(re.sub(r"\s+", " ", cleaned))

    # Fallback: no list-like segment found, split the entire text on commas.
    if not tokens:
        tokens = [p.strip() for p in text.split(",") if len(p.strip()) > 1]

    # Case-insensitive de-duplication that keeps the first-seen spelling.
    seen = {}
    for token in tokens:
        seen.setdefault(token.lower(), token)

    return sorted(seen.values(), key=str.lower)
# Autocomplete vocabulary, computed once at import time from the glossary markup loaded above.
AUTOCOMPLETE_TERMS = extract_terms_from_glossary(GLOSSARY_HTML)
# Build datalist options string (safe-escaped)
def build_options_html(terms):
    """Render *terms* as newline-separated ``<option value="...">`` tags.

    Each term is HTML-escaped (``&``, ``<``, ``>`` and both quote characters)
    so that a term can never terminate the ``value`` attribute or inject
    markup into the page.

    Args:
        terms: Iterable of term strings.

    Returns:
        The option tags joined by newlines; an empty string for no terms.
    """
    import html  # local import keeps this block self-contained

    return "\n".join(
        f'<option value="{html.escape(term, quote=True)}">' for term in terms
    )
# Pre-rendered <option> markup for the autocomplete <datalist>, built once at import time.
DATALIST_OPTIONS = build_options_html(AUTOCOMPLETE_TERMS)
# -----------------------------
# CSS (dark-mode safe + hide HF header)
# -----------------------------
# Stylesheet passed to gr.Blocks(css=...).
# The universal rule `*, .prose * { color: #222 !important; }` outranks any
# non-important colour declaration, so the logo's white text needs
# `!important` as well to stay readable on its teal background.
custom_css = """
/* FULLY HIDE HuggingFace Space Header + Banner + Buttons */
#header,
header,
.svelte-1ipelgc,
.svelte-1ed2p3z,
.prose a[href*='huggingface'],
button[aria-label="Like"],
button[aria-label="Duplicate"],
button[aria-label="Open in Spaces"],
a[href*="huggingface.co/spaces"],
footer,
#space-info,
#space-info-container,
div[id^="space-header"],
div.space-header,
div.space-info,
div#block-landing-page,
div#footer-container {
display: none !important;
visibility: hidden !important;
opacity: 0 !important;
height: 0 !important;
max-height: 0 !important;
padding: 0 !important;
margin: 0 !important;
pointer-events: none !important;
}
/* Remove top gap after hiding HF header */
.gradio-container, body {
padding-top: 0 !important;
margin-top: 0 !important;
}
/* UI Styling */
body, .gradio-container {
background: white !important;
-webkit-font-smoothing: antialiased;
}
*, .prose * {
color: #222 !important;
}
input, textarea {
background: #fff !important;
color: #222 !important;
border: 1px solid #777 !important;
}
input::placeholder, textarea::placeholder {
color: #666 !important;
}
button {
color: white !important;
}
.header {
display: flex;
align-items: flex-start;
gap: 12px;
margin-bottom: 12px;
}
.logo {
width: 48px;
height: 48px;
background: #0ea5a4;
border-radius: 8px;
color: white !important;
font-size: 20px;
font-weight: bold;
display: flex;
align-items: center;
justify-content: center;
}
.title-text {
font-size: 20px;
font-weight: 600;
}
.glossary-box {
background: #f7f7f7;
padding: 12px;
border-radius: 8px;
max-height: 420px;
overflow-y: auto;
font-size: 14px;
line-height: 1.45;
white-space: normal;
}
/* make datalist suggestions easier to see on mobile */
input[list]::-webkit-calendar-picker-indicator { display: none; }
@media (max-width: 600px) {
.gradio-container { padding: 10px !important; }
button { font-size: 16px !important; }
}
"""
# -----------------------------
# Gradio App Layout (with datalist and JS)
# -----------------------------
# ---- Static HTML/JS fragments used by the layout below --------------------
# NOTE(review): whether <script> tags placed inside gr.HTML actually execute
# depends on the Gradio version in use — confirm in the deployed Space.

# Cache-control hint plus a script that periodically hides Hugging Face
# header/footer elements.
_HIDE_HF_HTML = """
<meta http-equiv="Cache-Control" content="no-store" />
<script>
function hideHF() {
const bad = document.querySelectorAll(
"#header, header, \
.svelte-1ipelgc, .svelte-1ed2p3z, \
a[href*='huggingface.co'], \
button[aria-label='Like'], \
button[aria-label='Duplicate'], \
button[aria-label='Open in Spaces'], \
#space-info, #space-info-container, \
div[id^='space-header'], \
div.space-header, div.space-info, \
footer"
);
bad.forEach(el => {
try {
el.style.display = "none";
el.style.visibility = "hidden";
el.style.opacity = "0";
el.style.height = "0px";
el.style.maxHeight = "0px";
el.style.margin = "0px";
el.style.padding = "0px";
el.style.pointerEvents = "none";
} catch(e) {}
});
}
// Keep hiding header (Gradio re-renders DOM often)
setInterval(hideHF, 400);
setTimeout(hideHF, 50);
</script>
"""

# Logo, title and tagline shown at the top of the page.
_PAGE_HEADER_HTML = """
<div class='header'>
<div class='logo'>CT</div>
<div>
<div class='title-text'>Clinical Research Dictionary</div>
<div style='font-size:14px; color:#444'>
Search for any clinical research term or acronym —
Answers sourced from official CDISC,SCDM,ICH,FDA documents/websites exactly as they appear in sources.
</div>
</div>
</div>
"""

# <datalist> holding the glossary terms, plus a script that tries (for a
# limited number of attempts) to attach it to the query text input.
_DATALIST_HTML = f"""
<!-- AUTOCOMPLETE DATALIST (built server-side from glossary.html) -->
<datalist id="terms-list">
{DATALIST_OPTIONS}
</datalist>
<script>
// Attach datalist to Gradio textbox once the DOM is ready.
// Gradio's textbox gets an input element we target by aria-label attribute.
function attachDatalist() {{
// find input by label text (safe fallback)
const inputs = Array.from(document.querySelectorAll('input[type="text"], input:not([type])'));
let target = null;
for (const el of inputs) {{
const label = el.getAttribute('aria-label') || el.getAttribute('placeholder') || "";
if (label.toLowerCase().includes('term') || label.toLowerCase().includes('question') || label.toLowerCase().includes('your question')) {{
target = el;
break;
}}
}}
// fallback: first text input
if (!target && inputs.length) {{
target = inputs[0];
}}
if (!target) return;
target.setAttribute('list', 'terms-list');
// small UX: show datalist on focus (works in most browsers)
target.addEventListener('focus', (e) => {{
// show suggestions by briefly blurring/focusing -- many browsers show automatically
// nothing fancy here — modern browsers handle datalist filtering
}});
}}
// try attaching repeatedly (Gradio may render after script)
let tries = 0;
const attachInterval = setInterval(() => {{
attachDatalist();
tries++;
if (tries > 20) clearInterval(attachInterval);
}}, 200);
</script>
"""

# Heading above the glossary listing.
_GLOSSARY_HEADING_HTML = "<h3 style='color: orange;'>Available Clinical Trial Terms & Acronyms (3000+)</h3>"

# Scrollable box wrapping the glossary markup loaded from disk.
_GLOSSARY_BOX_HTML = f"""
<div class="glossary-box">
{GLOSSARY_HTML}
</div>
"""

# ---- Page layout -----------------------------------------------------------
with gr.Blocks(css=custom_css, title="Clinical Research Dictionary") as demo:
    # Page chrome: header hiding, title banner, autocomplete datalist.
    gr.HTML(_HIDE_HF_HTML)
    gr.HTML(_PAGE_HEADER_HTML)
    gr.HTML(_DATALIST_HTML)

    # Search row: query textbox (wide) next to the submit button (narrow).
    with gr.Row():
        q = gr.Textbox(
            label="Term/Acronym",
            placeholder="e.g. What is an eCRF?",
            lines=1,
            scale=4,
            elem_id="queryBox",
        )
        submit = gr.Button("Submit", variant="primary", scale=1)

    # Answer area, updated incrementally by the streaming generator.
    out = gr.HTML()
    submit.click(stream_chat_generator, inputs=q, outputs=out)

    # Glossary heading and content.
    gr.HTML(_GLOSSARY_HEADING_HTML)
    gr.HTML(_GLOSSARY_BOX_HTML)
# ============================================================
# Mobile API Proxy (FastAPI backend)
# ============================================================
from fastapi import FastAPI
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import JSONResponse
import gradio as gr

# Sub-application that exposes the chat backend as a JSON endpoint.
proxy_api = FastAPI()


@proxy_api.post("/chat")
async def mobile_chat(request: dict):
    """Answer a question posted as JSON and return answer, citations and disclaimer.

    Args:
        request: Parsed JSON body; the ``"question"`` key holds the query
            (defaults to an empty string when absent).

    Returns:
        A JSON response with ``answer``, ``citations``, ``disclaimer`` and
        ``status`` keys.
    """
    question = request.get("question", "")

    # summarize_combined_wrapper is called synchronously everywhere in this
    # file; running it in the thread pool keeps this coroutine from blocking
    # the event loop (and with it the UI and health endpoint) while it works.
    result = await run_in_threadpool(summarize_combined_wrapper, question)

    # Mirror stream_chat_generator: tolerate a non-dict result instead of
    # failing on `.get`.
    if isinstance(result, dict):
        answer = result.get("answer", "")
        citations = result.get("citations", [])
    else:
        answer = str(result)
        citations = []

    return JSONResponse(content={
        "answer": answer,
        "citations": citations,
        "disclaimer": DISCLAIMER_TEXT,
        "status": "success"
    })
# ============================================================
# CREATE ROOT APP (FastAPI) AND MOUNT BOTH UI + API
# ============================================================
from datetime import datetime, timezone

# Top-level ASGI application: hosts the health probe, the JSON API under
# /api, and the Gradio UI at the root path.
root_app = FastAPI()


@root_app.get("/health")
def health_check():
    """Return a small liveness payload with the current UTC time.

    The timestamp is timezone-aware, so its ISO 8601 form carries an explicit
    ``+00:00`` offset. ``datetime.utcnow()`` is deprecated and produced a
    naive value with no offset.
    """
    return {
        "status": "ok",
        "service": "ct-chat",
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }


# Mount /api → FastAPI backend
root_app.mount("/api", proxy_api)

# Mount / → Gradio UI (done last; mount_gradio_app returns the app to serve)
root_app = gr.mount_gradio_app(root_app, demo, path="/")
# ============================================================
# Launch the *root_app*, not demo
# ============================================================
import uvicorn

# Start the server only when this file is executed as a script. Without the
# guard, merely importing the module (e.g. `uvicorn app:root_app`, tests,
# tooling) would block on a second server bound to the same port.
if __name__ == "__main__":
    uvicorn.run(root_app, host="0.0.0.0", port=7860)