Spaces:

essprasad
/

CT-Chat-V2

Running

App Files Files Community

CT-Chat-V2 / app.py

essprasad

Update app.py

004258c verified about 1 month ago

raw

history blame contribute delete

12.2 kB

	# app.py — Updated: autocomplete + no-cache + keep existing functions
	# Based on your uploaded app (app (3).py). Kept functions intact and only added UI + JS enhancements.
	import gradio as gr
	import time
	import re
	from pathlib import Path
	from api import summarize_combined_wrapper
	from fastapi import FastAPI
	from fastapi.responses import JSONResponse


	# -----------------------------------------
	# TEMP: Dataset Path Debugger
	# -----------------------------------------
	import os, glob

	DISCLAIMER_TEXT = (
	"This app is intended for educational and informational purposes only. "
	"It does not provide medical advice, diagnosis, or treatment. "
	"Content is derived from publicly available, authoritative sources "
	"including FDA, ICH, SCDM, CDISC, and similar organizations."
	)
	print("\n===== DATASET CHECK =====")
	print("HOME DIR:", os.listdir("/home"))
	print("USER DIR:", os.listdir("/home/user"))
	print("HF CACHE:", glob.glob("/home/user/.cache/huggingface/datasets/*"))
	print("HF SNAPSHOTS:", glob.glob("/home/user/.cache/huggingface/datasets/**", recursive=True))
	print("==========================\n")

	# -----------------------------
	# Chat response streamer
	# -----------------------------
	def stream_chat_generator(question: str):
	    """Yield progressively longer prefixes of the answer to *question*.

	    A blank or missing question yields a single prompt message. Otherwise the
	    question is passed to ``summarize_combined_wrapper``; a dict result
	    contributes its ``"answer"`` entry, any other result is stringified, and
	    an exception is rendered as ``"Error: ..."``. The text is then emitted
	    cumulatively in 80-character steps with a short pause between steps, so
	    each yielded value replaces the previous one in the output component.

	    Args:
	        question: The user's query text.

	    Yields:
	        str: The answer text revealed so far.
	    """
	    if not question or not question.strip():
	        yield "<i>Please enter a question.</i>"
	        return

	    try:
	        res = summarize_combined_wrapper(question)
	        if isinstance(res, dict):
	            # dict.get() only applies its default when the key is absent; an
	            # explicit None (or any non-string) would break len()/slicing below.
	            answer = res.get("answer")
	            full = "" if answer is None else str(answer)
	        else:
	            full = str(res)
	    except Exception as e:
	        full = f"Error: {e}"

	    # stream in chunks
	    CHUNK = 80
	    for i in range(0, len(full), CHUNK):
	        yield full[: i + CHUNK]
	        time.sleep(0.025)


	# -----------------------------
	# Load Glossary From File
	# -----------------------------
	GLOSSARY_FILE = Path("glossary.html")
	# Use the file's contents when it is present; otherwise fall back to a
	# minimal placeholder so the page still renders.
	GLOSSARY_HTML = (
	    GLOSSARY_FILE.read_text(encoding="utf-8")
	    if GLOSSARY_FILE.exists()
	    else "<div id='terms'>(glossary.html not found — please upload)</div>"
	)


	# -----------------------------
	# Build autocomplete terms list from glossary.html (dedupe + sort)
	# -----------------------------
	def _strip_stray_parens(token: str) -> str:
	    """Drop an unmatched '(' at the start or ')' at the end of *token*.

	    Splitting "(a, b, c)" on commas leaves "(a" and "c)"; those dangling
	    parentheses are removed, while balanced ones such as "Adverse Event (AE)"
	    are left untouched.
	    """
	    opens = token.count("(")
	    closes = token.count(")")
	    if opens > closes and token.startswith("("):
	        token = token[1:]
	    elif closes > opens and token.endswith(")"):
	        token = token[:-1]
	    return token.strip()


	def extract_terms_from_glossary(html_text: str) -> list:
	    """Extract a sorted, de-duplicated list of terms from glossary HTML.

	    Heuristic extraction:
	    - strip HTML tags and collapse whitespace
	    - keep segments (split on ';' / line breaks) that contain at least three
	      commas or more than twenty words, and split those on commas
	    - drop stray parentheses at token ends and tokens of one character or less
	    - if nothing was found, fall back to splitting the whole text on commas
	    - dedupe case-insensitively, keeping the first-seen capitalization

	    Args:
	        html_text: Raw HTML of the glossary.

	    Returns:
	        The terms sorted case-insensitively.
	    """
	    # remove HTML tags (simple), then collapse runs of whitespace
	    text = re.sub(r"<[^>]+>", " ", html_text)
	    text = re.sub(r"\s+", " ", text)

	    # pick long segments or segments that look like comma-separated lists
	    candidates = [
	        seg
	        for seg in re.split(r"[;\n\r]", text)
	        if seg.count(",") >= 3 or len(seg.split()) > 20
	    ]

	    tokens = []
	    for seg in candidates:
	        for part in seg.split(","):
	            cleaned = _strip_stray_parens(part.strip())
	            # skip empty and single-character tokens
	            if len(cleaned) > 1:
	                tokens.append(re.sub(r"\s+", " ", cleaned))

	    # fallback: if tokens empty, try to split entire text by commas
	    if not tokens:
	        tokens = [p.strip() for p in text.split(",") if len(p.strip()) > 1]

	    # dedupe case-insensitively, preserve first-seen capitalization
	    seen = {}
	    for t in tokens:
	        seen.setdefault(t.lower(), t)

	    return sorted(seen.values(), key=str.lower)

	# Autocomplete vocabulary, computed once at import time from the glossary HTML.
	AUTOCOMPLETE_TERMS = extract_terms_from_glossary(GLOSSARY_HTML)

	# Build datalist options string (safe-escaped)
	def build_options_html(terms):
	    """Return one ``<option value="...">`` line per term, joined by newlines.

	    Double quotes inside a term are replaced with ``&quot;`` so they cannot
	    terminate the ``value`` attribute early. Other characters are passed
	    through unchanged.

	    Args:
	        terms: Iterable of term strings.

	    Returns:
	        The option tags as a single string; empty when *terms* is empty.
	    """
	    return "\n".join(
	        '<option value="{}">'.format(term.replace('"', "&quot;"))
	        for term in terms
	    )

	# Pre-rendered <option> tags for the autocomplete terms, built once at import time.
	DATALIST_OPTIONS = build_options_html(AUTOCOMPLETE_TERMS)


	# -----------------------------
	# CSS (dark-mode safe + hide HF header)
	# -----------------------------
	# Stylesheet passed to gr.Blocks(css=...). The text-colour rule used to start
	# with a bare comma (", .prose"), which makes the selector list invalid so
	# browsers discard the whole rule; it now targets ".prose" directly.
	# NOTE(review): a leading selector appears to have been lost before the
	# comma — confirm whether another selector should be restored alongside .prose.
	custom_css = """
	/* FULLY HIDE HuggingFace Space Header + Banner + Buttons */

	#header,
	header,
	.svelte-1ipelgc,
	.svelte-1ed2p3z,
	.prose a[href*='huggingface'],
	button[aria-label="Like"],
	button[aria-label="Duplicate"],
	button[aria-label="Open in Spaces"],
	a[href*="huggingface.co/spaces"],
	footer,
	#space-info,
	#space-info-container,
	div[id^="space-header"],
	div.space-header,
	div.space-info,
	div#block-landing-page,
	div#footer-container {
	display: none !important;
	visibility: hidden !important;
	opacity: 0 !important;
	height: 0 !important;
	max-height: 0 !important;
	padding: 0 !important;
	margin: 0 !important;
	pointer-events: none !important;
	}

	/* Remove top gap after hiding HF header */
	.gradio-container, body {
	padding-top: 0 !important;
	margin-top: 0 !important;
	}

	/* UI Styling */
	body, .gradio-container {
	background: white !important;
	-webkit-font-smoothing: antialiased;
	}
	.prose {
	color: #222 !important;
	}
	input, textarea {
	background: #fff !important;
	color: #222 !important;
	border: 1px solid #777 !important;
	}
	input::placeholder, textarea::placeholder {
	color: #666 !important;
	}
	button {
	color: white !important;
	}
	.header {
	display: flex;
	align-items: flex-start;
	gap: 12px;
	margin-bottom: 12px;
	}
	.logo {
	width: 48px;
	height: 48px;
	background: #0ea5a4;
	border-radius: 8px;
	color: white;
	font-size: 20px;
	font-weight: bold;
	display: flex;
	align-items: center;
	justify-content: center;
	}
	.title-text {
	font-size: 20px;
	font-weight: 600;
	}
	.glossary-box {
	background: #f7f7f7;
	padding: 12px;
	border-radius: 8px;
	max-height: 420px;
	overflow-y: auto;
	font-size: 14px;
	line-height: 1.45;
	white-space: normal;
	}
	/* make datalist suggestions easier to see on mobile */
	input[list]::-webkit-calendar-picker-indicator { display: none; }
	@media (max-width: 600px) {
	.gradio-container { padding: 10px !important; }
	button { font-size: 16px !important; }
	}
	"""

	# -----------------------------
	# Gradio App Layout (with datalist and JS)
	# -----------------------------
	# Builds the whole UI: header-hiding script, title banner, autocomplete
	# datalist, search row, answer area and the glossary listing.
	# NOTE(review): depending on the Gradio version, <script> tags placed inside
	# gr.HTML may not be executed by the browser — confirm the two scripts below
	# actually run, and consider gr.Blocks(head=...) / gr.Blocks(js=...) if not.
	with gr.Blocks(css=custom_css, title="Clinical Research Dictionary") as demo:

	    # No-cache meta + FULL hide HF header/banner repeatedly
	    gr.HTML("""
	<meta http-equiv="Cache-Control" content="no-store" />
	<script>
	function hideHF() {
	const bad = document.querySelectorAll(
	"#header, header, \
	.svelte-1ipelgc, .svelte-1ed2p3z, \
	a[href*='huggingface.co'], \
	button[aria-label='Like'], \
	button[aria-label='Duplicate'], \
	button[aria-label='Open in Spaces'], \
	#space-info, #space-info-container, \
	div[id^='space-header'], \
	div.space-header, div.space-info, \
	footer"
	);

	bad.forEach(el => {
	try {
	el.style.display = "none";
	el.style.visibility = "hidden";
	el.style.opacity = "0";
	el.style.height = "0px";
	el.style.maxHeight = "0px";
	el.style.margin = "0px";
	el.style.padding = "0px";
	el.style.pointerEvents = "none";
	} catch(e) {}
	});
	}

	// Keep hiding header (Gradio re-renders DOM often)
	setInterval(hideHF, 400);
	setTimeout(hideHF, 50);
	</script>
	""")

	    # Header HTML (keeps your look)
	    gr.HTML("""
	<div class='header'>
	<div class='logo'>CT</div>
	<div>
	<div class='title-text'>Clinical Research Dictionary</div>
	<div style='font-size:14px; color:#444'>
	Search for any clinical research term or acronym —
	Answers sourced from official CDISC,SCDM,ICH,FDA documents/websites exactly as they appear in sources.
	</div>
	</div>
	</div>
	""")

	    # Inject a datalist element populated server-side from glossary terms.
	    # This is an f-string, so literal JavaScript braces are doubled. The JS
	    # logical-OR operators must be plain "||": a backslash-escaped form is a
	    # JavaScript syntax error that stops the whole script from running.
	    gr.HTML(f"""
	<!-- AUTOCOMPLETE DATALIST (built server-side from glossary.html) -->
	<datalist id="terms-list">
	{DATALIST_OPTIONS}
	</datalist>

	<script>
	// Attach datalist to Gradio textbox once the DOM is ready.
	// Gradio's textbox gets an input element we target by aria-label attribute.
	function attachDatalist() {{
	// find input by label text (safe fallback)
	const inputs = Array.from(document.querySelectorAll('input[type="text"], input:not([type])'));
	let target = null;
	for (const el of inputs) {{
	const label = el.getAttribute('aria-label') || el.getAttribute('placeholder') || "";
	if (label.toLowerCase().includes('term') || label.toLowerCase().includes('question') || label.toLowerCase().includes('your question')) {{
	target = el;
	break;
	}}
	}}
	// fallback: first text input
	if (!target && inputs.length) {{
	target = inputs[0];
	}}
	if (!target) return;

	target.setAttribute('list', 'terms-list');
	// small UX: show datalist on focus (works in most browsers)
	target.addEventListener('focus', (e) => {{
	// show suggestions by briefly blurring/focusing -- many browsers show automatically
	// nothing fancy here — modern browsers handle datalist filtering
	}});
	}}

	// try attaching repeatedly (Gradio may render after script)
	let tries = 0;
	const attachInterval = setInterval(() => {{
	attachDatalist();
	tries++;
	if (tries > 20) clearInterval(attachInterval);
	}}, 200);
	</script>
	""")

	    # Search row (keeps your layout)
	    with gr.Row():
	        q = gr.Textbox(
	            label="Term/Acronym",
	            placeholder="e.g. What is an eCRF?",
	            lines=1,
	            scale=4,
	            elem_id="queryBox"
	        )
	        submit = gr.Button("Submit", variant="primary", scale=1)

	    # Answer area; the generator streams cumulative text into it.
	    out = gr.HTML()
	    submit.click(stream_chat_generator, inputs=q, outputs=out)

	    # Glossary Title + content
	    #gr.Markdown("### <span style='color: #FF6600;'>Available Clinical Trial Terms & Acronyms (3000+)</span>")
	    gr.HTML("<h3 style='color: orange;'>Available Clinical Trial Terms & Acronyms (3000+)</h3>")

	    gr.HTML(f"""
	<div class="glossary-box">
	{GLOSSARY_HTML}
	</div>
	""")
	# ============================================================
	# Mobile API Proxy (FastAPI backend)
	# ============================================================
	from fastapi import FastAPI
	from fastapi.responses import JSONResponse
	import gradio as gr

	# Create the proxy FastAPI app
	proxy_api = FastAPI()


	@proxy_api.post("/chat")
	async def mobile_chat(request: dict):
	    """Answer a question posted as JSON by the mobile client.

	    Args:
	        request: Parsed JSON body; the ``"question"`` entry is forwarded to
	            ``summarize_combined_wrapper`` (an empty string when absent).

	    Returns:
	        JSONResponse with ``answer``, ``citations``, ``disclaimer`` and
	        ``status`` fields.
	    """
	    # Imported here so this handler is self-contained.
	    from fastapi.concurrency import run_in_threadpool

	    question = request.get("question", "")
	    # The summarizer is a synchronous call; running it directly inside an
	    # async handler would block the event loop for its whole duration, so it
	    # is pushed to a worker thread instead.
	    result = await run_in_threadpool(summarize_combined_wrapper, question)

	    # Mirror the UI streamer: tolerate a non-dict result instead of failing
	    # on .get().
	    if isinstance(result, dict):
	        answer = result.get("answer", "")
	        citations = result.get("citations", [])
	    else:
	        answer = str(result)
	        citations = []

	    return JSONResponse(content={
	        "answer": answer,
	        "citations": citations,
	        "disclaimer": DISCLAIMER_TEXT,
	        "status": "success"
	    })


	# ============================================================
	# CREATE ROOT APP (FastAPI) AND MOUNT BOTH UI + API
	# ============================================================
	root_app = FastAPI()
	from datetime import datetime, timezone


	@root_app.get("/health")
	def health_check():
	    """Liveness probe: report service name, status and the current UTC time.

	    Returns:
	        dict with ``status``, ``service`` and ``timestamp``; the timestamp is
	        an ISO-8601 string in UTC without an offset suffix.
	    """
	    # datetime.utcnow() is deprecated (Python 3.12+). Take an aware "now" and
	    # drop the tzinfo so the emitted string keeps its offset-free format.
	    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
	    return {
	        "status": "ok",
	        "service": "ct-chat",
	        "timestamp": now_utc.isoformat()
	    }
	# Mount /api → FastAPI backend
	root_app.mount("/api", proxy_api)

	# Mount / → Gradio UI
	root_app = gr.mount_gradio_app(root_app, demo, path="/")

	# ============================================================
	# Launch the root_app, not demo
	# ============================================================
	import uvicorn

	# Only start the server when this file is executed as a script, so that
	# importing the module (e.g. `uvicorn app:root_app`, tooling, tests) does not
	# launch a second, blocking server as a side effect.
	# NOTE(review): assumes the deployment starts the app with `python app.py` — confirm.
	if __name__ == "__main__":
	    uvicorn.run(root_app, host="0.0.0.0", port=7860)