# AutoBool-Demo / app.py
# Author: Shuai Wang
# Commit 4c2a357: Fix ZeroGPU model caching, add logging, default to Conceptual model
"""
AutoBool Demo β€” Hugging Face Spaces
Loads models via transformers on ZeroGPU with custom reasoning templates.
"""
import html
import json
import os
import urllib.parse
from datetime import datetime, timezone
from pathlib import Path
from threading import Thread
import gradio as gr
import torch
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from huggingface_hub import CommitScheduler
# ─────────────────────────── Model Registry ───────────────────────────
MODELS = {
"Autobool-Qwen4b-Reasoning": {
"model_id": "ielabgroup/Autobool-Qwen4b-Reasoning",
"has_thinking": True,
"description": "Chain-of-thought reasoning before generating the query",
},
"Autobool-Qwen4b-Reasoning-Conceptual": {
"model_id": "ielabgroup/Autobool-Qwen4b-Reasoning-conceptual",
"has_thinking": True,
"description": "Conceptual-level reasoning (maps topic to broad concepts first)",
},
"Autobool-Qwen4b-Reasoning-Objective": {
"model_id": "ielabgroup/Autobool-Qwen4b-Reasoning-objective",
"has_thinking": True,
"description": "Objective-focused reasoning (focuses on measurable outcomes)",
},
"Autobool-Qwen4b-No-Reasoning": {
"model_id": "ielabgroup/Autobool-Qwen4b-No-reasoning",
"has_thinking": False,
"description": "Direct query generation β€” no explicit reasoning",
},
}
DEFAULT_MODEL = "Autobool-Qwen4b-Reasoning-Conceptual"
# ─────────────────────────── Logging ───────────────────────────
# Set HF_TOKEN (write-access token) and LOG_DATASET_REPO in Space secrets.
# The scheduler pushes a daily JSONL file to a private HF dataset every 5 min.
LOG_DIR = Path("logs")
LOG_DIR.mkdir(exist_ok=True)  # idempotent — safe across Space restarts
# Module-level handle used by _log_interaction(); stays None when logging is disabled.
_log_scheduler = None
_hf_token = os.environ.get("HF_TOKEN")
_log_repo = os.environ.get("LOG_DATASET_REPO", "wshuai190/AutoBool-Demo-Logs")
if _hf_token:
    try:
        # CommitScheduler watches LOG_DIR and periodically commits its contents
        # to the dataset repo in a background thread.
        _log_scheduler = CommitScheduler(
            repo_id=_log_repo,
            repo_type="dataset",
            folder_path=LOG_DIR,
            path_in_repo="logs",
            private=True,
            token=_hf_token,
        )
        print(f"Logging enabled → {_log_repo}")
    except Exception as _exc:
        # Best-effort: the demo must still start if the token/repo is misconfigured.
        print(f"Logging setup failed: {_exc}")
else:
    print("HF_TOKEN not set — logging disabled.")
def _log_interaction(topic: str, model_key: str, query: str) -> None:
    """Append one JSONL record to today's log file; CommitScheduler syncs it.

    No-op when logging is disabled (HF_TOKEN missing or scheduler setup failed).
    A single UTC timestamp is taken once so the record's "timestamp" field and
    the daily file name can never disagree around midnight, and the file is
    written as UTF-8 with ensure_ascii=False so non-ASCII topics/queries are
    stored readably regardless of the container locale.
    """
    if _log_scheduler is None:
        return
    now = datetime.now(timezone.utc)  # one timestamp for both record and filename
    entry = {
        "timestamp": now.isoformat(),
        "topic": topic,
        "model": model_key,
        "query": query,
    }
    log_file = LOG_DIR / f"log_{now.strftime('%Y%m%d')}.jsonl"
    # Hold the scheduler lock so a background commit never uploads a half-written line.
    with _log_scheduler.lock:
        with log_file.open("a", encoding="utf-8") as f:
            f.write(json.dumps(entry, ensure_ascii=False) + "\n")
# ─────────────────────────── Model Cache ───────────────────────────
# Models are always kept on CPU in the cache.
# Inside @spaces.GPU we move to CUDA for inference, then back to CPU when done.
# This makes the cache valid across ZeroGPU calls (GPU is released between calls,
# so storing tensors on GPU in the cache causes invalid-memory errors).
# Single-entry cache: {"name": display key, "model": CPU-resident model, "tokenizer": tok}.
_cache: dict = {"name": None, "model": None, "tokenizer": None}
def load_model(model_key: str):
    """Return (model, tokenizer) for *model_key*, loading and caching on CPU.

    The cache deliberately keeps weights on CPU: ZeroGPU revokes the device
    between calls, so CUDA tensors stored here would be invalid on the next
    request. generate() moves the model to GPU and back explicitly.
    """
    if _cache["name"] == model_key and _cache["model"] is not None:
        return _cache["model"], _cache["tokenizer"]
    # Evict the previous model AND its tokenizer before downloading a new one,
    # so we never hold two multi-GB checkpoints (or a stale tokenizer) in RAM.
    # Clearing "name" here also means a failed download below cannot leave the
    # cache claiming the old key while holding no model.
    if _cache["model"] is not None:
        _cache["model"].cpu()
        _cache.update({"name": None, "model": None, "tokenizer": None})
        torch.cuda.empty_cache()
    model_id = MODELS[model_key]["model_id"]
    tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
        low_cpu_mem_usage=True,
        # No device_map: always lands on CPU. GPU move is explicit in generate().
    )
    model.eval()
    # Decoder-only generation needs left padding and a defined pad token.
    tokenizer.padding_side = "left"
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    _cache.update({"name": model_key, "model": model, "tokenizer": tokenizer})
    return model, tokenizer
# Pre-load the default model at Space startup (to CPU, no GPU needed).
# This means the first user only waits for the fast CPU→GPU transfer (~1s),
# not a full multi-GB download.
print(f"Pre-loading {DEFAULT_MODEL} to CPU…")
try:
    load_model(DEFAULT_MODEL)
    print("Pre-load complete.")
except Exception as _e:
    # Non-fatal: the model will simply download on the first user request instead.
    print(f"Pre-load failed (will download on first request): {_e}")
# ─────────────────────────── Prompts ───────────────────────────
def get_no_reason_template(topic: str) -> list:
    """Chat messages (system + user) for the No-Reasoning model.

    Asks for the Boolean query directly inside <answer></answer> tags,
    with no <think> reasoning section.
    """
    return [{
        "role": "system",
        "content": "You are an expert systematic review information specialist.\nYou are tasked to formulate a systematic review Boolean query in response to a research topic. The final Boolean query must be enclosed within <answer> </answer> tags. Do not include any explanation or reasoning.",
    }, {
        "role": "user",
        "content": f'You are given a systematic review research topic, with the topic title "{topic}".\n'
        "Your task is to formulate a highly effective Boolean query in MEDLINE format for PubMed.\n"
        "The query should balance **high recall** (capturing all relevant studies) with **reasonable precision** (avoiding irrelevant results):\n"
        "- Use both free-text terms and MeSH terms (e.g., chronic pain[tiab], Pain[mh]).\n"
        "- **Do not wrap terms or phrases in double quotes**, as this disables automatic term mapping (ATM).\n"
        "- Combine synonyms or related terms within a concept using OR.\n"
        "- Combine different concepts using AND.\n"
        "- Use wildcards (*) to capture word variants (e.g., vaccin* → vaccine, vaccination):\n"
        " - Terms must have ≥4 characters before the * (e.g., colo*)\n"
        " - Wildcards work with field tags (e.g., breastfeed*[tiab]).\n"
        "- Field tags limit the search to specific fields and disable ATM.\n"
        "- Do not include date limits.\n"
        "- Tag term using term field (e.g., covid-19[ti] vaccine[ti] children[ti]) when needed.\n"
        "**Only use the following allowed field tags:**\n"
        "Title: [ti], Abstract: [ab], Title/Abstract: [tiab]\n"
        "MeSH: [mh], Major MeSH: [majr], Supplementary Concept: [nm]\n"
        "Text Words: [tw], All Fields: [all]\n"
        "Publication Type: [pt], Language: [la]\n\n"
        "Output and only output the formulated Boolean query inside <answer></answer> tags. Do not include any explanation or content outside or inside the <answer> tags."
    }]
def get_reasoning_template(topic: str) -> list:
    """Chat messages (system + user) for the generic Reasoning model.

    Requests free-form step-by-step reasoning inside <think></think>
    followed by the final query inside <answer></answer>.
    """
    return [{
        "role": "system",
        "content": "You are an expert systematic review information specialist.\nYou are tasked to formulate a systematic review Boolean query in response to a research topic.\nYour reasoning process should be enclosed within <think></think>, and the final Boolean query must be enclosed within <answer></answer> tags. Do not include anything outside of these tags.",
    }, {
        "role": "user",
        "content": f'You are given a systematic review research topic, with the topic title "{topic}".\n'
        "Your task is to generate a highly effective Boolean query in MEDLINE format for PubMed.\n"
        "The query should balance **high recall** (capturing all relevant studies) with **reasonable precision** (avoiding irrelevant results):\n"
        "- Use both free-text terms and MeSH terms (e.g., chronic pain[tiab], Pain[mh]).\n"
        "- **Do not wrap terms or phrases in double quotes**, as this disables automatic term mapping (ATM).\n"
        "- Combine synonyms or related terms within a concept using OR.\n"
        "- Combine different concepts using AND.\n"
        "- Use wildcards (*) to capture word variants (e.g., vaccin* → vaccine, vaccination):\n"
        " - Terms must have ≥4 characters before the * (e.g., colo*)\n"
        " - Wildcards work with field tags (e.g., breastfeed*[tiab]).\n"
        "- Field tags limit the search to specific fields and disable ATM.\n"
        "- Do not include date limits.\n"
        "- Tag terms using appropriate fields (e.g., covid-19[ti] vaccine[ti] children[ti]) when needed.\n"
        "**Only use the following allowed field tags:**\n"
        "Title: [ti], Abstract: [ab], Title/Abstract: [tiab]\n"
        "MeSH: [mh], Major MeSH: [majr], Supplementary Concept: [nm]\n"
        "Text Words: [tw], All Fields: [all]\n"
        "Publication Type: [pt], Language: [la]\n\n"
        "Output your full reasoning inside <think></think>.\n"
        "Output the final Boolean query inside <answer></answer>.\n"
        "Do not include any content outside these tags."
    }]
def get_conceptual_template(topic: str) -> list:
    """Chat messages (system + user) for the Conceptual-reasoning model.

    Walks the model through the "conceptual method": identify key concepts,
    expand each into term lists, build one OR-block per concept, then AND
    the blocks together. Output contract is <think> reasoning + <answer> query.
    """
    return [{
        "role": "system",
        "content": "You are an expert systematic review information specialist.\nFormulate a systematic review Boolean query using step-by-step reasoning inside <think> </think>, and output the final query inside <answer> </answer>.",
    }, {
        "role": "user",
        "content": f'You are given a systematic review topic titled: "{topic}".\n'
        "Construct a Boolean query using the **conceptual method**, based on domain logic and structured thinking.\n\n"
        "**Step 1**: Identify 2–3 key concepts from the topic (e.g., Population, Intervention, Outcome).\n\n"
        "**Step 2**: For each concept:\n"
        "- List related terms: synonyms, variants, relevant MeSH terms.\n"
        "- Prioritise specific, high-precision terms.\n\n"
        "**Step 3**: Create a Boolean block per concept:\n"
        "- Combine terms using OR\n"
        "- Use free-text terms and MeSH terms (e.g., chronic pain[tiab], Pain[mh])\n"
        "- **Do not wrap terms or phrases in double quotes**, as this disables automatic term mapping (ATM)\n"
        "- Tag terms individually when needed (e.g., covid-19[ti] vaccine[ti] children[ti])\n"
        "- Field tags limit search scope and disable ATM\n\n"
        "**Step 4**: Use wildcards (*) to capture word variants (e.g., vaccin* → vaccine, vaccination):\n"
        " - Terms must have ≥4 characters before the * (e.g., colo*)\n"
        " - Wildcards work with field tags (e.g., breastfeed*[tiab]).\n\n"
        "**Step 5**: Combine all Boolean blocks using AND:\n"
        "((Concept1_term1[tiab] OR Concept1_term2[tiab] OR Concept1_termX[mh]) AND (Concept2_...))\n\n"
        "**Only use the following allowed field tags:**\n"
        "Title: [ti], Abstract: [ab], Title/Abstract: [tiab]\n"
        "MeSH: [mh], Major MeSH: [majr], Supplementary Concept: [nm]\n"
        "Text Words: [tw], All Fields: [all]\n"
        "Publication Type: [pt], Language: [la]\n\n"
        "Output your full reasoning inside <think>...</think>\n"
        "Output only the final Boolean query inside <answer>...</answer>\n"
        "Do not include any content outside these tags.\n"
        "Do not include date limits."
    }]
def get_objective_template(topic: str) -> list:
    """Chat messages (system + user) for the Objective-reasoning model.

    Walks the model through the "objective method": simulate a plausible
    relevant abstract, mine it for informative terms, categorise them
    (population / intervention / study design), then assemble the query.
    Output contract is <think> reasoning + <answer> query.
    """
    return [{
        "role": "system",
        "content": "You are an expert systematic review information specialist.\nYou are tasked to formulate a systematic review Boolean query step by step as a reasoning process within <think> </think>, and provide the Boolean query formulated <answer> </answer>.",
    }, {
        "role": "user",
        "content": f'You are given a systematic review research topic, with the topic title "{topic}".\n'
        "You need to simulate a Boolean query construction process using the **objective method**, which is grounded in domain expertise and structured logic.\n\n"
        "**Step 1**: Simulate a concise title and abstract (2–3 sentences) of a *relevant and focused* article clearly aligned with the topic. This is a hypothetical but plausible example.\n\n"
        "**Step 2**: Based on the simulated text, identify *key informative terms or phrases* that best represent the article's core concepts. Prioritise specificity and informativeness. Avoid overly broad or ambiguous terms.\n\n"
        "**Step 3**: Categorise each term into one of the following:\n"
        "- (A) Health conditions or populations (e.g., diabetes, adolescents)\n"
        "- (B) Treatments, interventions, or exposures (e.g., insulin therapy, air pollution)\n"
        "- (C) Study designs or methodologies (e.g., randomized controlled trial, cohort study)\n"
        "- (N/A) Not applicable to any of the above categories\n\n"
        "**Step 4**: Using the categorised terms, build a Boolean query in MEDLINE format for PubMed:\n"
        "- Combine synonyms or related terms within each category using OR\n"
        "- Use both free-text terms and MeSH terms (e.g., chronic pain[tiab], Pain[mh])\n"
        "- **Do not wrap terms or phrases in double quotes**, as this disables automatic term mapping (ATM)\n"
        "- Tag each term individually when needed (e.g., covid-19[ti] vaccine[ti] children[ti])\n"
        "- Field tags limit the search to specific fields and disable ATM\n\n"
        "**Step 5**: Use wildcards (*) to capture word variants (e.g., vaccin* → vaccine, vaccination):\n"
        " - Terms must have ≥4 characters before the * (e.g., colo*)\n"
        " - Wildcards work with field tags (e.g., breastfeed*[tiab]).\n\n"
        "**Step 6**: Combine all category blocks using AND:\n"
        "((itemA1[tiab] OR itemA2[tiab] OR itemA3[mh]) AND (itemB1[tiab] OR ...) AND (itemC1[tiab] OR ...))\n\n"
        "**Only use the following allowed field tags:**\n"
        "Title: [ti], Abstract: [ab], Title/Abstract: [tiab]\n"
        "MeSH: [mh], Major MeSH: [majr], Supplementary Concept: [nm]\n"
        "Text Words: [tw], All Fields: [all]\n"
        "Publication Type: [pt], Language: [la]\n\n"
        "Place your full reasoning (including simulated abstract, term list, classification, and query construction) inside <think></think>.\n"
        "Output the final Boolean query inside <answer></answer>.\n"
        "Do not include anything outside the <think> and <answer> tags.\n"
        "Do not include date restrictions."
    }]
def build_messages(topic: str, model_key: str) -> list:
    """Route the (whitespace-stripped) topic to the prompt template for *model_key*.

    Unknown keys fall back to the generic reasoning template, matching the
    original if/elif chain's else branch.
    """
    cleaned_topic = topic.strip()
    template_by_key = {
        "Autobool-Qwen4b-No-Reasoning": get_no_reason_template,
        "Autobool-Qwen4b-Reasoning-Conceptual": get_conceptual_template,
        "Autobool-Qwen4b-Reasoning-Objective": get_objective_template,
    }
    template_fn = template_by_key.get(model_key, get_reasoning_template)
    return template_fn(cleaned_topic)
# ─────────────────────────── Parsing ───────────────────────────
def _between(text, open_tag, close_tag):
if open_tag not in text:
return ""
after = text.split(open_tag, 1)[1]
if close_tag in after:
return after.split(close_tag, 1)[0].strip()
return after.strip()
def parse(text):
    """Split raw model output into a (thinking, answer) pair.

    Extracts the contents of <think></think> and <answer></answer>. When the
    text contains neither tag at all (e.g. a model that ignored the tag
    instructions), the whole stripped text is treated as the answer.
    """
    def extract(source, opener, closer):
        # Tolerates a missing closing tag so partially-streamed output parses.
        _, found, tail = source.partition(opener)
        if not found:
            return ""
        inner, closed, _ = tail.partition(closer)
        return inner.strip() if closed else tail.strip()

    thinking = extract(text, "<think>", "</think>")
    answer = extract(text, "<answer>", "</answer>")
    if not answer and "<think>" not in text and "<answer>" not in text:
        answer = text.strip()
    return thinking, answer
# ─────────────────────────── PubMed Link ───────────────────────────
def pubmed_html(query: str) -> str:
    """Render an HTML card with the (escaped) query and a PubMed search link.

    Returns "" for blank/whitespace-only queries so the UI shows nothing.
    The query is URL-quoted for the link and HTML-escaped for display.
    """
    if not query.strip():
        return ""
    search_url = f"https://pubmed.ncbi.nlm.nih.gov/?term={urllib.parse.quote(query)}"
    escaped_query = html.escape(query)
    return f"""
<div style="padding:16px;background:#f0f7ff;border-radius:10px;border:1px solid #b3d4f7;margin-top:4px;">
<p style="margin:0 0 6px 0;font-weight:600;color:#1a3a5c;">🔗 PubMed Search Link</p>
<code style="display:block;background:#fff;padding:8px;border-radius:4px;font-size:12px;
margin-bottom:10px;word-break:break-all;border:1px solid #dde8f7;
white-space:pre-wrap;">{escaped_query}</code>
<a href="{search_url}" target="_blank" rel="noopener noreferrer"
style="padding:8px 18px;background:#0066cc;color:#fff;border-radius:5px;
text-decoration:none;font-size:14px;font-weight:500;">Search PubMed →</a>
</div>"""
# ─────────────────────────── Generation ───────────────────────────
@spaces.GPU
def generate(topic: str, model_key: str):
    """Stream (status, thinking, answer, pubmed_html) tuples to the Gradio UI.

    Runs on a ZeroGPU worker: the cached CPU model is moved to CUDA for this
    call only, and moved back to CPU before returning, because ZeroGPU revokes
    the device once the decorated function finishes.
    """
    if not topic.strip():
        yield "Please enter a research topic.", "", "", ""
        return
    # Load (or retrieve from CPU cache) — pick the status message accordingly.
    in_cache = _cache["name"] == model_key and _cache["model"] is not None
    status = "⏳ Moving model to GPU…" if in_cache else "⏳ Downloading model (~8 GB, first run)…"
    yield status, "", "", ""
    model, tokenizer = load_model(model_key)
    # Move to GPU for this ZeroGPU call.
    if next(model.parameters()).device.type != "cuda":
        model.cuda()
    messages = build_messages(topic, model_key)
    try:
        prompt = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True,
            enable_thinking=MODELS[model_key]["has_thinking"],
        )
    except TypeError:
        # Older chat templates don't accept enable_thinking — retry without it.
        prompt = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True,
        )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # model.generate blocks, so it runs in a daemon thread; the streamer hands
    # decoded text chunks back to this generator for incremental UI updates.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    thread = Thread(target=model.generate, kwargs=dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=4096,
        do_sample=True,
        temperature=0.6,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id,
    ), daemon=True)
    thread.start()
    yield "🔄 Generating on ZeroGPU…", "", "", ""
    full_text = ""
    for token in streamer:
        full_text += token
        # Re-parse on every chunk so partial <think>/<answer> content streams live.
        thinking, answer = parse(full_text)
        yield "🔄 Generating on ZeroGPU…", thinking, answer, ""
    thread.join()
    thinking, answer = parse(full_text)
    # Return model to CPU so the cache is valid for the next ZeroGPU call.
    # (ZeroGPU releases the GPU after this function returns; tensors left on GPU
    # would become invalid, breaking the cache on the next request.)
    model.cpu()
    torch.cuda.empty_cache()
    _log_interaction(topic, model_key, answer)
    yield "✅ Done!", thinking, answer, pubmed_html(answer)
# ─────────────────────────── UI ───────────────────────────
# Markdown banner rendered at the top of the page.
HEADER = """
# 🔬 AutoBool — Boolean Query Generator for Systematic Reviews
AutoBool uses reinforcement-learning-trained LLMs to generate high-quality **PubMed Boolean queries** for systematic reviews.
📄 [AutoBool (EACL 2026)](https://arxiv.org/abs/2602.00005) &nbsp;|&nbsp; 🤗 [ielabgroup](https://huggingface.co/ielabgroup) &nbsp;|&nbsp; 💻 [GitHub](https://github.com/ielab/AutoBool)
"""
# One-line blurb shown under the dropdown for the currently-selected model.
MODEL_INFO = {
    "Autobool-Qwen4b-Reasoning": "Step-by-step reasoning inside `<think>` tags before producing the query.",
    "Autobool-Qwen4b-Reasoning-Conceptual": "Maps topic to high-level concepts first, then builds the query.",
    "Autobool-Qwen4b-Reasoning-Objective": "Focuses reasoning on measurable objectives and study designs.",
    "Autobool-Qwen4b-No-Reasoning": "Direct generation — no thinking tokens, fastest output.",
}
with gr.Blocks(title="AutoBool Demo") as demo:
    gr.Markdown(HEADER)
    # Input row: topic textbox (left, wider) + model picker with blurb (right).
    with gr.Row():
        with gr.Column(scale=2):
            topic_input = gr.Textbox(
                label="Systematic Review Topic",
                value="Diagnostic accuracy of endoscopic ultrasonography for preoperative staging of gastric cancer",
                placeholder="Enter your systematic review topic here… e.g. Diagnostic accuracy of endoscopic ultrasonography for preoperative staging of gastric cancer; hit Generate to use the default topic",
                lines=3,
            )
        with gr.Column(scale=1):
            model_dropdown = gr.Dropdown(
                choices=list(MODELS.keys()),
                value=DEFAULT_MODEL,
                label="Model",
            )
            model_info_md = gr.Markdown(value=MODEL_INFO[DEFAULT_MODEL])
    generate_btn = gr.Button("⚡ Generate Boolean Query", variant="primary", size="lg")
    # Single-line status strip updated by each yield of generate().
    status_box = gr.Textbox(interactive=False, max_lines=1, show_label=False)
    # Output row: streamed reasoning (left) and the final Boolean query (right).
    with gr.Row():
        thinking_box = gr.Textbox(
            label="🧠 Model Reasoning",
            lines=14, interactive=False,
            placeholder="Reasoning will appear here for thinking models...",
        )
        query_box = gr.Textbox(
            label="🔍 Generated Boolean Query",
            lines=8, interactive=False,
            placeholder="The Boolean query will appear here...",
        )
    # Rendered PubMed link card (empty until generation completes).
    pubmed_out = gr.HTML()
    gr.Examples(
        examples=[
            ["Diagnostic accuracy of endoscopic ultrasonography for preoperative staging of gastric cancer", "Autobool-Qwen4b-Reasoning"],
            ["Cognitive behavioural therapy versus pharmacotherapy for major depressive disorder in adults", "Autobool-Qwen4b-Reasoning-Conceptual"],
            ["Immune checkpoint inhibitors with chemotherapy for advanced non-small cell lung cancer", "Autobool-Qwen4b-Reasoning-Objective"],
            ["Machine learning for early prediction of sepsis in ICU patients", "Autobool-Qwen4b-No-Reasoning"],
        ],
        inputs=[topic_input, model_dropdown],
        label="Example Topics",
    )
    # Keep the blurb in sync with the dropdown selection.
    model_dropdown.change(fn=lambda k: MODEL_INFO.get(k, ""), inputs=model_dropdown, outputs=model_info_md)
    # generate() is a generator: each yield updates all four outputs in order.
    generate_btn.click(
        fn=generate,
        inputs=[topic_input, model_dropdown],
        outputs=[status_box, thinking_box, query_box, pubmed_out],
    )
if __name__ == "__main__":
    demo.launch()