kirubel1738 committed on
Commit d17b90e · verified · 1 Parent(s): c9d2fa0

Update src/streamlit_app.py

Files changed (1)
  1. src/streamlit_app.py +44 -145
src/streamlit_app.py CHANGED
@@ -1,157 +1,56 @@
-
  # streamlit_app.py
  import os
- import json
- import time

  # -----------------------------
- # IMPORTANT: set cache dirs BEFORE importing transformers/huggingface_hub
  # -----------------------------
- os.environ.setdefault("HF_HOME", os.environ.get("HF_HOME", "/tmp/huggingface"))
- os.environ.setdefault("TRANSFORMERS_CACHE", os.environ.get("TRANSFORMERS_CACHE", "/tmp/huggingface/transformers"))
- os.environ.setdefault("HF_DATASETS_CACHE", os.environ.get("HF_DATASETS_CACHE", "/tmp/huggingface/datasets"))
- os.environ.setdefault("HUGGINGFACE_HUB_CACHE", os.environ.get("HUGGINGFACE_HUB_CACHE", "/tmp/huggingface/hub"))
- os.environ.setdefault("XDG_CACHE_HOME", os.environ.get("XDG_CACHE_HOME", "/tmp/huggingface"))
- os.environ.setdefault("HOME", os.environ.get("HOME", "/tmp"))
-
- # create cache dirs (best-effort)
- for d in [os.environ["HF_HOME"], os.environ["TRANSFORMERS_CACHE"], os.environ["HF_DATASETS_CACHE"], os.environ["HUGGINGFACE_HUB_CACHE"]]:
-     try:
-         os.makedirs(d, exist_ok=True)
-         os.chmod(d, 0o777)
-     except Exception:
-         pass
-
- import streamlit as st
- import requests
-
- # Optional heavy imports will be inside local-model branch
- LOCAL_MODE = os.environ.get("USE_LOCAL_MODEL", "0") == "1"
-
- # default model id the user provided; keep as-is
- DEFAULT_MODEL_ID = "kirubel1738/biogpt-pubmedqa-finetuned"
-
- st.set_page_config(page_title="BioGPT (PubMedQA) demo", layout="centered")

- st.title("BioGPT PubMedQA demo")
- st.caption("Defaults to the Hugging Face Inference API (recommended for Spaces / CPU).")

- st.markdown(
-     """
-     **How it works**
-     - By default the app will call Hugging Face's Inference API for the model you specify (fast and avoids memory issues).
-     - If you set `USE_LOCAL_MODEL=1` in your environment, the app will attempt to load the model locally using `transformers` (only for GPUs/large memory machines).
-     """
- )

- col1, col2 = st.columns([3,1])

- with col1:
-     model_id = st.text_input("Model repo id", value=DEFAULT_MODEL_ID, help="Hugging Face repo id (e.g. username/modelname).")
-     prompt = st.text_area("Question / prompt", height=180, placeholder="Enter a PubMed-style question or prompt...")
- with col2:
-     max_new_tokens = st.slider("Max new tokens", 16, 1024, 128)
-     temperature = st.slider("Temperature", 0.0, 1.5, 0.0, step=0.05)
-     method = st.radio("Run method", ("Inference API (recommended)", "Local model (heavy)"), index=0)
-
- # override radio if user set USE_LOCAL_MODEL env var
- if LOCAL_MODE:
-     method = "Local model (heavy)"
-
- hf_token = os.environ.get("HUGGINGFACE_HUB_TOKEN") or os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_API_TOKEN")
-
- def call_inference_api(model_id: str, prompt: str, max_new_tokens: int, temperature: float):
-     """
-     Simple POST to Hugging Face Inference API.
-     If you want to use the InferenceClient from huggingface_hub you can swap this.
-     """
-     api_url = f"https://api-inference.huggingface.co/models/{model_id}"
-     headers = {"Authorization": f"Bearer {hf_token}"} if hf_token else {}
-     payload = {
-         "inputs": prompt,
-         "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature},
-         "options": {"wait_for_model": True}
-     }
-     try:
-         r = requests.post(api_url, headers=headers, json=payload, timeout=120)
-     except Exception as e:
-         return False, f"Request failed: {e}"
-     if r.status_code != 200:
-         try:
-             error = r.json()
-         except Exception:
-             error = r.text
-         return False, f"API error ({r.status_code}): {error}"
-     try:
-         resp = r.json()
-         # handle several possible response schemas
-         if isinstance(resp, dict) and "error" in resp:
-             return False, resp["error"]
-         # often it's a list of dicts with 'generated_text'
-         if isinstance(resp, list):
-             out_texts = []
-             for item in resp:
-                 if isinstance(item, dict):
-                     # common key: 'generated_text'
-                     for k in ("generated_text", "text", "content"):
-                         if k in item:
-                             out_texts.append(item[k])
-                             break
-                     else:
-                         out_texts.append(json.dumps(item))
-                 else:
-                     out_texts.append(str(item))
-             return True, "\n\n".join(out_texts)
-         # fallback
-         return True, str(resp)
-     except Exception as e:
-         return False, f"Could not parse response: {e}"
-
- # Local model loader (only if method chosen)
- generator = None
- if method.startswith("Local"):
-     st.warning("Local model mode selected — this requires transformers + torch and lots of RAM/GPU. Only use if you know the model fits your hardware.")
-     try:
-         from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-         import torch
-         device = 0 if torch.cuda.is_available() else -1
-         st.info(f"torch.cuda.is_available={torch.cuda.is_available()} -- device set to {device}")
-         with st.spinner("Loading tokenizer & model (this can take a while)..."):
-             tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=os.environ.get("TRANSFORMERS_CACHE"))
-             model = AutoModelForCausalLM.from_pretrained(model_id, cache_dir=os.environ.get("TRANSFORMERS_CACHE"), low_cpu_mem_usage=True)
-         generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
-     except Exception as e:
-         st.error(f"Local model load failed: {e}")
-         st.stop()
-
- if st.button("Generate"):
-     if not prompt or prompt.strip() == "":
-         st.error("Please enter a prompt.")
-         st.stop()
-
-     if method.startswith("Inference"):
-         if ("kirubel1738/biogpt-pubmedqa-finetuned" in model_id) and not hf_token:
-             st.info("If the model is private or rate-limited, set HUGGINGFACE_HUB_TOKEN as a secret in Spaces or as an env var locally.")
-         with st.spinner("Querying Hugging Face Inference API..."):
-             ok, out = call_inference_api(model_id, prompt, max_new_tokens, float(temperature))
-         if not ok:
-             st.error(out)
-         else:
-             st.success("Done")
-             st.text_area("Model output", value=out, height=320)
      else:
-         # local model generation
-         try:
-             with st.spinner("Running local generation..."):
-                 results = generator(prompt, max_new_tokens=max_new_tokens, do_sample=True, temperature=temperature)
-             if isinstance(results, list) and len(results) > 0 and "generated_text" in results[0]:
-                 out = results[0]["generated_text"]
-             else:
-                 out = str(results)
-             st.success("Done")
-             st.text_area("Model output", value=out, height=320)
-         except Exception as e:
-             st.error(f"Local generation failed: {e}")

  st.markdown("---")
- st.caption("If you run into permissions errors in Spaces, ensure the HF cache env vars above point to a writable directory (we already set them to /tmp/huggingface in this container).")
 
 
  # streamlit_app.py
  import os

  # -----------------------------
+ # Ensure cache dirs are writable in Spaces.
+ # NOTE: set these BEFORE importing transformers, which reads them at import time.
  # -----------------------------
+ os.environ.setdefault("HF_HOME", "/tmp/huggingface")
+ os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/huggingface/transformers")
+ os.environ.setdefault("HF_DATASETS_CACHE", "/tmp/huggingface/datasets")
+ os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/tmp/huggingface/hub")
+ os.environ.setdefault("XDG_CACHE_HOME", "/tmp/huggingface")
+
+ import streamlit as st
+ from transformers import pipeline
+
+ # st.set_page_config must be the first Streamlit command on the page
+ st.set_page_config(page_title="BioGPT — PubMedQA demo", layout="centered")
+
+ # Hardcoded model repo
+ MODEL_ID = "kirubel1738/biogpt-pubmedqa-finetuned"
+
+ @st.cache_resource
+ def load_model():
+     """Load the BioGPT model on CPU; cached so it loads only once per process."""
+     generator = pipeline("text-generation", model=MODEL_ID, device=-1)
+     return generator
+
+ # Load once
+ generator = load_model()
+
+ # -----------------------------
+ # Streamlit UI
+ # -----------------------------
+ st.title("🧬 BioGPT PubMedQA Demo")
+
+ st.write("Ask a biomedical question and get an answer generated by BioGPT fine-tuned on PubMedQA.")
+
+ user_input = st.text_area("Enter your biomedical question:", height=150)
+
+ if st.button("Get Answer"):
+     if user_input.strip():
+         with st.spinner("Generating answer..."):
+             try:
+                 result = generator(
+                     user_input,
+                     max_new_tokens=128,
+                     do_sample=True,
+                     temperature=0.7
+                 )
+                 output_text = result[0]["generated_text"]
+                 st.success("Answer:")
+                 st.write(output_text)
+             except Exception as e:
+                 st.error(f"Generation failed: {e}")
      else:
+         st.warning("Please enter a question.")

  st.markdown("---")
+ st.caption("Model: kirubel1738/biogpt-pubmedqa-finetuned | Runs on CPU")