Spaces:

kirubel1738
/

biogpt-pubmedqa-chatbot

Sleeping

App Files Files Community

kirubel1738 committed on Sep 23, 2025

Commit

ac9c332

verified ·

1 Parent(s): c52545d

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +150 -34

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,156 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
 """
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

+# streamlit_app.py
+import os
+import json
+import time
+# -----------------------------
+# IMPORTANT: set cache dirs BEFORE importing transformers/huggingface_hub
+# -----------------------------
+os.environ.setdefault("HF_HOME", os.environ.get("HF_HOME", "/tmp/huggingface"))
+os.environ.setdefault("TRANSFORMERS_CACHE", os.environ.get("TRANSFORMERS_CACHE", "/tmp/huggingface/transformers"))
+os.environ.setdefault("HF_DATASETS_CACHE", os.environ.get("HF_DATASETS_CACHE", "/tmp/huggingface/datasets"))
+os.environ.setdefault("HUGGINGFACE_HUB_CACHE", os.environ.get("HUGGINGFACE_HUB_CACHE", "/tmp/huggingface/hub"))
+os.environ.setdefault("XDG_CACHE_HOME", os.environ.get("XDG_CACHE_HOME", "/tmp/huggingface"))
+os.environ.setdefault("HOME", os.environ.get("HOME", "/tmp"))
+# create cache dirs (best-effort)
+for d in [os.environ["HF_HOME"], os.environ["TRANSFORMERS_CACHE"], os.environ["HF_DATASETS_CACHE"], os.environ["HUGGINGFACE_HUB_CACHE"]]:
+    try:
+        os.makedirs(d, exist_ok=True)
+        os.chmod(d, 0o777)
+    except Exception:
+        pass
 import streamlit as st
+import requests
+# Optional heavy imports will be inside local-model branch
+LOCAL_MODE = os.environ.get("USE_LOCAL_MODEL", "0") == "1"
+# default model id the user provided; keep as-is
+DEFAULT_MODEL_ID = "kirubel1738/biogpt-pubmedqa-finetuned"
+st.set_page_config(page_title="BioGPT (PubMedQA) demo", layout="centered")
+st.title("BioGPT — PubMedQA demo")
+st.caption("Defaults to the Hugging Face Inference API (recommended for Spaces / CPU).")
+st.markdown(
+    """
+**How it works**
+- By default the app will call Hugging Face's Inference API for the model you specify (fast and avoids memory issues).
+- If you set `USE_LOCAL_MODEL=1` in your environment, the app will attempt to load the model locally using `transformers` (only for GPUs/large memory machines).
 """
+)
+col1, col2 = st.columns([3,1])
+with col1:
+    model_id = st.text_input("Model repo id", value=DEFAULT_MODEL_ID, help="Hugging Face repo id (e.g. username/modelname).")
+    prompt = st.text_area("Question / prompt", height=180, placeholder="Enter a PubMed-style question or prompt...")
+with col2:
+    max_new_tokens = st.slider("Max new tokens", 16, 1024, 128)
+    temperature = st.slider("Temperature", 0.0, 1.5, 0.0, step=0.05)
+    method = st.radio("Run method", ("Inference API (recommended)", "Local model (heavy)"), index=0)
+# override radio if user set USE_LOCAL_MODEL env var
+if LOCAL_MODE:
+    method = "Local model (heavy)"
+hf_token = os.environ.get("HUGGINGFACE_HUB_TOKEN") or os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_API_TOKEN")
+def call_inference_api(model_id: str, prompt: str, max_new_tokens: int, temperature: float):
+    """
+    Simple POST to Hugging Face Inference API.
+    If you want to use the InferenceClient from huggingface_hub you can swap this.
+    """
+    api_url = f"https://api-inference.huggingface.co/models/{model_id}"
+    headers = {"Authorization": f"Bearer {hf_token}"} if hf_token else {}
+    payload = {
+        "inputs": prompt,
+        "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature},
+        "options": {"wait_for_model": True}
+    }
+    try:
+        r = requests.post(api_url, headers=headers, json=payload, timeout=120)
+    except Exception as e:
+        return False, f"Request failed: {e}"
+    if r.status_code != 200:
+        try:
+            error = r.json()
+        except Exception:
+            error = r.text
+        return False, f"API error ({r.status_code}): {error}"
+    try:
+        resp = r.json()
+        # handle several possible response schemas
+        if isinstance(resp, dict) and "error" in resp:
+            return False, resp["error"]
+        # often it's a list of dicts with 'generated_text'
+        if isinstance(resp, list):
+            out_texts = []
+            for item in resp:
+                if isinstance(item, dict):
+                    # common key: 'generated_text'
+                    for k in ("generated_text", "text", "content"):
+                        if k in item:
+                            out_texts.append(item[k])
+                            break
+                    else:
+                        out_texts.append(json.dumps(item))
+                else:
+                    out_texts.append(str(item))
+            return True, "\n\n".join(out_texts)
+        # fallback
+        return True, str(resp)
+    except Exception as e:
+        return False, f"Could not parse response: {e}"
+# Local model loader (only if method chosen)
+generator = None
+if method.startswith("Local"):
+    st.warning("Local model mode selected — this requires transformers + torch and lots of RAM/GPU. Only use if you know the model fits your hardware.")
+    try:
+        from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+        import torch
+        device = 0 if torch.cuda.is_available() else -1
+        st.info(f"torch.cuda.is_available={torch.cuda.is_available()} -- device set to {device}")
+        with st.spinner("Loading tokenizer & model (this can take a while)..."):
+            tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=os.environ.get("TRANSFORMERS_CACHE"))
+            model = AutoModelForCausalLM.from_pretrained(model_id, cache_dir=os.environ.get("TRANSFORMERS_CACHE"), low_cpu_mem_usage=True)
+            generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
+    except Exception as e:
+        st.error(f"Local model load failed: {e}")
+        st.stop()
+if st.button("Generate"):
+    if not prompt or prompt.strip() == "":
+        st.error("Please enter a prompt.")
+        st.stop()
+    if method.startswith("Inference"):
+        if ("kirubel1738/biogpt-pubmedqa-finetuned" in model_id) and not hf_token:
+            st.info("If the model is private or rate-limited, set HUGGINGFACE_HUB_TOKEN as a secret in Spaces or as an env var locally.")
+        with st.spinner("Querying Hugging Face Inference API..."):
+            ok, out = call_inference_api(model_id, prompt, max_new_tokens, float(temperature))
+        if not ok:
+            st.error(out)
+        else:
+            st.success("Done")
+            st.text_area("Model output", value=out, height=320)
+    else:
+        # local model generation
+        try:
+            with st.spinner("Running local generation..."):
+                results = generator(prompt, max_new_tokens=max_new_tokens, do_sample=True, temperature=temperature)
+                if isinstance(results, list) and len(results) > 0 and "generated_text" in results[0]:
+                    out = results[0]["generated_text"]
+                else:
+                    out = str(results)
+            st.success("Done")
+            st.text_area("Model output", value=out, height=320)
+        except Exception as e:
+            st.error(f"Local generation failed: {e}")
+st.markdown("---")
+st.caption("If you run into permissions errors in Spaces, ensure the HF cache env vars above point to a writable directory (we already set them to /tmp/huggingface in this container).")