Kennethdotse committed on
Commit 652d9c6 · 1 Parent(s): 398f908
Files changed (2)
  1. app.py +215 -66
  2. requirements.txt +14 -0
app.py CHANGED
@@ -1,70 +1,219 @@
 
 
  import gradio as gr
- from huggingface_hub import InferenceClient
-
-
- def respond(
-     message,
-     history: list[dict[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
-     hf_token: gr.OAuthToken,
- ):
-     """
-     For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-     """
-     client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
-
-     messages = [{"role": "system", "content": system_message}]
-
-     messages.extend(history)
-
-     messages.append({"role": "user", "content": message})
-
-     response = ""
-
-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         choices = message.choices
-         token = ""
-         if len(choices) and choices[0].delta.content:
-             token = choices[0].delta.content
-
-         response += token
-         yield response
-
-
- """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
- chatbot = gr.ChatInterface(
-     respond,
-     type="messages",
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
- )
-
- with gr.Blocks() as demo:
-     with gr.Sidebar():
-         gr.LoginButton()
-     chatbot.render()


  if __name__ == "__main__":
-     demo.launch()
 
+ import os
+ import sys
  import gradio as gr
+ import torch
+ from transformers import pipeline, BitsAndBytesConfig
+ from datasets import load_dataset
+ import pandas as pd
+ from PIL import Image
+ from typing import Optional
+ from pathlib import Path
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import Chroma
+ from langchain.document_loaders import DataFrameLoader, PyPDFLoader, CSVLoader
+ from langchain.text_splitter import CharacterTextSplitter
+ from huggingface_hub import HfApi


+ # ---------- Configuration ----------
+ MODEL_VARIANT = os.environ.get("MODEL_VARIANT", "4b-it")
+ MODEL_ID = f"google/medgemma-{MODEL_VARIANT}"
+ USE_QUANTIZATION = True
+ LOCAL_DOCS_PATH = Path("./medical/hb_db")
+ CHROMA_PERSIST_DIR = "./chroma_db"
+
+ _pipe = None
+ _rag_vectorstore = None
+ _embeddings = None
+
+ HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
+ if not HF_TOKEN:
+     print("Error: no Hugging Face token found. Set HF_TOKEN or HUGGINGFACEHUB_API_TOKEN as an environment variable or Space secret.")
+     sys.exit(1)
+ else:
+     try:
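+         # whoami() performs a real Hub API call, so an invalid token fails fast here, before any model download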
+         HfApi().whoami(token=HF_TOKEN)
+         print("Hugging Face token OK")
+     except Exception as e:
+         print("Invalid Hugging Face token:", e)
+         sys.exit(1)
+
+ # ---------- Lazy initialization helpers ----------
+ def _init_pipeline():
+     global _pipe
+     if _pipe is not None:
+         return _pipe
+
+     # Model kwargs
+     model_kwargs = dict(
+         torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
+         device_map="auto",
+     )
+
+     if USE_QUANTIZATION:
+         try:
+             model_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_4bit=True)
+         except Exception:
+             # bitsandbytes may not be available on CPU-only setups; ignore and fall back
+             pass
+
+     # Choose pipeline task type depending on variant
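+     # (the default "4b-it" variant contains "it", so the multimodal image-text-to-text task is selected)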
+     task = "image-text-to-text" if "image" in MODEL_VARIANT or "it" in MODEL_VARIANT else "text-generation"
+
+     print(f"Initializing pipeline: {MODEL_ID} task={task}")
+     _pipe = pipeline(
+         task,
+         model=MODEL_ID,
+         device_map=model_kwargs.get("device_map"),
+         torch_dtype=model_kwargs.get("torch_dtype"),
+         use_auth_token=HF_TOKEN,
+         **({} if "quantization_config" not in model_kwargs else {"quantization_config": model_kwargs["quantization_config"]}),
+     )
+     try:
+         _pipe.model.generation_config.do_sample = False
+     except Exception:
+         pass
+
+     return _pipe
+
+
+ def _init_rag():
+     """Builds or loads a Chroma vectorstore from local files. This runs lazily on first request."""
+     global _rag_vectorstore, _embeddings
+     if _rag_vectorstore is not None:
+         return _rag_vectorstore
+
+     docs = []
+
+     # 1) Load a Hugging Face dataset (if available); convert to a DataFrame
+     try:
+         ds = load_dataset("knowrohit07/know_medical_dialogue_v2")
+         df = pd.DataFrame(ds["train"])
+         if "instruction" in df.columns and "output" in df.columns:
+             df["full_dialogue"] = df["instruction"].astype(str) + " \n\n" + df["output"].astype(str)
+             loader = DataFrameLoader(df, page_content_column="full_dialogue")
+             docs += loader.load()
+     except Exception as e:
+         print("Warning: could not load HF dataset:", e)
+
+     # 2) Load local CSV if present
+     csv_path = LOCAL_DOCS_PATH / "Final_Dataset.csv"
+     if csv_path.exists():
+         try:
+             csv_loader = CSVLoader(str(csv_path))
+             docs += csv_loader.load()
+         except Exception as e:
+             print("Warning loading CSV:", e)
+
+     # 3) Load PDFs found in the directory
+     if LOCAL_DOCS_PATH.exists() and LOCAL_DOCS_PATH.is_dir():
+         for pdf_file in LOCAL_DOCS_PATH.glob("*.pdf"):
+             try:
+                 pdf_loader = PyPDFLoader(str(pdf_file))
+                 docs += pdf_loader.load()
+             except Exception as e:
+                 print(f"Warning loading PDF {pdf_file}: {e}")
+
+     # 4) If still no docs, create a placeholder document
+     if len(docs) == 0:
+         from langchain.schema import Document
+         docs = [Document(page_content="No local documents found. Upload PDFs/CSV into ./medical/hb_db or commit them to the Space repo.")]
+
+     # 5) Split into chunks
+     splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+     chunks = splitter.split_documents(docs)
+
+     # 6) Embeddings and Chroma vectorstore
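+     # (the store is rebuilt and re-embedded on each cold start; persist_directory only writes it to disk)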
+     try:
+         _embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+         _rag_vectorstore = Chroma.from_documents(chunks, _embeddings, persist_directory=CHROMA_PERSIST_DIR)
+         try:
+             _rag_vectorstore.persist()
+         except Exception:
+             pass
+     except Exception as e:
+         print("Error initializing vectorstore:", e)
+         _rag_vectorstore = None
+
+     return _rag_vectorstore
+
+
+ # ---------- Main RAG + generation function ----------
+
+ def generate_medgemma_rag_response(query: str, image: Optional[Image.Image] = None) -> str:
+     """Generate an answer using RAG + the MedGemma model. This function lazily initializes heavy resources."""
+     # Ensure the RAG vectorstore is initialized
+     vs = _init_rag()
+
+     # Retrieve relevant docs if the vectorstore exists
+     context = ""
+     if vs is not None:
+         try:
+             retrieved = vs.similarity_search(query, k=4)
+             context = "\n\n".join([d.page_content for d in retrieved])
+         except Exception as e:
+             print("Warning during similarity search:", e)
+
+     # Construct prompt
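+     # (retrieved chunks are inlined ahead of the user question: simple prompt stuffing)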
+     rag_prompt = f"You are a respectful, medical AI assistant. Use the provided context and your knowledge to answer and be clear when uncertain.\n\nContext:\n{context}\n\nUser Question: {query}\n\nAnswer:\n"
+
+     # Initialize pipeline lazily
+     pipe = _init_pipeline()
+
+     # Build input for the pipeline. The exact expected format can vary by pipeline task.
+     if image is not None:
+         # Provide an image + text prompt; the pipeline expects inputs in a tuple/list depending on the model
+         input_for_pipe = {"image": image, "text": rag_prompt}
+         try:
+             out = pipe(input_for_pipe, max_new_tokens=512)
+         except Exception:
+             # Fall back to a plain text prompt if the image pipeline fails
+             out = pipe(rag_prompt, max_new_tokens=512)
+     else:
+         out = pipe(rag_prompt, max_new_tokens=512)
+
+     # Normalize output; many pipelines return a list of dicts
+     try:
+         if isinstance(out, list) and len(out) > 0:
+             # Prefer a sensible key if present
+             if isinstance(out[0], dict):
+                 text = out[0].get("generated_text") or out[0].get("text") or str(out[0])
+             else:
+                 text = str(out[0])
+         else:
+             text = str(out)
+     except Exception:
+         text = str(out)
+
+     return text
+
+
+ # ...existing code...
+ with gr.Blocks() as iface:
+     chatbot = gr.Chatbot(label="Ayaresa chat")
+     with gr.Row():
+         with gr.Column(scale=3):
+             txt = gr.Textbox(label="Enter a prompt", placeholder="Type your question here...", lines=2)
+         with gr.Column(scale=1):
+             img = gr.Image(type="pil", label="Image (optional)")
+     with gr.Row():
+         send = gr.Button("Send")
+         clear = gr.Button("Clear")
+
+     # Keep conversation state explicitly
+     state = gr.State([])
+
+     def submit_fn(message, image, history):
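+         # Returns (updated chat history, cleared textbox value, updated state), matching outputs=[chatbot, txt, state] below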
+         history = history or []
+         if (not message or message.strip() == "") and image is None:
+             return history, "", history
+         resp = generate_medgemma_rag_response(message or "", image)
+         history.append((message or "", resp))
+         return history, "", history
+
+     send.click(submit_fn, inputs=[txt, img, state], outputs=[chatbot, txt, state])
+     txt.submit(submit_fn, inputs=[txt, img, state], outputs=[chatbot, txt, state])
+     clear.click(lambda: ([], "", []), inputs=None, outputs=[chatbot, txt, state])
+
  if __name__ == "__main__":
+     iface.launch()
requirements.txt ADDED
@@ -0,0 +1,14 @@
+ gradio==4.10.0
+ torch
+ transformers
+ datasets
+ pandas
+ Pillow
+ langchain
+ langchain-community
+ chromadb
+ sentence-transformers
+ pypdf
+ bitsandbytes
+ accelerate
+ huggingface-hub