Update app.py
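Replaces the stock `InferenceClient` chat template with a self-contained RAG app: TinyLlama-1.1B-Chat runs locally through a `transformers` pipeline, retrieval comes from a persisted Chroma store embedded with nomic-embed-text-v1.5, and a custom `ManualQAChain` ties retrieval, prompting, and generation together behind a `gr.ChatInterface`.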
app.py
CHANGED
@@ -1,70 +1,123 @@
-import gradio as gr
-from huggingface_hub import InferenceClient
-
-
-def respond(
-    message,
-    history: list[dict[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-    hf_token: gr.OAuthToken,
-):
-    """
-    For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-    """
-    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
-
-    messages = [{"role": "system", "content": system_message}]
-
-    messages.extend(history)
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        choices = message.choices
-        token = ""
-        if len(choices) and choices[0].delta.content:
-            token = choices[0].delta.content
-
-        response += token
-        yield response
-
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-chatbot = gr.ChatInterface(
-    respond,
-    type="messages",
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-
-with gr.Blocks() as demo:
-    with gr.Sidebar():
-        gr.LoginButton()
-    chatbot.render()
-
-
-if __name__ == "__main__":
-    demo.launch()
+import os
+import torch
+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+from langchain_huggingface import HuggingFacePipeline, HuggingFaceEmbeddings
+from langchain_chroma import Chroma
+from typing import Dict, Any
+
+# --- 1. SETUP & MODEL LOADING ---
+print("⏳ Loading Models...")
+
+# Initialize Embeddings (CPU is fine for this)
+embedding_function = HuggingFaceEmbeddings(
+    model_name="nomic-ai/nomic-embed-text-v1.5",
+    model_kwargs={"trust_remote_code": True, "device": "cpu"}
+)
+
+# Load Vector Database
+# NOTE: Ensure the 'chroma_db' folder is uploaded to the same directory as app.py
+if not os.path.exists("./chroma_db"):
+    raise FileNotFoundError("❌ Error: 'chroma_db' folder not found! Please upload your vector database.")
+
+vector_db = Chroma(persist_directory="./chroma_db", embedding_function=embedding_function)
+
+# Load LLM (TinyLlama)
+# device_map="auto" uses the GPU if the Space has one, otherwise falls back to CPU
+model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
+
+# Create HF Pipeline
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    max_new_tokens=256,
+    repetition_penalty=1.15,
+    temperature=0.1,
+    do_sample=True
+)
+
+llm = HuggingFacePipeline(pipeline=pipe)
+
+# --- 2. DEFINE MANUAL QA CHAIN ---
+class ManualQAChain:
+    def __init__(self, vector_store: Chroma, llm_pipeline: HuggingFacePipeline):
+        self.retriever = vector_store.as_retriever(search_kwargs={"k": 2})
+        self.llm = llm_pipeline
+
+    def invoke(self, inputs: Dict[str, str]) -> Dict[str, Any]:
+        query = inputs.get("query", "")
+
+        # 1. RETRIEVAL
+        docs = self.retriever.invoke(query)
+        context = "\n\n".join([d.page_content for d in docs])
+
+        # 2. PROMPT CREATION (TinyLlama chat format, context truncated to fit)
+        max_context_length = 2000
+        prompt = f"""<|system|>
+You are a helpful and accurate medical assistant.
+Use ONLY the following context to answer the user's question.
+If the context does not contain the answer, say: "I cannot find the answer in the provided context."
+
+Context:
+{context[:max_context_length]}
+</s>
+<|user|>
+{query}
+</s>
+<|assistant|>
+"""
+        # 3. GENERATION
+        response = self.llm.invoke(prompt)
+
+        # Handle output format (some versions return a list, some a string)
+        text = response[0]['generated_text'] if isinstance(response, list) else str(response)
+
+        # Clean output: the pipeline echoes the prompt, so keep only what follows the assistant tag
+        if "<|assistant|>" in text:
+            final_answer = text.split("<|assistant|>")[-1].strip()
+        else:
+            final_answer = text.strip()
+
+        return {"result": final_answer, "source_documents": docs}
+
+# Initialize Chain
+qa_chain = ManualQAChain(vector_db, llm)
+print("✅ RAG Pipeline is ready.")
+
+# --- 3. GRADIO UI FUNCTION ---
+def medical_rag_chat(message, history):
+    if not message:
+        return "Please ask a medical question."
+    try:
+        response = qa_chain.invoke({"query": message})
+        answer_text = response['result']
+
+        # Format Sources
+        sources_text = "\n\n---\n**Retrieved Context:**\n"
+        if response.get('source_documents'):
+            for i, doc in enumerate(response['source_documents']):
+                topic = doc.metadata.get('focus_area', 'Medical Protocol')
+                snippet = doc.page_content.replace('\n', ' ').strip()
+                sources_text += f"**{i+1}. [{topic}]** *\"{snippet[:500]}...\"*\n"
+        else:
+            sources_text += "(No context found.)"
+
+        return answer_text + sources_text
+    except Exception as e:
+        return f"⚠️ Error: {str(e)}"
+
+# --- 4. LAUNCH UI ---
+# Note: share=True is NOT needed in HF Spaces
+demo = gr.ChatInterface(
+    fn=medical_rag_chat,
+    title="Cardio-Oncology RAG Assistant",
+    description="TinyLlama-1.1B + MedQuAD RAG",
+    examples=["What is (are) BRCA2 hereditary breast and ovarian cancer syndrome ?", "Who is at risk for Heart Failure?"],
+    concurrency_limit=2
+)
+
+if __name__ == "__main__":
+    demo.launch()
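The new app.py only opens ./chroma_db read-only, so the store has to be built ahead of time and uploaded with the Space, as the NOTE comment in the diff says. A minimal ingestion sketch is below; it is an assumption, not part of this commit. The medquad.csv filename and its question/answer/focus_area columns are hypothetical (only the focus_area metadata key is implied by the code above), and the embedding model must match the one app.py loads, or retrieval will silently degrade.

    # build_db.py -- hypothetical one-off ingestion script (not in this commit)
    import pandas as pd
    from langchain_chroma import Chroma
    from langchain_core.documents import Document
    from langchain_huggingface import HuggingFaceEmbeddings

    # Must match the embedding model app.py queries with.
    embedding_function = HuggingFaceEmbeddings(
        model_name="nomic-ai/nomic-embed-text-v1.5",
        model_kwargs={"trust_remote_code": True, "device": "cpu"},
    )

    # Assumed source file and column names; adjust to the actual MedQuAD export.
    df = pd.read_csv("medquad.csv").dropna(subset=["question", "answer"])

    docs = [
        Document(
            page_content=f"Question: {row.question}\nAnswer: {row.answer}",
            metadata={"focus_area": str(row.focus_area)},
        )
        for row in df.itertuples()
    ]

    # Writes the persistent store that app.py later opens from ./chroma_db.
    Chroma.from_documents(docs, embedding=embedding_function, persist_directory="./chroma_db")

Run it once locally, then commit the resulting chroma_db folder next to app.py.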
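A Gradio Space preinstalls only gradio itself, so this commit implies a matching requirements.txt. The set below is inferred from the imports, not taken from the repo, and is left unpinned on purpose:

    torch
    transformers
    accelerate              # needed once device_map="auto" is used
    langchain-huggingface
    langchain-chroma
    sentence-transformers   # backend for HuggingFaceEmbeddings
    einops                  # required by nomic-embed-text's remote code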