# RAG_test / app.py
# NOTE(review): the lines below were Hugging Face Space page chrome
# ("serene-shen's picture", "Update app.py", commit "17e4bcd verified")
# scraped into the source; kept here as comments so the module parses.
import os
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from transformers import pipeline, BitsAndBytesConfig
import torch
# 1. SETUP & CONFIG
# Use environment variables for secrets like your HF_TOKEN
# Read the Hugging Face access token from the environment so it is never
# hard-coded in the repo (set it as a secret in the Space settings).
# May be None if unset; downloading the gated Gemma model below will then fail.
HF_TOKEN = os.getenv("HF_TOKEN")
# Sentence embeddings trained on biomedical text (PubMedBERT) — used both for
# indexing the PDFs and for query-time similarity search.
embeddings = HuggingFaceEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
# 2. DYNAMIC LOADING FUNCTION
def build_or_load_db():
    """Return a Chroma vector store, reusing the persisted DB when present.

    If ``./chroma_db`` exists it is loaded with the module-level ``embeddings``;
    otherwise every PDF in ``./data/`` is split into overlapping chunks and
    indexed into a new persistent store at the same path.

    Returns:
        A ready-to-query ``Chroma`` instance.

    Raises:
        FileNotFoundError: if no persisted DB exists and no PDFs were found
            in ``./data/``.
    """
    if os.path.exists("./chroma_db"):
        # Reuse the existing index — avoids re-embedding on every restart.
        return Chroma(persist_directory="./chroma_db", embedding_function=embeddings)
    # No DB yet: build one from the PDFs shipped in the repo's 'data' folder.
    loader = DirectoryLoader('./data/', glob="./*.pdf", loader_cls=PyPDFLoader)
    docs = loader.load()
    if not docs:
        # Fail loudly with a clear message instead of letting
        # Chroma.from_documents choke on an empty chunk list.
        raise FileNotFoundError(
            "No PDF documents found in ./data/ — cannot build the vector DB."
        )
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = splitter.split_documents(docs)
    return Chroma.from_documents(chunks, embeddings, persist_directory="./chroma_db")
# Build (first run) or load (subsequent runs) the persistent Chroma store.
vector_db = build_or_load_db()
# 3. LLM SETUP (quantized only when a GPU is available)
# bitsandbytes 4-bit quantization requires CUDA; passing load_in_4bit on a
# CPU-only host raises at model-load time. The original code only switched
# the compute dtype, which does not make 4-bit loading work on CPU — so the
# quantization config is now applied conditionally.
if torch.cuda.is_available():
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
    )
    _model_kwargs = {"quantization_config": quant_config}
    _device_map = "auto"
else:
    # Full-precision CPU fallback (slower, but actually loads).
    _model_kwargs = {}
    _device_map = None

pipe = pipeline(
    "text-generation",
    model="google/gemma-2-2b-it",
    model_kwargs=_model_kwargs,
    device_map=_device_map,
    token=HF_TOKEN,  # gated model: needs an accepted license + HF token
)
# 4. CHAT LOGIC
def predict(message, history):
    """Answer a user question with retrieval-augmented generation.

    Args:
        message: The user's question.
        history: Chat history supplied by ``gr.ChatInterface`` (unused —
            retrieval and prompting are per-question only).

    Returns:
        The model's answer grounded in the retrieved evidence, followed by
        a list of the source documents used.
    """
    # 1. Retrieval: top-3 most similar chunks from the vector store.
    results = vector_db.similarity_search(message, k=3)
    context = "\n---\n".join(d.page_content for d in results)
    # .get() guards against loader-dependent metadata — not every chunk is
    # guaranteed to carry 'source'/'page' keys.
    sources = "\n".join(
        f"- {d.metadata.get('source', 'unknown')} (Pg {d.metadata.get('page', '?')})"
        for d in results
    )
    # 2. Prompting: constrain the model to the retrieved evidence only.
    prompt = f"""
You are a clinical assistant specializing in radiomics.
Use ONLY the following snippets of medical evidence to answer the question.
If the answer is not contained in the text, say "No specific evidence found in database."
Do not hallucinate. Provide a concise answer.
EVIDENCE:
{context}
QUESTION: {message}
ANSWER:"""
    # 3. Generation: strip the prompt so the user sees only the answer.
    response = pipe(prompt, max_new_tokens=150)[0]['generated_text'].split("ANSWER:")[-1]
    answer = response.strip()
    # Fix: 'sources' was computed but never shown to the user — surface the
    # citations alongside the answer.
    if sources:
        answer += f"\n\nSources:\n{sources}"
    return answer
# 5. GRADIO INTERFACE
# Wire the RAG pipeline into a chat UI and start serving.
demo = gr.ChatInterface(fn=predict, title="Radiomics Clinical Assistant")
demo.launch()