# RAG_test / app.py
# NOTE(review): the lines below were Hugging Face Space page chrome
# ("serene-shen's picture", "Update app.py", commit "17e4bcd verified")
# scraped into the source; kept here as comments so the module parses.
import os
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from transformers import pipeline, BitsAndBytesConfig
import torch
# 1. SETUP & CONFIG
# Use environment variables for secrets like your HF_TOKEN
# Read the Hugging Face access token from the environment so it is never
# hard-coded in the repo (set it as a secret in the Space settings).
# May be None if unset; downloading the gated Gemma model below will then fail.
HF_TOKEN = os.getenv("HF_TOKEN")
# Sentence embeddings trained on biomedical text (PubMedBERT) — used both for
# indexing the PDFs and for query-time similarity search.
embeddings = HuggingFaceEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
# 2. DYNAMIC LOADING FUNCTION
def build_or_load_db():
    """Return a Chroma vector store, reusing the persisted DB when present.

    If ``./chroma_db`` exists it is loaded with the module-level ``embeddings``;
    otherwise every PDF in ``./data/`` is split into overlapping chunks and
    indexed into a new persistent store at the same path.

    Returns:
        A ready-to-query ``Chroma`` instance.

    Raises:
        FileNotFoundError: if no persisted DB exists and no PDFs were found
            in ``./data/``.
    """
    if os.path.exists("./chroma_db"):
        # Reuse the existing index — avoids re-embedding on every restart.
        return Chroma(persist_directory="./chroma_db", embedding_function=embeddings)
    # No DB yet: build one from the PDFs shipped in the repo's 'data' folder.
    loader = DirectoryLoader('./data/', glob="./*.pdf", loader_cls=PyPDFLoader)
    docs = loader.load()
    if not docs:
        # Fail loudly with a clear message instead of letting
        # Chroma.from_documents choke on an empty chunk list.
        raise FileNotFoundError(
            "No PDF documents found in ./data/ — cannot build the vector DB."
        )
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = splitter.split_documents(docs)
    return Chroma.from_documents(chunks, embeddings, persist_directory="./chroma_db")
# Build (first run) or load (subsequent runs) the persistent Chroma store.
vector_db = build_or_load_db()
# 3. LLM SETUP (quantized only when a GPU is available)
# bitsandbytes 4-bit quantization requires CUDA; passing load_in_4bit on a
# CPU-only host raises at model-load time. The original code only switched
# the compute dtype, which does not make 4-bit loading work on CPU — so the
# quantization config is now applied conditionally.
if torch.cuda.is_available():
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
    )
    _model_kwargs = {"quantization_config": quant_config}
    _device_map = "auto"
else:
    # Full-precision CPU fallback (slower, but actually loads).
    _model_kwargs = {}
    _device_map = None

pipe = pipeline(
    "text-generation",
    model="google/gemma-2-2b-it",
    model_kwargs=_model_kwargs,
    device_map=_device_map,
    token=HF_TOKEN,  # gated model: needs an accepted license + HF token
)
# 4. CHAT LOGIC
def predict(message, history):
    """Answer a user question with retrieval-augmented generation.

    Args:
        message: The user's question.
        history: Chat history supplied by ``gr.ChatInterface`` (unused —
            retrieval and prompting are per-question only).

    Returns:
        The model's answer grounded in the retrieved evidence, followed by
        a list of the source documents used.
    """
    # 1. Retrieval: top-3 most similar chunks from the vector store.
    results = vector_db.similarity_search(message, k=3)
    context = "\n---\n".join(d.page_content for d in results)
    # .get() guards against loader-dependent metadata — not every chunk is
    # guaranteed to carry 'source'/'page' keys.
    sources = "\n".join(
        f"- {d.metadata.get('source', 'unknown')} (Pg {d.metadata.get('page', '?')})"
        for d in results
    )
    # 2. Prompting: constrain the model to the retrieved evidence only.
    prompt = f"""
You are a clinical assistant specializing in radiomics.
Use ONLY the following snippets of medical evidence to answer the question.
If the answer is not contained in the text, say "No specific evidence found in database."
Do not hallucinate. Provide a concise answer.
EVIDENCE:
{context}
QUESTION: {message}
ANSWER:"""
    # 3. Generation: strip the prompt so the user sees only the answer.
    response = pipe(prompt, max_new_tokens=150)[0]['generated_text'].split("ANSWER:")[-1]
    answer = response.strip()
    # Fix: 'sources' was computed but never shown to the user — surface the
    # citations alongside the answer.
    if sources:
        answer += f"\n\nSources:\n{sources}"
    return answer
# 5. GRADIO INTERFACE
# Wire the RAG pipeline into a chat UI and start serving.
demo = gr.ChatInterface(fn=predict, title="Radiomics Clinical Assistant")
demo.launch()