# Radiomics Clinical Assistant — RAG chatbot Hugging Face Space (Gradio + LangChain).
| import os | |
| import gradio as gr | |
| from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain_chroma import Chroma | |
| from transformers import pipeline, BitsAndBytesConfig | |
| import torch | |
# 1. SETUP & CONFIG
# Use environment variables for secrets like your HF_TOKEN
# Returns None when the variable is unset; the pipeline below passes it through as-is.
HF_TOKEN = os.getenv("HF_TOKEN")
# Initialize Embeddings (Medical-specific)
# NOTE(review): PubMedBERT-based sentence embeddings — the same model must be used
# for both building the Chroma index and querying it, or similarity scores are meaningless.
embeddings = HuggingFaceEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
# 2. DYNAMIC LOADING FUNCTION
def build_or_load_db():
    """Load the persisted Chroma index if present; otherwise build it from ./data PDFs.

    Returns:
        Chroma: a vector store persisted under ./chroma_db.

    Raises:
        FileNotFoundError: no existing index and no ./data folder to build one from.
        ValueError: no existing index and ./data contains no loadable PDFs.
    """
    if os.path.exists("./chroma_db"):
        # Reuse the existing index; embedding_function must match the model used to build it.
        return Chroma(persist_directory="./chroma_db", embedding_function=embeddings)
    # If no DB, load PDFs from the 'data' folder in your repo
    if not os.path.isdir("./data"):
        raise FileNotFoundError(
            "No ./chroma_db index found and no ./data folder with PDFs to build one from."
        )
    loader = DirectoryLoader('./data/', glob="./*.pdf", loader_cls=PyPDFLoader)
    docs = loader.load()
    if not docs:
        # Fail loudly here instead of letting Chroma.from_documents choke on an empty list.
        raise ValueError("No PDF documents found in ./data — cannot build the vector database.")
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = splitter.split_documents(docs)
    return Chroma.from_documents(chunks, embeddings, persist_directory="./chroma_db")


vector_db = build_or_load_db()
# 3. LLM SETUP (Quantized for CPU/GPU flexibility)
# bitsandbytes 4-bit quantization is CUDA-only; passing quantization_config to a
# CPU-only pipeline crashes at model-load time, so apply it only when a GPU exists.
_has_cuda = torch.cuda.is_available()
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16 if _has_cuda else torch.float32,
)
pipe = pipeline(
    "text-generation",
    model="google/gemma-2-2b-it",
    # Quantize on GPU; fall back to full precision on CPU instead of crashing.
    model_kwargs={"quantization_config": quant_config} if _has_cuda else {},
    device_map="auto" if _has_cuda else None,
    token=HF_TOKEN,  # gated model — requires an accepted license / valid token
)
# 4. CHAT LOGIC
def predict(message, history):
    """Answer a user question via retrieval-augmented generation.

    Args:
        message: the user's question (str).
        history: prior chat turns supplied by gr.ChatInterface (unused here).

    Returns:
        str: the model's answer followed by the retrieved source citations.
    """
    # 1. Retrieval (Your Search Engine): top-3 most similar chunks.
    results = vector_db.similarity_search(message, k=3)
    context = "\n---\n".join(d.page_content for d in results)
    # Use .get() — not every document is guaranteed to carry 'source'/'page' metadata.
    sources = "\n".join(
        f"- {d.metadata.get('source', 'unknown')} (Pg {d.metadata.get('page', '?')})"
        for d in results
    )
    # 2. THE PROMPT: force the model to stay in character and use only the evidence.
    prompt = f"""
You are a clinical assistant specializing in radiomics.
Use ONLY the following snippets of medical evidence to answer the question.
If the answer is not contained in the text, say "No specific evidence found in database."
Do not hallucinate. Provide a concise answer.
EVIDENCE:
{context}
QUESTION: {message}
ANSWER:"""
    # 3. Generation — text-generation pipelines echo the prompt, so keep only the
    # text after the final "ANSWER:" marker.
    response = pipe(prompt, max_new_tokens=150)[0]['generated_text'].split("ANSWER:")[-1]
    answer = response.strip()
    # Surface the citations (previously computed but never shown to the user).
    if sources:
        answer = f"{answer}\n\nSources:\n{sources}"
    return answer
# 5. GRADIO INTERFACE
# Blocking call: builds the chat UI around predict() and starts the web server.
gr.ChatInterface(predict, title="Radiomics Clinical Assistant").launch()