Spaces:
Running
Running
| import gradio as gr | |
| from helper import download_hugging_face_embeddings | |
| from langchain_pinecone import PineconeVectorStore | |
| from langchain_openai import ChatOpenAI | |
| from langchain.chains import create_retrieval_chain | |
| from langchain.chains.combine_documents import create_stuff_documents_chain | |
| from langchain_core.prompts import ChatPromptTemplate | |
| from langchain.memory import ConversationBufferMemory | |
| from prompt import * | |
| import os | |
| import re | |
# Get API keys from environment (Hugging Face Spaces secrets).
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

# Fail fast with an actionable message if either secret is missing.
if not PINECONE_API_KEY:
    raise ValueError("PINECONE_API_KEY not found! Please add it in Space Settings > Secrets")
if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY not found! Please add it in Space Settings > Secrets")

print("API keys loaded successfully!")

# Embedding model used to encode queries against the existing Pinecone index.
embeddings = download_hugging_face_embeddings()

index_name = "medical-chatbot"

from pinecone import Pinecone

# PineconeVectorStore reads PINECONE_API_KEY from the environment; set it
# explicitly so the dependency is visible at this call site. (The previous
# code constructed an unused `Pinecone(api_key=...)` client here, which did
# not actually pass the key to the vector store.)
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY

docsearch = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings,
)

# Retrieve the 10 most similar chunks per query.
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 10})

# NOTE(review): confirm "gpt-5.2" is a valid OpenAI model id for this account.
chatModel = ChatOpenAI(model="gpt-5.2", api_key=OPENAI_API_KEY)

# Conversation memory shared across turns (one global session per process).
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# system_prompt comes from `prompt.py` (star import above); the second system
# message injects the running conversation so the model keeps context.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("system", "Conversation so far: {chat_history}"),
    ("human", "{input}"),
])

# Stuff retrieved documents into the prompt, then wrap with retrieval.
question_answer_chain = create_stuff_documents_chain(chatModel, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)
def format_citation(filename):
    """
    Convert a source filename like 'Author_Year - Title' into a readable
    citation 'Author (Year) – Title'.

    Handles 'et al.' and '&' in author names and strips a trailing
    .pdf/.txt extension.

    Examples:
        'Ng et al._2023 - Problems and Solutions' -> 'Ng et al. (2023) – Problems and Solutions'
        'Godley & Xia_2016 - Physics Guide' -> 'Godley & Xia (2016) – Physics Guide'
        'Khan (2003) - Therapy' -> 'Khan (2003) – Therapy'
    """
    # Remove file extension if present.
    filename = re.sub(r'\.(pdf|txt)$', '', filename, flags=re.IGNORECASE)

    # Pattern 1: "Author_YEAR - Title" format.
    match = re.match(r'^(.+?)_(\d{4})\s*-\s*(.+)$', filename)
    if match:
        author, year, title = match.groups()
        # Underscores in the author part stand in for spaces.
        author = author.replace('_', ' ')
        return f"{author} ({year}) – {title}"

    # Pattern 2: year already parenthesized, "Author (YEAR) - Title".
    match = re.match(r'^(.+?)\s*\((\d{4})\)\s*-\s*(.+)$', filename)
    if match:
        author, year, title = match.groups()
        author = author.replace('_', ' ')
        return f"{author} ({year}) – {title}"

    # Pattern 3 (fallback): just clean up underscores and separator dashes.
    filename = filename.replace('_', ' ')
    # BUGFIX: require whitespace on both sides of the hyphen. The previous
    # pattern (r'\s*-\s*') also rewrote intra-word hyphens, turning e.g.
    # 'X-ray' into 'X – ray'.
    filename = re.sub(r'\s+-\s+', ' – ', filename)
    return filename
def format_latex_for_gradio(text):
    """Rewrite LaTeX math delimiters into the $-style ones Gradio renders.

    Display math ``\\[ ... \\]`` becomes ``$$ ... $$`` (may span lines),
    and inline math ``\\( ... \\)`` becomes ``$ ... $``. Everything else,
    including markdown, passes through unchanged.
    """
    display_math = re.compile(r'\\\[(.*?)\\\]', re.DOTALL)
    inline_math = re.compile(r'\\\((.*?)\\\)')
    text = display_math.sub(r'$$\1$$', text)
    return inline_math.sub(r'$\1$', text)
def chat_function(message, history):
    """Gradio chat handler: answer `message` via the RAG chain and append sources.

    Args:
        message: The user's latest message.
        history: Gradio-managed chat history (unused; we keep our own memory).

    Returns:
        The answer string with a "**Sources:**" section appended and LaTeX
        delimiters converted for Gradio rendering.
    """
    # BUGFIX: snapshot the conversation BEFORE storing the new message.
    # Previously the user message was added to memory first, so the current
    # question appeared in both {chat_history} and {input}.
    chat_history = memory.load_memory_variables({})["chat_history"]

    response = rag_chain.invoke({
        "input": message,
        "chat_history": chat_history
    })
    final_answer = response["answer"]

    # Collect unique source filenames from the retrieved documents,
    # preserving retrieval order.
    source_documents = response.get("context", [])
    unique_sources = []
    seen_sources = set()
    for doc in source_documents:
        source = doc.metadata.get('source', None)
        if source and source not in seen_sources:
            unique_sources.append(format_citation(source))
            seen_sources.add(source)

    # Remove ANY existing "Sources:" section that the LLM generated.
    # BUGFIX: also consume surrounding '**' so a markdown-bold
    # "**Sources:**" heading does not leave a stray '**' behind.
    if "Sources:" in final_answer:
        final_answer = re.split(r'\n*\*{0,2}Sources:\*{0,2}\s*', final_answer)[0].strip()

    # Heuristic: did the model say the answer is NOT in the retrieved docs?
    not_found_phrases = [
        "retrieved documents do not contain",
        "not found in the retrieved",
        "no information about this",
        "retrieved documents do not mention",
        "not available in the retrieved"
    ]
    info_not_in_docs = any(phrase in final_answer.lower() for phrase in not_found_phrases)

    # Append a citation section consistent with where the answer came from.
    if info_not_in_docs:
        final_answer += "\n\n**Sources:**\n\nNone - Answer based on general medical physics knowledge"
    elif unique_sources:
        final_answer += "\n\n**Sources:**\n\n"
        for source in unique_sources:
            final_answer += f"- {source}\n"
    else:
        final_answer += "\n\n**Sources:**\n\nNone available"

    # Convert \[...\] / \(...\) LaTeX delimiters to $/$$ for Gradio.
    final_answer = format_latex_for_gradio(final_answer)

    # Persist the turn only after the chain has run, so memory never
    # contains the in-flight question.
    memory.chat_memory.add_user_message(message)
    memory.chat_memory.add_ai_message(final_answer)
    return final_answer
# Create Gradio interface with formatting support.
# ChatInterface wires chat_function to a web chat UI; the latex_delimiters on
# the Chatbot component make it render the $ / $$ math emitted by
# format_latex_for_gradio.
demo = gr.ChatInterface(
    fn=chat_function,
    title="☢️ Radiotherapy Chatbot",
    description="By: Haneen Sakaji",
    # Example prompts shown beneath the input box.
    examples=[
        "What is an organ at risk?",
        "What are the guidelines for single photon beam use?",
        "Calculate the activity of an Ir-192 source after 2 months if initial activity is 13.5 Ci"
    ],
    chatbot=gr.Chatbot(
        latex_delimiters=[
            {"left": "$$", "right": "$$", "display": True},
            {"left": "$", "right": "$", "display": False},
        ],
        height=600,
    ),
)

# Entry point: launch the app when run as a script (Hugging Face Spaces does
# this automatically for app.py).
if __name__ == "__main__":
    demo.launch()