| | import streamlit as st |
| | import os |
| | import json |
| | import requests |
| | from langchain_community.document_loaders import PyMuPDFLoader |
| | from openai import OpenAI |
| | import tiktoken |
| | import pandas as pd |
| | from langchain.text_splitter import RecursiveCharacterTextSplitter |
| | from langchain_community.embeddings.openai import OpenAIEmbeddings |
| | from langchain_community.vectorstores import Chroma |
| | import tempfile |
| |
|
| |
|
# API credentials are taken from the environment; never hard-code keys.
# NOTE(review): if either variable is unset these are None — the OpenAI
# client constructor below will then fail at import time with an unclear
# error. Consider a friendly st.error() guard; confirm desired behavior.
OPENAI_API_KEY = os.getenv("API_KEY")
OPENAI_API_BASE = os.getenv("API_BASE")

# Single module-level client, reused by every chat-completion call.
client = OpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_API_BASE)
| |
|
| | |
# System prompt for the RAG QA chain. Fixed: the previous prompt was a
# copy-paste leftover addressed to doctors at "St. Bernard's Medical Center",
# which contradicted the generic research-assistant UI and the user template.
# The refusal phrase, the ###Context token, and the Answer/Source response
# format are preserved because generate_rag_response and the template rely
# on this structure.
qna_system_message = """
You are an AI assistant designed to help users answer questions about documents they have uploaded. Your task is to provide accurate, concise, and relevant information in response to the user's question based on the context provided.

User input will include the necessary context for you to answer their questions. This context will begin with the token: ###Context. The context contains excerpts from the uploaded documents that are relevant to the query, along with their source details.

When crafting your response:
1. Use only the provided context to answer the question.
2. If the answer is found in the context, respond with concise and actionable insights.
3. Include the source reference with the page number, section, or publication, as provided in the context.
4. If the question is unrelated to the context or the context is empty, clearly respond with: "Sorry, this is out of my knowledge base."

Please adhere to the following response guidelines:
- Provide clear, direct answers using only the given context.
- Do not include any additional information outside of the context.
- Avoid rephrasing or summarizing the context unless explicitly relevant to the question.
- If no relevant answer exists in the context, respond with: "Sorry, this is out of my knowledge base."
- If the context is not provided, your response should also be: "Sorry, this is out of my knowledge base."

Here is an example of how to structure your response:

Answer:
[Answer based on context]

Source:
[Source details with page or section]
"""
| |
|
| | |
# User-message template; {context} and {question} are substituted (via
# str.replace, not str.format) in generate_rag_response. Fixed: the wording
# previously hard-coded "GEN AI Research Paper", which contradicted the
# generic PDF-upload assistant this app presents.
qna_user_message_template = """
###Context
Here are some excerpts from the uploaded documents and their sources that are relevant to the question mentioned below:
{context}

###Question
{question}
"""
| |
|
@st.cache_resource
def load_and_process_pdfs(uploaded_files):
    """Load uploaded PDFs, chunk them, embed them, and return a retriever.

    Each Streamlit UploadedFile is written to a temporary file (PyMuPDFLoader
    requires a filesystem path), loaded into LangChain documents, split into
    ~1000-token chunks, embedded with OpenAI embeddings, and indexed in an
    in-memory Chroma vector store.

    Args:
        uploaded_files: list of Streamlit UploadedFile objects (PDFs).

    Returns:
        A similarity-search retriever over the chunks (top k=5).
    """
    all_documents = []
    for uploaded_file in uploaded_files:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
            tmp_file_path = tmp_file.name
        # Fix: the temp file previously leaked if loader.load() raised,
        # because os.remove was only reached on success.
        try:
            loader = PyMuPDFLoader(tmp_file_path)
            all_documents.extend(loader.load())
        finally:
            os.remove(tmp_file_path)

    # Token-based splitting (cl100k_base matches OpenAI chat/embedding models)
    # keeps each chunk within a predictable token budget.
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        encoding_name='cl100k_base',
        chunk_size=1000,
    )
    document_chunks = text_splitter.split_documents(all_documents)

    embedding_model = OpenAIEmbeddings(
        openai_api_key=OPENAI_API_KEY,
        openai_api_base=OPENAI_API_BASE
    )

    # NOTE(review): no persist_directory, so the index lives only for this
    # process / cache entry — confirm that is intended.
    vectorstore = Chroma.from_documents(
        document_chunks,
        embedding_model
    )
    return vectorstore.as_retriever(search_type='similarity', search_kwargs={'k': 5})
| |
|
def generate_rag_response(user_input, retriever, max_tokens=500, temperature=0, top_p=0.95):
    """Answer a user question with retrieval-augmented generation.

    Retrieves the top chunks relevant to `user_input`, splices them into the
    user-message template, and asks the chat model for a grounded answer.

    Args:
        user_input: the user's question (plain text).
        retriever: a LangChain retriever produced by load_and_process_pdfs.
        max_tokens: completion-length cap passed to the API.
        temperature: sampling temperature (0 = deterministic-ish).
        top_p: nucleus-sampling parameter.

    Returns:
        The model's answer as a string, or an error message string if the
        API call fails (kept as a string so the Streamlit UI can display it).
    """
    relevant_document_chunks = retriever.get_relevant_documents(query=user_input)
    context_for_query = ". ".join(d.page_content for d in relevant_document_chunks)

    # str.replace (not str.format) so literal braces in document text are safe.
    # NOTE(review): sequential replacement means a literal "{question}" inside
    # retrieved context would also be substituted — benign in practice.
    user_message = qna_user_message_template.replace('{context}', context_for_query)
    user_message = user_message.replace('{question}', user_input)

    try:
        api_response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": qna_system_message},
                {"role": "user", "content": user_message}
            ],
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p
        )
        # Fix: message.content may be None per the API schema; previously
        # .strip() on None raised and was swallowed into the generic error
        # string. Coerce to "" instead.
        response = (api_response.choices[0].message.content or "").strip()
    except Exception as e:
        # Broad catch is deliberate: any failure becomes a user-visible
        # message rather than a Streamlit traceback.
        response = f'Sorry, I encountered the following error: \n {e}'

    return response
| |
|
| | |
# --- Streamlit UI (top-level script: re-executed on every interaction) ---
st.title("LLM-Powered Research Assistant")

# Multiple PDFs allowed; returns a list of UploadedFile objects (or None).
uploaded_files = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True)

retriever = None
if uploaded_files:
    st.info("Processing uploaded PDFs...")
    # @st.cache_resource on load_and_process_pdfs means reruns with the same
    # files reuse the existing vector store instead of re-embedding.
    retriever = load_and_process_pdfs(uploaded_files)
    st.success("PDFs processed and ready for questioning!")

# Only show the question box once documents have been indexed.
if retriever:
    user_question = st.text_input("Ask a question about the uploaded documents:")
    if user_question:
        with st.spinner("Generating response..."):
            rag_response = generate_rag_response(user_question, retriever)
            st.write(rag_response)