# chatbot_afore / app.py
# Author: josequinonez — commit 50b25c8 (verified)
# Standard library
import os
import re
from glob import glob  # Added glob for finding files

# Third-party
import streamlit as st
import tiktoken
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.embeddings.openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from openai import OpenAI

# Currently unused imports kept for reference:
#import json # No longer directly reading config.json in app.py
#import requests
#import pandas as pd
#import tempfile # No longer needed for files already on disk
# Retrieve secrets from environment variables
# Ensure these are set in your deployment environment (e.g., Streamlit Cloud secrets)
OPENAI_API_KEY = os.environ.get("API_KEY")
OPENAI_API_BASE = os.environ.get("API_BASE")
# Initialize OpenAI client
client = OpenAI(
api_key=OPENAI_API_KEY,
base_url=OPENAI_API_BASE
)
# Define the system prompt for the model
qna_system_message = """
You are Customer service AI assistant designed to support customers in efficiently reviewing operational manuals. Your task is to provide evidence-based, concise, and relevant summaries based on the context provided from documents.
User input will include the necessary context for you to answer their questions. This context will begin with the token:
###Context
The context contains excerpts from one or more documents in spanish, along with associated metadata such as titles, authors, abstracts, keywords, and specific sections relevant to the query.
When crafting your response
-Use only the provided context to answer the question.
-If the answer is found in the context, respond with concise and insight-focused summaries.
-Include the paper title and, where applicable, arXiv ID or section reference as the source.
-If the question is unrelated to the context or the context is empty, clearly respond with: "Sorry, this is out of my knowledge base."
Please adhere to the following response guidelines:
-Provide clear, direct answers in spanish using only the given context.
-Do not include any additional information outside of the context.
-Avoid rephrasing or generalizing unless explicitly relevant to the question.
-If no relevant answer exists in the context, respond with: "Disculpa, no tengo el conocimiento para responder a esa pregunta."
-If the context is not provided, your response should also be: "Disculpa, no tengo el conocimiento para responder a esa pregunta."
Here is an example of how to structure your response:
Respuesta:
[Answer based on context]
Fuente:
[Source details with page or section]
"""
# Define the user message template
qna_user_message_template = """
###Context
Here are some excerpts from source documents that are relevant to the mentioned below:
{context}
###Question
{question}
"""
# System message for query expansion
expansion_system_message = """
You are a helpful assistant specialized in rephrasing user questions to improve the relevance of document retrieval. Your task is to take a user's original question and generate an expanded or rephrased version that is more likely to match relevant document sections, especially considering temporal contexts or implied information. Do not answer the question; only rephrase or expand it.
For example:
Original Question: "¿cuál modalidad de retiro le corresponde a una persona empezó a trabajar en 1990?"
Expanded Question: "¿cuál modalidad de retiro le corresponde a una persona que empezó a trabajar antes de 1997 o en 1990, estaba afiliada al IMSS, específicamente bajo la Ley del Seguro Social de 1973?"
Provide only the expanded question, without any conversational filler.
"""
@st.cache_resource
def load_and_process_pdfs(pdf_file_paths): # Changed parameter to accept file paths
all_documents = []
for pdf_file_path in pdf_file_paths:
loader = PyMuPDFLoader(pdf_file_path)
documents = loader.load()
all_documents.extend(documents)
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
encoding_name='cl100k_base',
chunk_size=1000,
chunk_overlap=100
)
document_chunks = text_splitter.split_documents(all_documents)
embedding_model = OpenAIEmbeddings(
openai_api_key=OPENAI_API_KEY,
openai_api_base=OPENAI_API_BASE
)
# Create an in-memory vector store (or use a persistent one if needed)
vectorstore = Chroma.from_documents(
document_chunks,
embedding_model
)
return vectorstore.as_retriever(search_type='similarity', search_kwargs={'k': 5})
def generate_rag_response(user_input, retriever, max_tokens=500, temperature=0, top_p=0.95):
# Retrieve relevant document chunks
relevant_document_chunks = retriever.get_relevant_documents(query=user_input)
context_list = [d.page_content for d in relevant_document_chunks]
# Combine document chunks into a single context
context_for_query = ". ".join(context_list)
user_message = qna_user_message_template.replace('{context}', context_for_query)
user_message = user_message.replace('{question}', user_input)
# Generate the response
try:
response = client.chat.completions.create(
model="gpt-4o-mini",
messages=[
{"role": "system", "content": qna_system_message},
{"role": "user", "content": user_message}
],
max_tokens=max_tokens,
temperature=temperature,
top_p=top_p
)
response = response.choices[0].message.content.strip()
except Exception as e:
response = f'Sorry, I encountered the following error: \n {e}'
return response
def query_expansion(user_question, model_name="gpt-4o-mini", temperature=0.3):
global client, expansion_system_message
try:
response = client.chat.completions.create(
model=model_name,
messages=[
{"role": "system", "content": expansion_system_message},
{"role": "user", "content": user_question}
],
temperature=temperature,
max_tokens=150
)
expanded_query = response.choices[0].message.content.strip()
return expanded_query
except Exception as e:
st.error(f"Error during query expansion: {e}") # Use st.error for Streamlit app
return user_question # Fallback to original question if expansion fails
def generate_rag_response_complete(user_question, retriever, k=5, max_tokens=500, temperature=0.3, top_p=0.95):
# First, try to generate a response with the original question
response = generate_rag_response(user_question, retriever, max_tokens, temperature, top_p)
# If the initial response indicates no knowledge, try query expansion
if "Disculpa, no tengo el conocimiento para responder a esa pregunta." in response:
st.info("Initial RAG failed. Attempting query expansion...") # Use st.info for Streamlit app
expanded_question = query_expansion(user_question, temperature=temperature) # Pass temperature
st.info(f"Expanded Question: {expanded_question}")
# Call RAG again with the expanded question
response = generate_rag_response(expanded_question, retriever, max_tokens, temperature, top_p)
return response
# Streamlit App
st.title("LLM-Powered Research Assistant")
# Define the path to the data directory within the app's context
DATA_DIR = "data"
# Find all PDF files in the data directory
pdf_files = glob(os.path.join(DATA_DIR, "*.pdf"))
retriever = None
if pdf_files: # Check if any PDF files were found
st.info(f"Processing {len(pdf_files)} PDFs from the '{DATA_DIR}' directory...")
retriever = load_and_process_pdfs(pdf_files) # Call with the list of file paths
st.success("PDFs processed and ready for questioning!")
else:
st.warning(f"No PDF files found in the '{DATA_DIR}' directory. Please ensure your PDFs are in this folder and rebuild the Docker image if deploying.")
if retriever:
user_question = st.text_input("Ask a question about the documents:")
if user_question:
with st.spinner("Generating response..."):
rag_response = generate_rag_response_complete(user_question, retriever) # Use the complete function
st.write(rag_response)