AskContext

Sleeping

AskContext / app.py

Muhammad Adnan

Add application file

b7013d9 about 1 year ago

6.82 kB

	import streamlit as st
	from transformers import pipeline
	import pdfplumber
	import logging
	import pandas as pd
	import docx
	import pickle
	import os
	from hashlib import sha256

	# Set up logging
	# Configure the root logger at INFO level so the info/error messages
	# emitted by the functions in this file are not filtered out.
	logging.basicConfig(level=logging.INFO)
	# Module-level logger, named after this module, shared by every function below.
	logger = logging.getLogger(__name__)

	# Initialize QA pipeline with a pre-trained RoBERTa QA model
	@st.cache_resource
	def _load_qa_pipeline():
	    """Build the question-answering pipeline; Streamlit caches the result.

	    Exceptions are deliberately not caught here: a raised exception is not
	    stored by the cache, so a failed load is retried on the next call.
	    """
	    logger.info("Initializing QA model...")
	    qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
	    logger.info("QA model loaded successfully.")
	    return qa_pipeline


	def init_qa_model():
	    """Return the shared question-answering pipeline, or ``None`` on failure.

	    The successful pipeline is cached by ``_load_qa_pipeline``. Error
	    handling lives in this uncached wrapper so that a transient failure
	    (for example while fetching the model) is reported but never cached as
	    a permanent ``None`` result.

	    Returns:
	        The ``transformers`` pipeline object, or ``None`` when loading
	        fails; the failure is logged and shown with ``st.error``.
	    """
	    try:
	        return _load_qa_pipeline()
	    except Exception as e:
	        logger.error(f"Error loading QA model: {e}")
	        st.error(f"Error loading the QA model: {e}")
	        return None

	# Function to extract text from PDF
	def extract_text_from_pdf(pdf_file):
	    """Return the text of every page of a PDF, one page after another.

	    Args:
	        pdf_file: Whatever ``pdfplumber.open`` accepts (main() passes the
	            uploaded file object).

	    Returns:
	        The page texts joined with newlines, "No text found in the PDF."
	        when no page yields any text, or "Error extracting text from PDF."
	        when opening or parsing fails (the error is logged).
	    """
	    try:
	        with pdfplumber.open(pdf_file) as pdf:
	            # Pages for which extract_text() returns nothing are skipped.
	            page_texts = [
	                page_text
	                for page_text in (page.extract_text() for page in pdf.pages)
	                if page_text
	            ]
	        # Join with a newline so the last word of one page is not fused
	        # with the first word of the next page.
	        return "\n".join(page_texts) or "No text found in the PDF."
	    except Exception as e:
	        logger.error(f"Error extracting text from PDF: {e}")
	        return "Error extracting text from PDF."

	# Function to extract text from TXT files
	def extract_text_from_txt(txt_file):
	    """Decode an uploaded text file as UTF-8 and return its contents.

	    Args:
	        txt_file: Object exposing ``getvalue()`` that returns the raw bytes
	            (main() passes the Streamlit upload; ``io.BytesIO`` also works).

	    Returns:
	        The decoded text, "No text found in the TXT file." when the file is
	        empty, or "Error extracting text from TXT file." when reading or
	        decoding fails (the error is logged).
	    """
	    try:
	        # "utf-8-sig" decodes plain UTF-8 and also drops a leading BOM, which
	        # "utf-8" would leave in the text as a stray U+FEFF character.
	        return txt_file.getvalue().decode("utf-8-sig") or "No text found in the TXT file."
	    except Exception as e:
	        logger.error(f"Error extracting text from TXT file: {e}")
	        return "Error extracting text from TXT file."

	# Function to extract text from CSV files
	def extract_text_from_csv(csv_file):
	    """Render a CSV file as a plain-text table.

	    Args:
	        csv_file: Path or file-like object accepted by ``pandas.read_csv``.

	    Returns:
	        The table formatted by ``DataFrame.to_string(index=False)``,
	        "No text found in the CSV file." when the file has no data rows, or
	        "Error extracting text from CSV file." when parsing fails (the
	        error is logged).
	    """
	    try:
	        df = pd.read_csv(csv_file)
	        # to_string() on an empty frame returns an "Empty DataFrame ..."
	        # repr, which is never falsy, so emptiness is checked explicitly.
	        if df.empty:
	            return "No text found in the CSV file."
	        return df.to_string(index=False)
	    except Exception as e:
	        logger.error(f"Error extracting text from CSV file: {e}")
	        return "Error extracting text from CSV file."

	# Pull the paragraph text out of a Word document
	def extract_text_from_docx(docx_file):
	    """Return the paragraphs of a DOCX file joined with newlines.

	    Only ``Document.paragraphs`` is read. Returns "No text found in the
	    DOCX file." when the joined text is empty, and "Error extracting text
	    from DOCX file." when the document cannot be processed (the error is
	    logged).
	    """
	    try:
	        document = docx.Document(docx_file)
	        paragraph_texts = [paragraph.text for paragraph in document.paragraphs]
	        joined = "\n".join(paragraph_texts)
	        if joined:
	            return joined
	        return "No text found in the DOCX file."
	    except Exception as exc:
	        logger.error(f"Error extracting text from DOCX file: {exc}")
	        return "Error extracting text from DOCX file."

	# Derive a content-based cache key for a document
	def generate_cache_key(text):
	    """Return the SHA-256 hex digest of *text* encoded as UTF-8."""
	    hasher = sha256()
	    hasher.update(text.encode("utf-8"))
	    return hasher.hexdigest()

	# Function to cache embeddings
	def cache_embeddings(embeddings, cache_key):
	    """Pickle *embeddings* to ``embeddings_cache/<cache_key>.pkl``.

	    Best-effort: any failure is logged and swallowed so that a caching
	    problem never interrupts the caller.

	    Args:
	        embeddings: Any picklable object.
	        cache_key: File stem for the cache entry (main() passes the value
	            returned by ``generate_cache_key``).
	    """
	    try:
	        cache_dir = "embeddings_cache"
	        # Create the directory that is actually written to. The previous
	        # code created '../embeddings_cache', so this write always failed.
	        os.makedirs(cache_dir, exist_ok=True)
	        cache_path = f"{cache_dir}/{cache_key}.pkl"
	        with open(cache_path, 'wb') as f:
	            pickle.dump(embeddings, f)
	        logger.info(f"Embeddings cached successfully with key {cache_key}")
	    except Exception as e:
	        logger.error(f"Error caching embeddings: {e}")

	# Read a previously pickled embeddings entry, if one exists
	def load_cached_embeddings(cache_key):
	    """Return the object stored under *cache_key*, or ``None``.

	    ``None`` is returned both when ``embeddings_cache/<cache_key>.pkl`` does
	    not exist and when reading it fails (the failure is logged).
	    """
	    try:
	        pickle_path = f"embeddings_cache/{cache_key}.pkl"
	        # Guard clause: a missing file is an ordinary cache miss.
	        if not os.path.exists(pickle_path):
	            return None
	        # NOTE: pickle.load can execute code embedded in the file, so the
	        # cache directory must only ever contain files this app wrote.
	        with open(pickle_path, 'rb') as cache_file:
	            cached = pickle.load(cache_file)
	            logger.info(f"Embeddings loaded from cache with key {cache_key}")
	            return cached
	    except Exception as exc:
	        logger.error(f"Error loading cached embeddings: {exc}")
	        return None

	# Main function for the app
	def main():
	    """Render the Streamlit page: gather context, take a question, show the answer.

	    The context is the text extracted from every uploaded file plus whatever
	    the user typed into the manual-context box. The answer comes from the
	    pipeline returned by ``init_qa_model()``; when that returns ``None`` an
	    error is shown instead of calling it.
	    """
	    st.title("Adnan AI Labs QA System")
	    st.markdown("Upload documents (PDF, TXT, CSV, or DOCX) or add context manually, and ask questions.")

	    uploaded_files = st.file_uploader(
	        "Upload Documents",
	        type=["pdf", "txt", "csv", "docx"],
	        accept_multiple_files=True,
	    )
	    extracted_text_box = st.text_area("Manually add extra context for answering questions", height=200)

	    # Initialize QA model
	    qa_pipeline = init_qa_model()

	    # Extractor lookup: first by the MIME type reported for the upload, then
	    # by file extension, because the reported MIME type is not always one of
	    # the values listed here.
	    extractors_by_mime = {
	        "application/pdf": extract_text_from_pdf,
	        "text/plain": extract_text_from_txt,
	        "application/vnd.ms-excel": extract_text_from_csv,
	        "text/csv": extract_text_from_csv,
	        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": extract_text_from_docx,
	    }
	    extractors_by_extension = {
	        ".pdf": extract_text_from_pdf,
	        ".txt": extract_text_from_txt,
	        ".csv": extract_text_from_csv,
	        ".docx": extract_text_from_docx,
	    }

	    # Extract text from each uploaded file
	    document_texts = []
	    for uploaded_file in uploaded_files or []:
	        extractor = extractors_by_mime.get(uploaded_file.type)
	        if extractor is None:
	            extension = os.path.splitext(uploaded_file.name)[1].lower()
	            extractor = extractors_by_extension.get(extension)
	        if extractor is None:
	            # Report the skipped file instead of dropping it silently.
	            logger.warning(f"Skipping unsupported upload {uploaded_file.name!r} ({uploaded_file.type!r})")
	            st.warning(f"Skipped '{uploaded_file.name}': unsupported file type.")
	            continue
	        document_texts.append(extractor(uploaded_file))

	    # Combine all extracted texts and manual context
	    combined_context = "\n".join(document_texts) + "\n" + extracted_text_box

	    # A whitespace-only question counts as no question.
	    user_question = st.text_input("Ask a question:").strip()
	    if not user_question:
	        st.info("Please enter a question to get an answer.")
	    elif not combined_context.strip():
	        st.info("Please upload a document or add context manually.")
	    elif st.button("Get Answer"):
	        if qa_pipeline is None:
	            # init_qa_model() returns None on failure; calling it would raise TypeError.
	            st.error("The QA model is not available, so the question cannot be answered.")
	        else:
	            with st.spinner('Processing your question...'):
	                # Generate a unique cache key for the combined context
	                cache_key = generate_cache_key(combined_context)

	                # Check for cached embeddings
	                if load_cached_embeddings(cache_key) is None:
	                    logger.info("Generating new embeddings...")
	                    # TODO: compute the embeddings here and hand them to
	                    # cache_embeddings(). Nothing is computed yet, so nothing
	                    # is cached: pickling None would only create files that
	                    # read back as a cache miss.

	                # Use the QA pipeline to answer the question
	                answer = qa_pipeline(question=user_question, context=combined_context)
	                if answer['answer']:
	                    st.write("Answer:", answer['answer'])
	                else:
	                    st.warning("No suitable answer found. Please rephrase your question.")

	    # Display Buy Me a Coffee button
	    st.markdown("""
	    <div style="text-align: center;">
	    <p>If you find this project useful, consider buying me a coffee to support further development! ☕️</p>
	    <a href="https://buymeacoffee.com/adnanailabs">
	    <img src="https://cdn.buymeacoffee.com/buttons/v2/default-yellow.png" alt="Buy Me a Coffee" style="height: 50px;">
	    </a>
	    </div>
	    """, unsafe_allow_html=True)

	if __name__ == "__main__":
	    # Last-resort boundary: anything main() lets escape is logged and
	    # shown on the page.
	    try:
	        main()
	    except Exception as exc:
	        logger.critical(f"Critical error: {exc}")
	        st.error(f"A critical error occurred: {exc}")