"""Streamlit app: upload a PDF and ask questions about it via a RAG chain.

Pipeline: PDF -> PyPDFLoader -> RecursiveCharacterTextSplitter -> Chroma
vector store (SentenceTransformer embeddings) -> rlm/rag-prompt -> Groq LLM.
"""

import os
import tempfile

import streamlit as st
import torch
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_groq import ChatGroq
from langchain_text_splitters import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer


class SentenceTransformerEmbedding:
    """Adapter exposing a SentenceTransformer model through the
    ``embed_documents`` / ``embed_query`` interface that LangChain
    vector stores (Chroma) expect."""

    def __init__(self, model_name):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        """Embed a list of texts.

        Returns a list of embedding vectors (lists of floats).
        """
        embeddings = self.model.encode(texts, convert_to_tensor=True)
        if isinstance(embeddings, torch.Tensor):
            # Move off any GPU device and convert to plain Python lists,
            # which is what Chroma expects.
            return embeddings.cpu().detach().numpy().tolist()
        return embeddings

    def embed_query(self, query):
        """Embed a single query string.

        Returns one embedding vector (a list of floats).
        """
        embedding = self.model.encode([query], convert_to_tensor=True)
        if isinstance(embedding, torch.Tensor):
            return embedding.cpu().detach().numpy().tolist()[0]
        return embedding[0]


# Initialize the embedding model used for both indexing and querying.
embedding_model = SentenceTransformerEmbedding('all-MiniLM-L6-v2')

# SECURITY: API keys must never be hard-coded in source (the originals were
# committed secrets and must be revoked). Read the key from the environment
# instead; the previously hard-coded LangChain key was never used and has
# been dropped.
groq_api_key = os.environ.get("GROQ_API_KEY", "")
if not groq_api_key:
    st.error("GROQ_API_KEY environment variable is not set.")

llm = ChatGroq(model="llama3-8b-8192", groq_api_key=groq_api_key)


def load_document(document_path):
    """Split an uploaded PDF into text chunks.

    Args:
        document_path: the Streamlit ``UploadedFile`` returned by
            ``st.file_uploader`` (despite the name, it is a file-like
            object, not a path).

    Returns:
        A list of document splits on success, or the error message as a
        string on failure (the caller dispatches on the return type).
    """
    try:
        with tempfile.TemporaryDirectory() as tmp_dir:
            # PyPDFLoader needs a real path, so spill the upload to disk;
            # the temp directory (and file) is removed when the block exits.
            tmp_file = os.path.join(tmp_dir, 'temp.pdf')
            with open(tmp_file, 'wb') as f:
                f.write(document_path.getvalue())
            loader = PyPDFLoader(tmp_file)
            docs = loader.load()
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=4000, chunk_overlap=200
            )
            splits = text_splitter.split_documents(docs)
            return splits
    except Exception as e:
        return str(e)


def initialize_chroma(splits):
    """Index the splits in Chroma and assemble the RAG chain.

    Returns the runnable RAG chain on success, or the error message as a
    string on failure (the caller dispatches on the return type).
    """
    try:
        vectorstore = Chroma.from_documents(
            documents=splits, embedding=embedding_model
        )
        retriever = vectorstore.as_retriever()
        # Community-maintained RAG prompt from the LangChain hub.
        prompt = hub.pull("rlm/rag-prompt")

        def format_docs(docs):
            # Join retrieved chunks into one context string for the prompt.
            return "\n\n".join(doc.page_content for doc in docs)

        rag_chain = (
            {"context": retriever | format_docs, "question": RunnablePassthrough()}
            | prompt
            | llm
            | StrOutputParser()
        )
        return rag_chain
    except Exception as e:
        return str(e)


def answer_question(rag_chain, query):
    """Run the RAG chain on *query*; returns the answer or an error string."""
    try:
        result = rag_chain.invoke(query)
        return result
    except Exception as e:
        return str(e)


# --- Streamlit UI -----------------------------------------------------------

st.title("PDF Question Answering")
st.write("Upload your PDF document and ask a question!")

document_path = st.file_uploader("Upload your PDF document", type=["pdf"])
query = st.text_input("Enter your question")

if document_path is not None and query:
    splits = load_document(document_path)
    if isinstance(splits, str):
        st.write("Error loading document:", splits)
    else:
        rag_chain = initialize_chroma(splits)
        if isinstance(rag_chain, str):
            st.write("Error initializing Chroma:", rag_chain)
        else:
            result = answer_question(rag_chain, query)
            st.write("Result:", result)