"""Ask My PDF — a small Gradio app that answers questions about an uploaded PDF.

Pipeline: extract text with PDFPlumber, chunk it, truncate to a context
budget, then ask a Groq-hosted Llama 3 model via a LangChain prompt | llm chain.
"""

from langchain_community.document_loaders import PDFPlumberLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate
import gradio as gr
import os
from langchain_groq import ChatGroq
from dotenv import load_dotenv

# Hard cap on the document text sent to the LLM, to stay within context limits.
MAX_DOC_LENGTH = 4000


def process_pdf(file):
    """Extract text from an uploaded PDF file.

    Args:
        file: Gradio file object; ``file.name`` is the temp path on disk.

    Returns:
        The extracted text truncated to ``MAX_DOC_LENGTH`` characters, or an
        error string starting with "Error" if extraction fails.
    """
    try:
        temp_path = file.name
        if not os.path.exists(temp_path):
            return "Error: Uploaded file path does not exist."
        loader = PDFPlumberLoader(temp_path)
        result = loader.load()
        # Split into small overlapping chunks, then re-join: keeps page
        # boundaries tidy and normalizes whitespace between fragments.
        splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=20)
        split_docs = splitter.split_documents(result)
        document_text = "\n".join(doc.page_content for doc in split_docs)
        # Truncate so the prompt fits the model's context window.
        return document_text[:MAX_DOC_LENGTH]
    except Exception as e:
        return f"Error processing PDF: {str(e)}"


def initialize_llm():
    """Create the Groq chat model from the environment-configured API key.

    Reads ``GROQ_API_KEY`` (falling back to the legacy ``Groq_API_Key`` name
    for backward compatibility — the original code only read the legacy name
    while the error message advertised the standard one).

    Raises:
        ValueError: if no API key is configured.
    """
    load_dotenv()
    groq_api_key = os.getenv("GROQ_API_KEY") or os.getenv("Groq_API_Key")
    if not groq_api_key:
        raise ValueError("GROQ_API_KEY environment variable not set.")
    return ChatGroq(
        model="llama3-8b-8192",
        temperature=0.7,
        api_key=groq_api_key,
        verbose=False,
    )


def create_prompt():
    """Build the QA prompt template with ``document`` and ``question`` slots."""
    return PromptTemplate(
        input_variables=["document", "question"],
        template=(
            "You are an AI assistant that provides precise answers based on the given document. "
            "Use only the information available in the document to respond.\n\n"
            "Document:\n{document}\n\n"
            "Question: {question}\n"
            "Answer:"
        ),
    )


def generate_answer(chain, document_text, user_input):
    """Invoke the prompt|llm chain and return the answer text.

    Returns an error string starting with "Error" instead of raising, so the
    Gradio UI always receives displayable text.
    """
    try:
        response = chain.invoke({"document": document_text, "question": user_input})
        return str(response.content)
    except Exception as e:
        return f"Error generating answer: {str(e)}"


def handle_file(file, user_input):
    """Gradio callback: validate inputs, extract the PDF, and answer the question.

    Args:
        file: uploaded PDF (or None).
        user_input: the user's question.

    Returns:
        The model's answer, or a human-readable validation/error message.
    """
    if not file:
        return "Please upload a PDF document."
    # Validate the cheap input first — avoids parsing the PDF only to
    # reject an empty question afterwards.
    if not user_input.strip():
        return "Please enter a question."
    document_text = process_pdf(file)
    if isinstance(document_text, str) and document_text.startswith("Error"):
        return document_text  # Surface the extraction error to the user.
    llm = initialize_llm()
    prompt = create_prompt()
    chain = prompt | llm
    return generate_answer(chain, document_text, user_input)


interface = gr.Interface(
    fn=handle_file,
    inputs=[
        gr.File(label="Upload PDF"),
        gr.Textbox(lines=2, placeholder="Enter your question here..."),
    ],
    outputs=gr.Textbox(label="Answer"),
    title="Ask My PDF",
    description="Upload a PDF document and ask questions about its content.",
)

# Guard the server launch so importing this module has no side effects.
if __name__ == "__main__":
    interface.launch()