# Ask My PDF — a Gradio app that answers questions about an uploaded PDF
# using a Groq-hosted LLM grounded on the document's extracted text.
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate
import gradio as gr
import os
from langchain_groq import ChatGroq
from dotenv import load_dotenv
MAX_DOC_LENGTH = 4000  # cap on characters sent to the LLM so the prompt fits the context window

def process_pdf(file):
    """Load an uploaded PDF, split it into chunks, and return its text.

    Args:
        file: The uploaded file from gradio — either a tempfile-like object
            exposing a ``.name`` path, or a plain filesystem path string
            (gr.File can deliver either depending on configuration).

    Returns:
        The extracted document text truncated to ``MAX_DOC_LENGTH``
        characters, or a string starting with "Error" on failure (callers
        detect failure via that prefix).
    """
    try:
        # Accept both a file-like object (use its .name) and a bare path string.
        temp_path = getattr(file, "name", file)
        if not os.path.exists(temp_path):
            return "Error: Uploaded file path does not exist."
        loader = PDFPlumberLoader(temp_path)
        result = loader.load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=20)
        split_docs = splitter.split_documents(result)
        # Flatten the chunks back into one text blob for the prompt.
        document_text = "\n".join(doc.page_content for doc in split_docs)
        # Truncate so the downstream prompt stays within the model's limits.
        return document_text[:MAX_DOC_LENGTH]
    except Exception as e:
        # Surface the failure as a user-visible message instead of crashing the UI.
        return f"Error processing PDF: {str(e)}"
def initialize_llm():
    """Create the Groq chat model, reading the API key from the environment.

    Returns:
        A configured ChatGroq instance (llama3-8b-8192, temperature 0.7).

    Raises:
        ValueError: If no Groq API key is present in the environment.
    """
    load_dotenv()  # pull variables from a local .env file, if present
    # Bug fix: the error message referred to GROQ_API_KEY but the code only
    # read "Groq_API_Key". Accept the conventional name first, then the
    # legacy casing, so existing deployments keep working.
    groq_api_key = os.getenv("GROQ_API_KEY") or os.getenv("Groq_API_Key")
    if not groq_api_key:
        raise ValueError("GROQ_API_KEY (or Groq_API_Key) environment variable not set.")
    return ChatGroq(
        model="llama3-8b-8192",
        temperature=0.7,
        api_key=groq_api_key,
        verbose=False,
    )
def create_prompt():
    """Build the QA prompt template that grounds answers in the document text."""
    template_text = (
        "You are an AI assistant that provides precise answers based on the given document. "
        "Use only the information available in the document to respond.\n\n"
        "Document:\n{document}\n\n"
        "Question: {question}\n"
        "Answer:"
    )
    return PromptTemplate(
        input_variables=["document", "question"],
        template=template_text,
    )
def generate_answer(chain, document_text, user_input):
    """Invoke the prompt/LLM chain and return the model's answer as a string.

    On any failure the exception is converted into a user-visible error
    message rather than propagating to the UI.
    """
    try:
        llm_response = chain.invoke(
            {"document": document_text, "question": user_input}
        )
        return str(llm_response.content)
    except Exception as e:
        return f"Error generating answer: {str(e)}"
def handle_file(file, user_input):
    """Gradio callback: answer *user_input* using the uploaded PDF *file*.

    Args:
        file: Uploaded file from the gr.File input (may be None).
        user_input: Question text from the gr.Textbox input (may be None/empty).

    Returns:
        The model's answer, or a human-readable validation/error message —
        always a string, as the output Textbox expects.
    """
    if not file:
        return "Please upload a PDF document."
    document_text = process_pdf(file)
    if isinstance(document_text, str) and document_text.startswith("Error"):
        return document_text  # propagate processing failure to the UI
    # Bug fix: guard against None as well as whitespace-only questions;
    # None.strip() would raise AttributeError.
    if not user_input or not user_input.strip():
        return "Please enter a question."
    llm = initialize_llm()
    prompt = create_prompt()
    chain = prompt | llm  # LCEL pipeline: prompt formatting -> LLM call
    return generate_answer(chain, document_text, user_input)
# Wire the handler into a simple two-input Gradio UI.
interface = gr.Interface(
    fn=handle_file,
    inputs=[
        gr.File(label="Upload PDF"),
        gr.Textbox(lines=2, placeholder="Enter your question here..."),
    ],
    outputs=gr.Textbox(label="Answer"),
    title="Ask My PDF",
    description="Upload a PDF document and ask questions about its content.",
)

# Guard the launch so importing this module (e.g. for tests) does not
# start a web server as a side effect.
if __name__ == "__main__":
    interface.launch()