File size: 2,889 Bytes
b24276f
 
c14ec06
b24276f
e875fc5
18b4b23
c14ec06
b24276f
c14ec06
 
b24276f
 
c14ec06
 
 
 
 
b24276f
c14ec06
 
b24276f
c14ec06
 
 
18b4b23
c14ec06
 
 
ede7a86
b24276f
c14ec06
11070d5
18b4b23
 
b24276f
ede7a86
c14ec06
18b4b23
eabb883
c14ec06
b24276f
 
c14ec06
 
 
 
 
 
 
 
 
 
b24276f
 
c14ec06
b24276f
c14ec06
 
 
b24276f
 
 
 
 
 
 
c14ec06
 
 
 
 
 
b24276f
 
 
 
 
c14ec06
b24276f
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate
import gradio as gr
import os
from langchain_groq import ChatGroq
from dotenv import load_dotenv


# Cap on the amount of extracted text forwarded to the LLM, to stay
# within the model's context window.
MAX_DOC_LENGTH = 4000


def process_pdf(file):
    """Extract text from an uploaded PDF.

    Args:
        file: Uploaded file object (e.g. from Gradio) exposing a ``.name``
            attribute that holds the on-disk path of the temporary file.

    Returns:
        The extracted document text, truncated to ``MAX_DOC_LENGTH``
        characters, or a string starting with ``"Error"`` on failure
        (callers test for that prefix).
    """
    try:
        temp_path = file.name  # Gradio stores the upload at this temp path
        if not os.path.exists(temp_path):
            return "Error: Uploaded file path does not exist."

        loader = PDFPlumberLoader(temp_path)
        pages = loader.load()

        # Join page contents directly. The previous implementation split
        # the pages into 150-char chunks with a 20-char overlap and then
        # re-joined every chunk, which duplicated the overlapping text in
        # the output and wasted part of the MAX_DOC_LENGTH budget.
        document_text = "\n".join(page.page_content for page in pages)
        return document_text[:MAX_DOC_LENGTH]
    except Exception as e:
        return f"Error processing PDF: {str(e)}"

def initialize_llm():
    """Create the Groq chat model used for answering questions.

    Reads the API key from the ``GROQ_API_KEY`` environment variable
    (``Groq_API_Key`` is also accepted for backward compatibility with
    existing ``.env`` files).

    Returns:
        A configured ChatGroq instance (llama3-8b-8192, temperature 0.7).

    Raises:
        ValueError: If no API key is found in the environment.
    """
    load_dotenv()
    # Bug fix: the original looked up "Groq_API_Key" while the error
    # message told users to set "GROQ_API_KEY", so following the error's
    # advice did not fix the error. Accept both spellings, preferring the
    # conventional upper-case name.
    groq_api_key = os.getenv("GROQ_API_KEY") or os.getenv("Groq_API_Key")
    if not groq_api_key:
        raise ValueError("GROQ_API_KEY environment variable not set.")

    return ChatGroq(
        model="llama3-8b-8192",
        temperature=0.7,
        api_key=groq_api_key,
        verbose=False,
    )

def create_prompt():
    """Build the prompt template that grounds answers in the document.

    Returns:
        A PromptTemplate expecting ``document`` and ``question`` variables.
    """
    template_text = (
        "You are an AI assistant that provides precise answers based on the given document. "
        "Use only the information available in the document to respond.\n\n"
        "Document:\n{document}\n\n"
        "Question: {question}\n"
        "Answer:"
    )
    return PromptTemplate(
        template=template_text,
        input_variables=["document", "question"],
    )

def generate_answer(chain, document_text, user_input):
    """Run the chain on the document and question and return the answer.

    Args:
        chain: A runnable exposing ``invoke`` and returning an object with
            a ``content`` attribute (e.g. a prompt | llm pipeline).
        document_text: The extracted document text.
        user_input: The user's question.

    Returns:
        The model's answer as a string, or an error message on failure.
    """
    payload = {"document": document_text, "question": user_input}
    try:
        result = chain.invoke(payload)
        return str(result.content)
    except Exception as e:
        return f"Error generating answer: {str(e)}"

def handle_file(file, user_input):
    """Answer a question about an uploaded PDF (Gradio callback).

    Args:
        file: Uploaded PDF file object, or falsy when nothing was uploaded.
        user_input: The user's question.

    Returns:
        The model's answer, or a human-readable validation/error message.
    """
    if not file:
        return "Please upload a PDF document."

    # Validate the question *before* the expensive PDF parsing and LLM
    # initialization. The original parsed the whole PDF first and only
    # then rejected an empty question.
    if not user_input.strip():
        return "Please enter a question."

    document_text = process_pdf(file)
    if isinstance(document_text, str) and document_text.startswith("Error"):
        return document_text  # propagate the processing error to the UI

    llm = initialize_llm()
    prompt = create_prompt()
    chain = prompt | llm  # LCEL pipeline: prompt -> model

    return generate_answer(chain, document_text, user_input)

# Gradio UI: a PDF upload plus a question box, wired to handle_file,
# with a single textbox for the answer.
interface = gr.Interface(
    fn=handle_file,
    inputs=[
        gr.File(label="Upload PDF"),
        gr.Textbox(lines=2, placeholder="Enter your question here...")
    ],
    outputs=gr.Textbox(label="Answer"),
    title="Ask My PDF",
    description="Upload a PDF document and ask questions about its content."
)

# NOTE(review): launches at import time, so importing this module starts
# the server; consider an `if __name__ == "__main__":` guard if the module
# is ever imported elsewhere.
interface.launch()