File size: 2,589 Bytes
9e2f9ad
 
 
34ba555
54adfc6
 
34ba555
9e2f9ad
54adfc6
 
 
9e2f9ad
 
 
 
 
 
 
 
54adfc6
 
9e2f9ad
 
54adfc6
9e2f9ad
 
 
 
54adfc6
9e2f9ad
54adfc6
9e2f9ad
 
 
54adfc6
9e2f9ad
 
 
54adfc6
9e2f9ad
 
 
54adfc6
9e2f9ad
54adfc6
9e2f9ad
 
54adfc6
 
9e2f9ad
54adfc6
 
 
 
9e2f9ad
54adfc6
 
 
 
 
 
 
 
 
 
 
 
9e2f9ad
54adfc6
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from transformers import pipeline
import gradio as gr

# Step 1: Initialize Vector Store
# Module-level FAISS index shared across Gradio callbacks; stays None until
# upload_and_process_pdf() builds it from an uploaded PDF.
vector_store = None

# Step 2: Upload and Process PDF Documents
def upload_and_process_pdf(file):
    """Load an uploaded PDF, embed its pages, and build the FAISS vector store.

    Args:
        file: Gradio file object exposing a ``.name`` filesystem path, or
            ``None`` when the user clicked the button without selecting a file.

    Returns:
        A status message string for display in the UI.
    """
    global vector_store
    # Guard: Gradio passes None when no file was selected; the original code
    # raised AttributeError on ``file.name`` in that case.
    if file is None:
        return "No file provided. Please upload a PDF file."

    # Load PDF documents using PyPDFLoader (one Document per page).
    loader = PyPDFLoader(file.name)
    docs = loader.load()

    # Generate embeddings and create a vector store for similarity search.
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
    vector_store = FAISS.from_documents(docs, embeddings)
    return "Document uploaded and processed successfully."

# Step 3: Set up Generator (using FLAN-T5)
# Loaded once at import time so every query reuses the same model instance.
generator_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")

def generate_answer(context, query):
    """Generate an answer for *query* with FLAN-T5, conditioned on *context*."""
    prompt = f"Question: {query}\nContext: {context}"
    # Sampling is enabled, so answers vary between calls for the same input.
    outputs = generator_pipeline(prompt, max_length=100, do_sample=True)
    return outputs[0]['generated_text']

# Step 4: Build the Retrieval-Augmented Generation Function
def rag_system(query):
    """Retrieve relevant context for *query* and generate an answer from it.

    Args:
        query: The user's natural-language question.

    Returns:
        A ``(answer, context)`` tuple of strings. If no document has been
        uploaded yet, *answer* is an instructional message and *context* is "".
    """
    global vector_store
    if vector_store is None:
        return "No documents uploaded. Please upload a document first.", ""

    # Ask the retriever for exactly the two documents we use, instead of
    # fetching the default four and discarding half with a slice.
    retriever = vector_store.as_retriever(search_kwargs={"k": 2})
    results = retriever.get_relevant_documents(query)
    context = " ".join(doc.page_content for doc in results)

    # Generate the answer conditioned on the retrieved context.
    answer = generate_answer(context, query)
    return answer, context

# Step 5: Create Gradio Interface
def query_rag(question):
    """Gradio callback: forward *question* to the RAG pipeline.

    Returns the (answer, context) pair produced by rag_system().
    """
    return rag_system(question)

def upload_document(file):
    """Gradio callback: delegate the uploaded file to the PDF processor."""
    return upload_and_process_pdf(file)

# Build the two-tab Gradio UI: one tab to ask questions, one to upload PDFs.
interface = gr.Blocks()

with interface:
    gr.Markdown("# RAG System with PDF Upload (LangChain Integration)")
    # Tab 1: query the indexed documents and show answer plus retrieved context.
    with gr.Tab("Ask a Question"):
        question = gr.Textbox(label="Enter your question")
        answer = gr.Textbox(label="Generated Answer")
        context = gr.Textbox(label="Context")
        query_button = gr.Button("Get Answer")
        query_button.click(query_rag, inputs=question, outputs=[answer, context])
    # Tab 2: upload a PDF and (re)build the vector store from it.
    with gr.Tab("Upload Document"):
        file_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_button = gr.Button("Upload and Process")
        upload_output = gr.Textbox(label="Upload Status")
        upload_button.click(upload_document, inputs=file_upload, outputs=upload_output)

# Step 6: Launch the Interface
# Guarded so importing this module for testing does not start the web server.
if __name__ == "__main__":
    interface.launch()