File size: 2,290 Bytes
e947e06
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2a863b1
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import os
import zipfile
import gradio as gr

from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.schema.runnable import Runnable

DATA_DIR = "Week14_content"
ZIP_FILE = "Week_14__MLS14 - Adv RAG.zip"


def unzip_if_needed():
    """Extract ``ZIP_FILE`` into ``DATA_DIR`` unless ``DATA_DIR`` already exists.

    If extraction fails or is interrupted, the partially populated
    ``DATA_DIR`` is removed before the error is re-raised. Without that
    cleanup the next call would see the directory, skip extraction, and
    silently work on an incomplete set of files.

    Raises:
        FileNotFoundError: If ``ZIP_FILE`` does not exist.
        zipfile.BadZipFile: If ``ZIP_FILE`` is not a valid zip archive.
    """
    if os.path.exists(DATA_DIR):
        return

    import shutil

    try:
        with zipfile.ZipFile(ZIP_FILE, "r") as archive:
            archive.extractall(DATA_DIR)
    except BaseException:
        # Also covers KeyboardInterrupt: a half-extracted directory must not
        # be mistaken for a finished extraction on the next run.
        shutil.rmtree(DATA_DIR, ignore_errors=True)
        raise


def load_documents() -> list:
    """Load every PDF found under ``DATA_DIR``.

    The tree is walked recursively. Directories and files are visited in
    sorted order so the resulting document order does not depend on the
    filesystem. The ``.pdf`` suffix is matched case-insensitively.

    Returns:
        The concatenation of ``PyPDFLoader(path).load()`` for each PDF, in
        traversal order. Empty if no PDF is found.
    """
    documents = []
    for root, dirs, files in os.walk(DATA_DIR):
        # Archives created on macOS carry a "__MACOSX" folder of metadata;
        # pruning it in place stops os.walk from descending into it.
        dirs[:] = sorted(d for d in dirs if d != "__MACOSX")
        for name in sorted(files):
            # "._name.pdf" AppleDouble entries keep the extension but are not
            # real PDFs, so they would make the loader fail.
            if name.startswith("._") or not name.lower().endswith(".pdf"):
                continue
            documents.extend(PyPDFLoader(os.path.join(root, name)).load())
    return documents


def build_rag_chain(
    api_key: str,
    *,
    model: str = "gpt-4-turbo",
    chunk_size: int = 1000,
    chunk_overlap: int = 100,
) -> Runnable:
    """Build a retrieval QA chain over the PDFs under ``DATA_DIR``.

    Steps: extract the archive if needed, load the PDFs, split them into
    overlapping chunks, embed the chunks into a Chroma vector store, and wrap
    the store's retriever and a chat model in a ``RetrievalQA`` chain.

    Args:
        api_key: OpenAI API key used for both the embeddings and the chat model.
        model: Chat model name passed to ``ChatOpenAI``.
        chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` passed to the same splitter.

    Returns:
        The chain created by ``RetrievalQA.from_chain_type``.

    Raises:
        ValueError: If no document chunks were produced, i.e. there is
            nothing to index.
    """
    unzip_if_needed()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    chunks = splitter.split_documents(load_documents())
    if not chunks:
        # Fail here with a clear message instead of letting the vector store
        # reject (or silently accept) an empty corpus.
        raise ValueError(
            f"No PDF content found under {DATA_DIR!r}; nothing to index."
        )

    embedding = OpenAIEmbeddings(openai_api_key=api_key)
    vectorstore = Chroma.from_documents(chunks, embedding)

    llm = ChatOpenAI(openai_api_key=api_key, model=model, temperature=0)
    return RetrievalQA.from_chain_type(
        llm=llm, retriever=vectorstore.as_retriever()
    )


# Most recently built chain, keyed by the API key it was built with. Only one
# entry is kept so memory stays bounded when many different keys are used.
_chain_cache: dict = {}


def _get_chain(api_key: str):
    """Return the chain for *api_key*, building it only when the key changes."""
    chain = _chain_cache.get(api_key)
    if chain is None:
        chain = build_rag_chain(api_key)
        _chain_cache.clear()
        _chain_cache[api_key] = chain
    return chain


def query_rag(api_key: str, user_question: str) -> str:
    """Answer *user_question* with the RAG chain; never raises.

    The chain is reused across calls made with the same API key, so the PDFs
    are not reloaded and re-embedded for every question. A consequence is
    that changes to the documents are not picked up until the key changes or
    the process restarts.

    Args:
        api_key: OpenAI API key. Surrounding whitespace is ignored.
        user_question: The question to ask. Surrounding whitespace is ignored.

    Returns:
        The chain's answer, a prompt to fill in missing input, or an
        ``"❌ Error: ..."`` string describing any exception raised while
        building or running the chain.
    """
    api_key = (api_key or "").strip()
    user_question = (user_question or "").strip()
    if not api_key or not user_question:
        return "Please provide both your OpenAI API key and a question."
    try:
        chain = _get_chain(api_key)
        # RetrievalQA takes its input under "query" and returns the answer
        # under "result"; invoke() replaces the deprecated Chain.run().
        return chain.invoke({"query": user_question})["result"]
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing.
        return f"❌ Error: {e}"


# Gradio UI: an API-key field, a question field, an answer box, and a button
# that passes both inputs to query_rag and shows its return value.
with gr.Blocks() as demo:
    gr.Markdown("# 🔍 RAG QA App\nUpload documents once (zipped), enter your OpenAI key, and ask questions.")

    # type="password" masks the key while it is typed.
    api_key_input = gr.Textbox(label="🔑 OpenAI API Key", type="password")
    question_input = gr.Textbox(label="❓ Your Question")
    output_box = gr.Textbox(label="📄 Answer", lines=10)
    ask_button = gr.Button("Ask")

    ask_button.click(fn=query_rag, inputs=[api_key_input, question_input], outputs=output_box)