prithvi1029 committed on
Commit
9d01ce0
·
verified ·
1 Parent(s): 927e0d3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import tempfile
3
+ from langchain.document_loaders import PyPDFLoader
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain.embeddings import HuggingFaceEmbeddings
6
+ from langchain.vectorstores import FAISS
7
+ from langchain.chains import RetrievalQA
8
+ from langchain.chat_models import ChatOpenAI
9
+
10
def run_qa(pdf_file, question):
    """Answer a question about an uploaded PDF using retrieval-augmented QA.

    The PDF bytes are written to a temporary file, loaded and split into
    overlapping chunks, embedded into an in-memory FAISS index, and queried
    through a RetrievalQA chain backed by ChatOpenAI.

    Args:
        pdf_file: Raw bytes of the uploaded PDF, or None if nothing was
            uploaded.
        question: The user's question. None or a blank string counts as
            missing.

    Returns:
        A Markdown string containing the answer followed by the first 500
        characters of up to two retrieved source chunks, or a short
        plain-text message when input is missing or the PDF yields no text.
    """
    import os  # local import: only needed to clean up the temp file

    # `question or ""` keeps a None question from raising AttributeError.
    if pdf_file is None or not (question or "").strip():
        return "Please upload a PDF and enter a question."

    # delete=False so the file can be reopened by path after this handle is
    # closed; it is removed explicitly once loading has finished.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(pdf_file)
        pdf_path = tmp.name

    try:
        docs = PyPDFLoader(pdf_path).load()
    finally:
        # Without this, every request would leave a PDF behind on disk.
        os.unlink(pdf_path)

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    chunks = splitter.split_documents(docs)
    if not chunks:
        # e.g. a scanned/image-only PDF: nothing to index, so stop here
        # instead of handing an empty list to the vector store.
        return "No extractable text was found in the PDF."

    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vectordb = FAISS.from_documents(chunks, embeddings)

    llm = ChatOpenAI(temperature=0)

    qa = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=vectordb.as_retriever(),
        return_source_documents=True,
    )

    result = qa(question)

    # Show short excerpts from at most two retrieved chunks.
    sources = "\n\n".join(
        doc.page_content[:500] for doc in result["source_documents"][:2]
    )

    return f"### Answer\n{result['result']}\n\n---\n### Sources\n{sources}"
42
+
43
# Gradio front end: a PDF upload, a question box, and a Markdown pane that
# displays whatever run_qa returns when the button is clicked.
with gr.Blocks(title="Agentic Document Intelligence") as demo:
    gr.Markdown("# 📄 Agentic Document Intelligence\nUpload a PDF and ask questions using RAG.")

    # type="binary" matches run_qa, which writes the upload out as bytes.
    pdf = gr.File(type="binary", label="Upload PDF")
    question = gr.Textbox(label="Ask a question")
    output = gr.Markdown()

    btn = gr.Button(value="Run")
    btn.click(fn=run_qa, inputs=[pdf, question], outputs=output)

demo.launch()