prithvi1029 committed on
Commit
43657dc
·
verified ·
1 Parent(s): 9fff9a1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -17
app.py CHANGED
@@ -1,52 +1,54 @@
1
  import gradio as gr
2
- import tempfile
3
 
4
  from langchain_community.document_loaders import PyPDFLoader
5
- from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain_community.embeddings import HuggingFaceEmbeddings
7
  from langchain_community.vectorstores import FAISS
8
  from langchain.chains import RetrievalQA
9
  from langchain_openai import ChatOpenAI
10
 
11
- def run_qa(pdf_file, question):
12
- if pdf_file is None or question.strip() == "":
13
- return "Please upload a PDF and enter a question."
14
 
15
- with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
16
- tmp.write(pdf_file)
17
- pdf_path = tmp.name
18
 
 
19
  loader = PyPDFLoader(pdf_path)
20
  docs = loader.load()
21
 
 
22
  splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
23
  chunks = splitter.split_documents(docs)
24
 
 
25
  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
26
  vectordb = FAISS.from_documents(chunks, embeddings)
27
 
 
28
  llm = ChatOpenAI(temperature=0)
29
 
 
30
  qa = RetrievalQA.from_chain_type(
31
  llm=llm,
32
  retriever=vectordb.as_retriever(),
33
  return_source_documents=True
34
  )
35
 
36
- result = qa(question)
 
37
 
38
- sources = "\n\n".join(
39
- [doc.page_content[:500] for doc in result["source_documents"][:2]]
40
- )
 
 
 
41
 
42
- return f"### Answer\n{result['result']}\n\n---\n### Sources\n{sources}"
43
 
44
  with gr.Blocks(title="Agentic Document Intelligence") as demo:
45
- gr.Markdown(
46
- "# 📄 Agentic Document Intelligence\nUpload a PDF and ask questions using RAG."
47
- )
48
 
49
- pdf = gr.File(label="Upload PDF", type="binary")
50
  question = gr.Textbox(label="Ask a question")
51
  output = gr.Markdown()
52
 
 
1
  import gradio as gr
 
2
 
3
  from langchain_community.document_loaders import PyPDFLoader
4
+ from langchain_community.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain_community.embeddings import HuggingFaceEmbeddings
6
  from langchain_community.vectorstores import FAISS
7
  from langchain.chains import RetrievalQA
8
  from langchain_openai import ChatOpenAI
9
 
 
 
 
10
 
11
+ def run_qa(pdf_path, question):
12
+ if pdf_path is None or question.strip() == "":
13
+ return "Please upload a PDF and enter a question."
14
 
15
+ # Load PDF
16
  loader = PyPDFLoader(pdf_path)
17
  docs = loader.load()
18
 
19
+ # Split into chunks
20
  splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
21
  chunks = splitter.split_documents(docs)
22
 
23
+ # Create embeddings + vector store
24
  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
25
  vectordb = FAISS.from_documents(chunks, embeddings)
26
 
27
+ # LLM
28
  llm = ChatOpenAI(temperature=0)
29
 
30
+ # Retrieval QA chain
31
  qa = RetrievalQA.from_chain_type(
32
  llm=llm,
33
  retriever=vectordb.as_retriever(),
34
  return_source_documents=True
35
  )
36
 
37
+ # Newer LangChain-safe call
38
+ result = qa.invoke({"query": question})
39
 
40
+ answer_text = result.get("result", "")
41
+ source_docs = result.get("source_documents", [])
42
+
43
+ sources = "\n\n".join([d.page_content[:500] for d in source_docs[:2]])
44
+
45
+ return f"### Answer\n{answer_text}\n\n---\n### Sources\n{sources}"
46
 
 
47
 
48
  with gr.Blocks(title="Agentic Document Intelligence") as demo:
49
+ gr.Markdown("# 📄 Agentic Document Intelligence\nUpload a PDF and ask questions using RAG.")
 
 
50
 
51
+ pdf = gr.File(label="Upload PDF", type="filepath")
52
  question = gr.Textbox(label="Ask a question")
53
  output = gr.Markdown()
54