prithvi1029 committed on
Commit
a0afdb9
·
verified ·
1 Parent(s): b98f9c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -20
app.py CHANGED
@@ -4,64 +4,64 @@ from langchain_community.document_loaders import PyPDFLoader
4
  from langchain_text_splitters import RecursiveCharacterTextSplitter
5
  from langchain_community.embeddings import HuggingFaceEmbeddings
6
  from langchain_community.vectorstores import FAISS
7
- from langchain_openai import ChatOpenAI
8
 
9
 
def run_qa(pdf_path, question):
    """Answer a question about a PDF using retrieval-augmented generation.

    The PDF is loaded, split into overlapping chunks, embedded into an
    in-memory FAISS index, and the chunks most similar to the question are
    given to an OpenAI chat model as the only allowed context.

    Args:
        pdf_path: Filesystem path of the uploaded PDF, or None/empty when
            nothing has been uploaded.
        question: The user's question. None, empty and whitespace-only
            values are treated as missing.

    Returns:
        A Markdown string: either a short message asking for the missing
        input, a notice that the PDF had no extractable text, or the
        answer followed by a preview of the top two retrieved chunks.
    """
    if not pdf_path or not question or not question.strip():
        return "Please upload a PDF and enter a question."

    # 1) Load PDF
    docs = PyPDFLoader(pdf_path).load()

    # 2) Split
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    chunks = splitter.split_documents(docs)
    if not chunks:
        # Image-only PDFs produce no chunks, and a vector index cannot be
        # built from an empty list, so report it instead of crashing.
        return "No readable text was found in this PDF."

    # 3) Embed + Vector store
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vectordb = FAISS.from_documents(chunks, embeddings)

    # 4) Retrieve relevant chunks
    retriever = vectordb.as_retriever(search_kwargs={"k": 4})
    # `invoke` is the supported entry point; `get_relevant_documents` is deprecated.
    retrieved_docs = retriever.invoke(question)
    context = "\n\n".join(d.page_content for d in retrieved_docs)

    # 5) LLM (OpenAI)
    llm = ChatOpenAI(temperature=0)

    # Built line by line so the prompt text does not depend on how this
    # function happens to be indented.
    prompt = "\n".join(
        [
            "You are a helpful assistant. Answer the question using ONLY the context below.",
            'If the answer is not in the context, say "I don\'t know".',
            "",
            "CONTEXT:",
            context,
            "",
            "QUESTION:",
            question,
            "",
            "Answer:",
        ]
    )

    response = llm.invoke(prompt)
    # Chat models return a message object; fall back to str() for anything else.
    answer = response.content if hasattr(response, "content") else str(response)

    # 6) Sources preview: first 500 characters of the top two chunks
    sources = "\n\n".join(d.page_content[:500] for d in retrieved_docs[:2])

    return f"### Answer\n{answer}\n\n---\n### Sources\n{sources}"
55
 
56
 
# Gradio UI: PDF upload and question box in, Markdown answer out.
with gr.Blocks(title="Agentic Document Intelligence") as demo:
    gr.Markdown(
        "# 📄 Agentic Document Intelligence\n"
        "Upload a PDF and ask questions using RAG."
    )

    pdf = gr.File(label="Upload PDF", type="filepath")
    question = gr.Textbox(label="Ask a question")
    output = gr.Markdown()

    # Clicking "Run" sends the uploaded file path and the question to run_qa
    # and renders the returned Markdown in `output`.
    btn = gr.Button("Run")
    btn.click(fn=run_qa, inputs=[pdf, question], outputs=output)

demo.launch()
 
4
  from langchain_text_splitters import RecursiveCharacterTextSplitter
5
  from langchain_community.embeddings import HuggingFaceEmbeddings
6
  from langchain_community.vectorstores import FAISS
7
+ from langchain_huggingface import HuggingFaceEndpoint
8
 
9
 
def run_qa(pdf_path, question):
    """Answer a question about a PDF using retrieval-augmented generation.

    The PDF is loaded, split into overlapping chunks, embedded into an
    in-memory FAISS index, and the chunks most similar to the question are
    given to a Hugging Face endpoint LLM as the only allowed context.

    Args:
        pdf_path: Filesystem path of the uploaded PDF, or None/empty when
            nothing has been uploaded.
        question: The user's question. None, empty and whitespace-only
            values are treated as missing.

    Returns:
        A Markdown string: either a short message asking for the missing
        input, a notice that the PDF had no extractable text, or the
        answer followed by a preview of the top two retrieved chunks.
    """
    # `not question` comes first so a None question is rejected here
    # instead of raising AttributeError on `.strip()`.
    if not pdf_path or not question or not question.strip():
        return "Please upload a PDF and enter a question."

    # Load PDF
    docs = PyPDFLoader(pdf_path).load()

    # Split
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    chunks = splitter.split_documents(docs)
    if not chunks:
        # Image-only PDFs produce no chunks, and a vector index cannot be
        # built from an empty list, so report it instead of crashing.
        return "No readable text was found in this PDF."

    # Embeddings
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vectordb = FAISS.from_documents(chunks, embeddings)

    # Retrieve
    retriever = vectordb.as_retriever(search_kwargs={"k": 4})
    # `invoke` is the supported entry point; `get_relevant_documents` is deprecated.
    retrieved_docs = retriever.invoke(question)
    context = "\n\n".join(d.page_content for d in retrieved_docs)

    # Hugging Face LLM
    llm = HuggingFaceEndpoint(
        repo_id="mistralai/Mistral-7B-Instruct-v0.2",
        temperature=0.2,
        max_new_tokens=512,
    )

    # Built line by line so the prompt text does not depend on how this
    # function happens to be indented.
    prompt = "\n".join(
        [
            "You are a helpful assistant. Answer ONLY using the context.",
            'If the answer is not present, say "I don\'t know".',
            "",
            "Context:",
            context,
            "",
            "Question:",
            question,
            "",
            "Answer:",
        ]
    )

    answer = llm.invoke(prompt)

    # Sources preview: first 500 characters of the top two chunks
    sources = "\n\n".join(d.page_content[:500] for d in retrieved_docs[:2])

    return f"### Answer\n{answer}\n\n---\n### Sources\n{sources}"
56
 
57
 
# Gradio UI: PDF upload and question box in, Markdown answer out.
with gr.Blocks(title="Agentic Document Intelligence") as demo:
    gr.Markdown("# 📄 Agentic Document Intelligence (HF LLM)")

    pdf = gr.File(label="Upload PDF", type="filepath")
    question = gr.Textbox(label="Ask a question")
    output = gr.Markdown()

    # Clicking "Run" sends the uploaded file path and the question to run_qa
    # and renders the returned Markdown in `output`.
    run_button = gr.Button("Run")
    run_button.click(fn=run_qa, inputs=[pdf, question], outputs=output)

demo.launch()