Nav772 committed on
Commit
1103643
·
verified ·
1 Parent(s): ec5ce14

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +25 -61
app.py CHANGED
@@ -12,87 +12,51 @@ embedding_model = HuggingFaceEmbeddings(
12
  )
13
 
14
  client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")
15
-
16
  vectorstore = None
17
 
18
  def process_pdf(pdf_file):
19
  global vectorstore
20
-
21
  if pdf_file is None:
22
  return "Please upload a PDF file."
23
-
24
  try:
25
  loader = PyPDFLoader(pdf_file.name)
26
  documents = loader.load()
27
-
28
- text_splitter = RecursiveCharacterTextSplitter(
29
- chunk_size=1000,
30
- chunk_overlap=200,
31
- )
32
  chunks = text_splitter.split_documents(documents)
33
-
34
- vectorstore = FAISS.from_documents(
35
- documents=chunks,
36
- embedding=embedding_model
37
- )
38
-
39
- return f"Processed {len(documents)} pages into {len(chunks)} chunks. Ready!"
40
-
41
  except Exception as e:
42
- return f"Error: {str(e)}"
43
 
44
  def answer_question(question):
45
  global vectorstore
46
-
47
  if vectorstore is None:
48
- return "Please upload and process a PDF first.", ""
49
-
50
  if not question.strip():
51
- return "Please enter a question.", ""
52
-
53
  try:
54
  docs = vectorstore.similarity_search(question, k=3)
55
  context = "\n\n".join([doc.page_content for doc in docs])
56
-
57
- prompt = f"""<|system|>
58
- You are a helpful assistant. Answer based on the context only.
59
- </s>
60
- <|user|>
61
- Context:
62
- {context}
63
-
64
- Question: {question}
65
- </s>
66
- <|assistant|>"""
67
-
68
- response = client.text_generation(
69
- prompt,
70
- max_new_tokens=512,
71
- temperature=0.7,
72
- )
73
-
74
- sources = []
75
- for i, doc in enumerate(docs, 1):
76
- page = doc.metadata.get('page', 'N/A')
77
- if isinstance(page, int):
78
- page += 1
79
- preview = doc.page_content[:150].replace('\n', ' ')
80
- sources.append(f"{i}. Page {page}: {preview}...")
81
-
82
  return response, "\n".join(sources)
83
-
84
  except Exception as e:
85
  return f"Error: {str(e)}", ""
86
 
87
- demo = gr.Interface(
88
- fn=answer_question,
89
- inputs=gr.Textbox(label="Question"),
90
- outputs=[
91
- gr.Textbox(label="Answer"),
92
- gr.Textbox(label="Sources")
93
- ],
94
- title="RAG Document Q&A",
95
- description="Ask questions about uploaded documents."
96
- )
97
-
98
- demo.launch()
 
 
 
 
 
12
  )
13
 
14
  client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")
 
15
  vectorstore = None
16
 
17
  def process_pdf(pdf_file):
18
  global vectorstore
 
19
  if pdf_file is None:
20
  return "Please upload a PDF file."
 
21
  try:
22
  loader = PyPDFLoader(pdf_file.name)
23
  documents = loader.load()
24
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
 
 
 
 
25
  chunks = text_splitter.split_documents(documents)
26
+ vectorstore = FAISS.from_documents(documents=chunks, embedding=embedding_model)
27
+ return f"✅ Processed {len(documents)} pages into {len(chunks)} chunks."
 
 
 
 
 
 
28
  except Exception as e:
29
+ return f"Error: {str(e)}"
30
 
31
  def answer_question(question):
32
  global vectorstore
 
33
  if vectorstore is None:
34
+ return "Upload a PDF first.", ""
 
35
  if not question.strip():
36
+ return "Enter a question.", ""
 
37
  try:
38
  docs = vectorstore.similarity_search(question, k=3)
39
  context = "\n\n".join([doc.page_content for doc in docs])
40
+ prompt = f"<|system|>\nAnswer based on context only.\n</s>\n<|user|>\nContext:\n{context}\n\nQuestion: {question}\n</s>\n<|assistant|>\n"
41
+ response = client.text_generation(prompt, max_new_tokens=512, temperature=0.7)
42
+ sources = [f"{i}. Page {doc.metadata.get('page', 'N/A')}" for i, doc in enumerate(docs, 1)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  return response, "\n".join(sources)
 
44
  except Exception as e:
45
  return f"Error: {str(e)}", ""
46
 
47
+ with gr.Blocks() as demo:
48
+ gr.Markdown("# 📚 RAG Document Q&A")
49
+ with gr.Row():
50
+ with gr.Column():
51
+ pdf = gr.File(label="Upload PDF", file_types=[".pdf"])
52
+ btn1 = gr.Button("Process PDF")
53
+ status = gr.Textbox(label="Status")
54
+ with gr.Column():
55
+ question = gr.Textbox(label="Question")
56
+ btn2 = gr.Button("Ask")
57
+ answer = gr.Textbox(label="Answer", lines=5)
58
+ sources = gr.Textbox(label="Sources")
59
+ btn1.click(process_pdf, pdf, status)
60
+ btn2.click(answer_question, question, [answer, sources])
61
+
62
+ demo.launch(server_name="0.0.0.0", server_port=7860)