rbbist committed on
Commit
27bc93f
·
verified ·
1 Parent(s): 430911f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -10
app.py CHANGED
@@ -5,20 +5,19 @@ from langchain.vectorstores import FAISS
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain.chains import RetrievalQA
7
  from langchain.llms import HuggingFacePipeline
8
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
9
- import torch
10
 
11
- st.set_page_config(page_title="PDF QA App (Open Source)", layout="wide")
12
- st.title("📘 Ask Questions from Uploaded PDFs (Free/Open Source)")
13
 
14
  uploaded_files = st.file_uploader("Upload multiple PDF files", type=["pdf"], accept_multiple_files=True)
15
 
16
  @st.cache_resource
17
  def load_llm():
18
- model_id = "mistralai/Mistral-7B-Instruct-v0.1"
19
  tokenizer = AutoTokenizer.from_pretrained(model_id)
20
- model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.float16)
21
- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)
22
  return HuggingFacePipeline(pipeline=pipe)
23
 
24
  if uploaded_files:
@@ -27,7 +26,9 @@ if uploaded_files:
27
  for file in uploaded_files:
28
  reader = PyPDF2.PdfReader(file)
29
  for page in reader.pages:
30
- all_text += page.extract_text() or ""
 
 
31
 
32
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
33
  texts = text_splitter.split_text(all_text)
@@ -42,5 +43,5 @@ if uploaded_files:
42
  question = st.text_input("Ask a question based on the uploaded PDFs:")
43
  if question:
44
  with st.spinner("Generating answer..."):
45
- response = qa_chain.run(question)
46
- st.success(response)
 
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain.chains import RetrievalQA
7
  from langchain.llms import HuggingFacePipeline
8
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 
9
 
10
+ st.set_page_config(page_title="PDF QA App (CPU)", layout="wide")
11
+ st.title("📘 Ask Questions from Uploaded PDFs (Free & CPU Friendly)")
12
 
13
  uploaded_files = st.file_uploader("Upload multiple PDF files", type=["pdf"], accept_multiple_files=True)
14
 
15
@st.cache_resource
def load_llm():
    """Build and cache the LLM used for answering questions.

    Loads the small, CPU-friendly ``google/flan-t5-base`` seq2seq model
    and wraps a text2text-generation pipeline in a LangChain
    ``HuggingFacePipeline``.

    Returns:
        HuggingFacePipeline: LangChain-compatible LLM wrapper.

    Note:
        ``@st.cache_resource`` ensures the model is loaded only once per
        Streamlit server process, not on every script rerun.
    """
    model_name = "google/flan-t5-base"
    tok = AutoTokenizer.from_pretrained(model_name)
    seq2seq = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    generator = pipeline("text2text-generation", model=seq2seq, tokenizer=tok)
    return HuggingFacePipeline(pipeline=generator)
22
 
23
  if uploaded_files:
 
26
  for file in uploaded_files:
27
  reader = PyPDF2.PdfReader(file)
28
  for page in reader.pages:
29
+ text = page.extract_text()
30
+ if text:
31
+ all_text += text
32
 
33
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
34
  texts = text_splitter.split_text(all_text)
 
43
  question = st.text_input("Ask a question based on the uploaded PDFs:")
44
  if question:
45
  with st.spinner("Generating answer..."):
46
+ answer = qa_chain.run(question)
47
+ st.success(answer)