rbbist committed on
Commit
430911f
·
verified ·
1 Parent(s): e18442b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -12
app.py CHANGED
@@ -1,19 +1,46 @@
1
  import streamlit as st
2
- from transformers import pipeline
 
 
 
 
 
 
 
3
 
4
- st.title("Sentiment Analysis")
 
 
 
5
 
6
- # Load pipeline once
7
  @st.cache_resource
8
- def load_pipeline():
9
- return pipeline("sentiment-analysis")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- pipe = load_pipeline()
 
12
 
13
- # User input
14
- text = st.text_area("Enter some text to analyze")
 
15
 
16
- # Show result
17
- if text:
18
- result = pipe(text)
19
- st.write("**Result:**", result[0]["label"], f"({result[0]['score']:.2f})")
 
 
1
  import streamlit as st
2
+ import PyPDF2
3
+ from langchain.embeddings import SentenceTransformerEmbeddings
4
+ from langchain.vectorstores import FAISS
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain.chains import RetrievalQA
7
+ from langchain.llms import HuggingFacePipeline
8
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
9
+ import torch
10
 
11
# Basic Streamlit page chrome: wide layout suits the side-by-side Q&A view.
st.set_page_config(page_title="PDF QA App (Open Source)", layout="wide")
st.title("📘 Ask Questions from Uploaded PDFs (Free/Open Source)")

# Multiple-file upload; only PDFs are accepted. Returns a list (or None)
# of in-memory UploadedFile objects consumed by the processing block below.
uploaded_files = st.file_uploader("Upload multiple PDF files", type=["pdf"], accept_multiple_files=True)
15
 
 
16
@st.cache_resource
def load_llm():
    """Build the local generation model once per Streamlit session.

    Downloads the Mistral-7B-Instruct checkpoint, wraps it in a
    transformers text-generation pipeline, and adapts that pipeline to
    LangChain's LLM interface. ``st.cache_resource`` ensures the heavy
    model load happens only on the first call.

    Returns:
        HuggingFacePipeline: LangChain-compatible LLM wrapper.
    """
    checkpoint = "mistralai/Mistral-7B-Instruct-v0.1"
    tok = AutoTokenizer.from_pretrained(checkpoint)
    # NOTE(review): fp16 + device_map="auto" assumes a GPU (or accelerate
    # offload) is available — confirm for the deployment target.
    lm = AutoModelForCausalLM.from_pretrained(
        checkpoint,
        device_map="auto",
        torch_dtype=torch.float16,
    )
    generator = pipeline(
        "text-generation",
        model=lm,
        tokenizer=tok,
        max_new_tokens=512,
    )
    return HuggingFacePipeline(pipeline=generator)
23
+
24
# Main workflow: ingest PDFs -> chunk -> embed/index -> retrieve -> answer.
if uploaded_files:
    st.info("Reading and processing PDFs...")

    # Concatenate the extracted text of every page of every uploaded PDF.
    all_text = ""
    for file in uploaded_files:
        reader = PyPDF2.PdfReader(file)
        for page in reader.pages:
            # extract_text() can return None (e.g. image-only pages);
            # fall back to the empty string so concatenation never breaks.
            all_text += page.extract_text() or ""

    # Guard: scanned/image-only PDFs may yield no text at all. Without
    # this check split_text() returns [] and FAISS.from_texts raises on
    # an empty chunk list; fail with a clear message instead.
    if not all_text.strip():
        st.warning("No extractable text found in the uploaded PDFs.")
        st.stop()

    # Overlapping chunks preserve context across chunk boundaries.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    texts = text_splitter.split_text(all_text)

    # Embed each chunk and build an in-memory FAISS similarity index.
    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    db = FAISS.from_texts(texts, embeddings)

    # Wire the retriever and the cached local LLM into a RetrievalQA chain.
    retriever = db.as_retriever()
    llm = load_llm()
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

    question = st.text_input("Ask a question based on the uploaded PDFs:")
    if question:
        with st.spinner("Generating answer..."):
            response = qa_chain.run(question)
            st.success(response)