ojas121 committed on
Commit
b02d030
·
verified ·
1 Parent(s): 539b3ce

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -0
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from sentence_transformers import SentenceTransformer, util
3
+ import PyPDF2
4
+
5
# Function to extract text from the uploaded PDF
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of *pdf_file* as a single string.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts as its source; this
            app passes the object returned by ``st.file_uploader``.

    Returns:
        The text of all pages, in page order, separated by newlines.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    # Join pages with "\n" so the last line of one page is not fused with the
    # first line of the next (callers split this text on newlines).
    # ``or ""`` keeps a page that yields no text from breaking the join.
    return "\n".join(page.extract_text() or "" for page in reader.pages)
12
+
13
# Function to split text into non-empty lines and embed them
def process_text(text):
    """Split *text* into stripped, non-empty lines and embed each one.

    Note that the "sentences" are really lines: the text is split on
    newline characters only, not on sentence punctuation.

    Args:
        text: The raw text to index.

    Returns:
        A ``(sentences, embeddings, model)`` tuple: the list of cleaned
        lines, the result of ``model.encode`` on that list, and the
        ``SentenceTransformer`` instance that produced it.
    """
    sentences = []
    for raw_line in text.split("\n"):
        cleaned = raw_line.strip()
        if cleaned:
            sentences.append(cleaned)

    # A lightweight transformer model
    model = SentenceTransformer('all-MiniLM-L6-v2')
    embeddings = model.encode(sentences, show_progress_bar=True)
    return sentences, embeddings, model
19
+
20
# Streamlit UI
st.title("GitaGPT: Bhagavad Gita Chatbot")
st.write("Upload the Bhagavad Gita PDF file and ask questions based on its teachings!")

# Upload PDF file
uploaded_file = st.file_uploader("Upload Bhagavad Gita PDF", type=["pdf"])

if uploaded_file:
    # Streamlit re-runs this whole script on every widget interaction
    # (including each new question). Keep the expensive extraction and
    # embedding results in session state and rebuild them only when a
    # different file is uploaded.
    file_key = (uploaded_file.name, uploaded_file.size)
    index_is_stale = (
        "gita_index" not in st.session_state
        or st.session_state["gita_index"]["file_key"] != file_key
    )

    if index_is_stale:
        with st.spinner("Extracting text and processing..."):
            # Step 1: Extract text
            raw_text = extract_text_from_pdf(uploaded_file)

            # A scanned/image-only PDF yields no text; searching an empty
            # index would fail later, so stop with a clear message instead.
            if not raw_text.strip():
                st.error("No extractable text was found in this PDF. Please upload a text-based PDF.")
                st.stop()

            # Step 2: Process text to generate embeddings
            sentences, embeddings, model = process_text(raw_text)

        st.session_state["gita_index"] = {
            "file_key": file_key,
            "sentences": sentences,
            "embeddings": embeddings,
            "model": model,
        }

    index = st.session_state["gita_index"]
    sentences = index["sentences"]
    embeddings = index["embeddings"]
    model = index["model"]

    st.success("PDF processed successfully! Ask your questions below.")

    # Step 3: Input for user query
    user_query = st.text_input("Ask your question:")

    if user_query:
        with st.spinner("Finding the best answer..."):
            # Compute embedding for the user query
            query_embedding = model.encode(user_query)
            # Compute similarity scores
            scores = util.cos_sim(query_embedding, embeddings)
            # Convert to a plain int so it is an ordinary list index
            best_match_idx = int(scores.argmax())
            # Fetch the best matching sentence
            response = sentences[best_match_idx]

        st.write(f"**Answer:** {response}")
else:
    st.info("Please upload a PDF file to begin.")
53
+