KousarRaza committed on
Commit
a3a2932
·
verified ·
1 Parent(s): 6115563

Initial Comment

Browse files
Files changed (1) hide show
  1. app.py +52 -0
app.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline
3
+ from langdetect import detect
4
+ import fitz # PyMuPDF
5
+
6
+ # Function to extract text from PDF
7
def extract_text_from_pdf(uploaded_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        uploaded_file: Either a file-like object holding the PDF bytes
            (e.g. the object returned by ``st.file_uploader``) or a
            filesystem path accepted by ``fitz.open``.

    Returns:
        The text of all pages joined in page order; an empty string when
        no page yields any text.

    Raises:
        Whatever ``fitz.open`` raises for unreadable or invalid PDF data.
    """
    # An uploaded file lives in memory, not on disk, so its bytes must be
    # handed to PyMuPDF via ``stream=``; passing the object as a filename
    # would not read the uploaded content.
    if hasattr(uploaded_file, "getvalue"):
        # getvalue() does not depend on the current read position, so the
        # result is the same if the same upload object is processed again.
        pdf_bytes = uploaded_file.getvalue()
    elif hasattr(uploaded_file, "read"):
        pdf_bytes = uploaded_file.read()
    else:
        pdf_bytes = None

    if pdf_bytes is None:
        # Not file-like: treat it as a path, as the original call did.
        pdf_document = fitz.open(uploaded_file)
    else:
        pdf_document = fitz.open(stream=pdf_bytes, filetype="pdf")

    # The context manager closes the document even if a page fails.
    with pdf_document:
        return "".join(page.get_text() for page in pdf_document)
14
+
15
+ # Language Detection Function
16
def is_sindhi(text):
    """Return True when language detection classifies *text* as Sindhi.

    Detection is best-effort: any failure inside the detector is treated
    as "not Sindhi" rather than propagated to the caller.

    Args:
        text: The text to classify.

    Returns:
        True if ``detect(text)`` returns the code ``"sd"``; False
        otherwise, including for empty, whitespace-only or non-string
        input and when detection fails.
    """
    # Nothing to classify: answer directly instead of relying on the
    # detector raising for empty input.
    if not isinstance(text, str) or not text.strip():
        return False

    # NOTE(review): langdetect's published list of supported languages does
    # not appear to include "sd" -- confirm; if so, this comparison can
    # never be true and a different detector is needed for Sindhi.
    try:
        language = detect(text)
    except Exception:
        # Deliberate best-effort, but no longer swallows KeyboardInterrupt
        # or SystemExit the way a bare ``except:`` did.
        return False
    return language == "sd"  # Sindhi language code
22
+
23
+ # Streamlit UI
24
# Streamlit UI: upload a PDF, report whether it looks Sindhi, preview the
# extracted text, and answer a free-form question about it.
st.title("School Assistant - PDF Query and Language Detection")

# File Upload Section
uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

# Question Input Section
question = st.text_input("Ask a question related to the PDF content:")


@st.cache_resource
def _load_qa_pipeline():
    """Build the Hugging Face question-answering pipeline once per process."""
    return pipeline("question-answering")


# Streamlit re-executes this script on every widget interaction; the cached
# loader keeps the model from being rebuilt on each rerun.
qa_pipeline = _load_qa_pipeline()

if uploaded_file:
    # Extract text from the uploaded PDF
    pdf_text = extract_text_from_pdf(uploaded_file)

    # Check if the extracted text is in Sindhi
    if is_sindhi(pdf_text):
        st.write("The document appears to be in Sindhi.")
    else:
        st.write("The document is not in Sindhi.")

    # Show the extracted text preview (first 1000 characters only)
    st.text_area("Extracted Text Preview", pdf_text[:1000], height=200)

    if question:
        if pdf_text.strip():
            # Query the model for an answer
            answer = qa_pipeline(question=question, context=pdf_text)
            st.write("Answer: ", answer['answer'])
        else:
            # The QA pipeline rejects an empty context, so report the
            # problem instead of letting the app crash.
            st.warning(
                "No text could be extracted from this PDF, "
                "so the question cannot be answered."
            )
52
+