File size: 1,508 Bytes
a3a2932
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import streamlit as st
from transformers import pipeline
from langdetect import detect
import fitz  # PyMuPDF

# Function to extract text from PDF
def extract_text_from_pdf(uploaded_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        uploaded_file: Either a filesystem path, or a binary file-like
            object exposing ``read()`` (for example the object returned by
            ``st.file_uploader``).

    Returns:
        The text of all pages joined in page order.
    """
    if hasattr(uploaded_file, "read"):
        # fitz.open() interprets its first positional argument as a file
        # path, so in-memory uploads have to be handed over as raw bytes
        # through ``stream`` together with an explicit ``filetype``.
        if hasattr(uploaded_file, "seek"):
            # The buffer may already have been consumed by an earlier read.
            uploaded_file.seek(0)
        pdf_document = fitz.open(stream=uploaded_file.read(), filetype="pdf")
    else:
        pdf_document = fitz.open(uploaded_file)

    try:
        return "".join(page.get_text() for page in pdf_document)
    finally:
        # Release the document even if text extraction fails part-way.
        pdf_document.close()

# Language Detection Function
def is_sindhi(text):
    """Report whether ``detect`` classifies *text* as Sindhi.

    Args:
        text: The string to classify.

    Returns:
        True only when ``detect`` returns ``"sd"``. False for blank or
        non-string input, for any other detected language, and whenever
        detection raises an error.
    """
    # Blank or non-string input carries nothing to detect; answer directly
    # instead of relying on the detector raising.
    if not isinstance(text, str) or not text.strip():
        return False
    try:
        # NOTE(review): langdetect's bundled language profiles do not appear
        # to include "sd"; if so this comparison can never be true -- confirm
        # against the library's supported-language list.
        return detect(text) == "sd"  # Sindhi language code
    except Exception:
        # Best-effort check: a detection failure means "not Sindhi", but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return False

# Streamlit UI
st.title("School Assistant - PDF Query and Language Detection")

# File Upload Section
uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

# Question Input Section
question = st.text_input("Ask a question related to the PDF content:")


@st.cache_resource
def _load_qa_pipeline():
    """Build the Hugging Face question-answering pipeline once per process."""
    return pipeline("question-answering")


# Streamlit re-executes this script on every widget interaction; going
# through the cached loader avoids rebuilding the model on each rerun.
qa_pipeline = _load_qa_pipeline()

if uploaded_file:
    # Extract text from the uploaded PDF
    pdf_text = extract_text_from_pdf(uploaded_file)

    # Check if the extracted text is in Sindhi
    if is_sindhi(pdf_text):
        st.write("The document appears to be in Sindhi.")
    else:
        st.write("The document is not in Sindhi.")

    # Show the extracted text preview (first 1000 characters only)
    st.text_area("Extracted Text Preview", pdf_text[:1000], height=200)

    if question:
        if not pdf_text.strip():
            # There is no context to search (e.g. a PDF with no text
            # layer), so skip the model call instead of passing it an
            # empty context.
            st.warning("No text could be extracted from this PDF, so the question cannot be answered.")
        else:
            # Query the model for an answer
            answer = qa_pipeline(question=question, context=pdf_text)
            st.write("Answer: ", answer['answer'])