"""Streamlit app: upload a PDF, report whether it looks Sindhi, and answer questions about it."""

import fitz  # PyMuPDF
import streamlit as st
from langdetect import detect
from langdetect.lang_detect_exception import LangDetectException
from transformers import pipeline


def extract_text_from_pdf(uploaded_file):
    """Return the concatenated text of every page in an uploaded PDF.

    Args:
        uploaded_file: The object returned by ``st.file_uploader`` (an
            in-memory, file-like buffer holding the PDF bytes).

    Returns:
        The text of all pages joined in page order; an empty string when
        no page yields any text.
    """
    # The upload lives in memory, not on disk, so hand PyMuPDF the raw bytes
    # via ``stream=`` instead of passing the object as if it were a path.
    # ``getvalue()`` returns the full buffer regardless of the read position,
    # which keeps this safe across Streamlit reruns.
    pdf_bytes = uploaded_file.getvalue()
    with fitz.open(stream=pdf_bytes, filetype="pdf") as pdf_document:
        return "".join(page.get_text() for page in pdf_document)


def is_sindhi(text):
    """Return True when langdetect classifies *text* as Sindhi (``"sd"``).

    Detection failures (for example, text with no detectable features)
    are treated as "not Sindhi" and yield False.
    """
    # NOTE(review): langdetect's documented list of supported languages does
    # not appear to include "sd"; if so this can never return True. Verify
    # and consider a detector that supports Sindhi.
    try:
        return detect(text) == "sd"  # Sindhi language code
    except LangDetectException:
        return False


@st.cache_resource
def _load_qa_pipeline():
    """Build the Hugging Face question-answering pipeline once per process."""
    # Streamlit re-executes the script on every interaction; caching avoids
    # reloading the model each time.
    return pipeline("question-answering")


# Streamlit UI
st.title("School Assistant - PDF Query and Language Detection")

# File upload section
uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

# Question input section
question = st.text_input("Ask a question related to the PDF content:")

if uploaded_file:
    # Extract text from the uploaded PDF
    pdf_text = extract_text_from_pdf(uploaded_file)

    # Report whether the extracted text is detected as Sindhi
    if is_sindhi(pdf_text):
        st.write("The document appears to be in Sindhi.")
    else:
        st.write("The document is not in Sindhi.")

    # Show a preview of the extracted text
    st.text_area("Extracted Text Preview", pdf_text[:1000], height=200)

    if question:
        if pdf_text.strip():
            # Query the model for an answer
            answer = _load_qa_pipeline()(question=question, context=pdf_text)
            st.write("Answer: ", answer['answer'])
        else:
            # An empty context cannot be queried (e.g. a scanned, image-only PDF).
            st.warning("No text could be extracted from the PDF, so the question cannot be answered.")