Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from transformers import pipeline | |
| from langdetect import detect | |
| import fitz # PyMuPDF | |
# Function to extract text from PDF
def extract_text_from_pdf(uploaded_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        uploaded_file: Either a binary file-like object (for example the
            value returned by ``st.file_uploader``) or a filesystem path
            accepted by ``fitz.open``.

    Returns:
        The text of all pages joined in page order. The result is an empty
        string when no page yields any text.
    """
    if hasattr(uploaded_file, "getvalue"):
        # BytesIO-style objects: getvalue() returns the whole buffer no
        # matter where the read cursor currently is.
        pdf_bytes = uploaded_file.getvalue()
    elif hasattr(uploaded_file, "read"):
        pdf_bytes = uploaded_file.read()
    else:
        pdf_bytes = None

    if pdf_bytes is None:
        # Not file-like: treat the argument as a path, as the original did.
        pdf_document = fitz.open(uploaded_file)
    else:
        # In-memory data must be passed via ``stream`` together with an
        # explicit file type; a positional argument is taken as a filename.
        pdf_document = fitz.open(stream=pdf_bytes, filetype="pdf")

    # The context manager closes the document even if a page fails to parse.
    with pdf_document:
        return "".join(page.get_text() for page in pdf_document)
# Language Detection Function
def is_sindhi(text):
    """Report whether ``detect`` classifies *text* as Sindhi.

    Args:
        text: The text to classify.

    Returns:
        ``True`` when the detected language code is ``"sd"``. ``False`` when
        another language is detected, when *text* is empty, whitespace-only
        or not a string, or when detection fails.
    """
    # Nothing to classify: answer directly instead of relying on the
    # detector raising for unusable input.
    if not isinstance(text, str) or not text.strip():
        return False

    from langdetect.lang_detect_exception import LangDetectException

    # NOTE(review): langdetect's documented list of supported languages does
    # not appear to include "sd" - confirm; if so this can never return True.
    try:
        return detect(text) == "sd"  # Sindhi language code
    except LangDetectException:
        # Raised when the text has no features the detector can use.
        return False
# Streamlit UI
st.title("School Assistant - PDF Query and Language Detection")

# File Upload Section
uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

# Question Input Section
question = st.text_input("Ask a question related to the PDF content:")


@st.cache_resource
def _load_qa_pipeline():
    """Build the Hugging Face question-answering pipeline.

    Streamlit re-executes this script on every widget interaction; caching
    the pipeline as a resource avoids rebuilding the model on each rerun.
    """
    return pipeline("question-answering")


# Initialize Hugging Face QA pipeline
qa_pipeline = _load_qa_pipeline()

if uploaded_file:
    # Extract text from the uploaded PDF
    pdf_text = extract_text_from_pdf(uploaded_file)

    # Check if the extracted text is in Sindhi
    if is_sindhi(pdf_text):
        st.write("The document appears to be in Sindhi.")
    else:
        st.write("The document is not in Sindhi.")

    # Show the extracted text preview
    st.text_area("Extracted Text Preview", pdf_text[:1000], height=200)

    if question:
        if pdf_text.strip():
            # Query the model for an answer
            answer = qa_pipeline(question=question, context=pdf_text)
            st.write("Answer: ", answer['answer'])
        else:
            # Without any context there is nothing for the model to search.
            st.warning(
                "No text could be extracted from this PDF, "
                "so the question cannot be answered."
            )