Spaces:

KousarRaza
/

LanguageAssistant

Sleeping

App Files Community

KousarRaza committed on Dec 8, 2024

Commit

a3a2932

verified ·

1 Parent(s): 6115563

Initial Comment

Browse files

Files changed (1) hide show

app.py +52 -0

app.py ADDED Viewed

	@@ -0,0 +1,52 @@

+import streamlit as st
+from transformers import pipeline
+from langdetect import detect
+import fitz  # PyMuPDF
+# Function to extract text from PDF
+def extract_text_from_pdf(uploaded_file):
+    pdf_document = fitz.open(uploaded_file)
+    text = ""
+    for page_num in range(pdf_document.page_count):
+        page = pdf_document[page_num]
+        text += page.get_text()
+    return text
+# Language Detection Function
+def is_sindhi(text):
+    try:
+        language = detect(text)
+        return language == "sd"  # Sindhi language code
+    except:
+        return False
+# Streamlit UI
+st.title("School Assistant - PDF Query and Language Detection")
+# File Upload Section
+uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])
+# Question Input Section
+question = st.text_input("Ask a question related to the PDF content:")
+# Initialize Hugging Face QA pipeline
+qa_pipeline = pipeline("question-answering")
+if uploaded_file:
+    # Extract text from the uploaded PDF
+    pdf_text = extract_text_from_pdf(uploaded_file)
+    # Check if the extracted text is in Sindhi
+    if is_sindhi(pdf_text):
+        st.write("The document appears to be in Sindhi.")
+    else:
+        st.write("The document is not in Sindhi.")
+    # Show the extracted text preview
+    st.text_area("Extracted Text Preview", pdf_text[:1000], height=200)
+    if question:
+        # Query the model for an answer
+        answer = qa_pipeline(question=question, context=pdf_text)
+        st.write("Answer: ", answer['answer'])