Spaces:

KousarRaza
/

LanguageAssistant

Sleeping

LanguageAssistant / app.py

Initial Comment

a3a2932 verified about 1 year ago

1.51 kB

	import streamlit as st
	from transformers import pipeline
	from langdetect import detect
	import fitz # PyMuPDF

	# Function to extract text from PDF
	def extract_text_from_pdf(uploaded_file):
	    """Return the concatenated text of every page in a PDF.

	    Args:
	        uploaded_file: The PDF to read. May be a file-like object (such as
	            the value returned by ``st.file_uploader``), raw ``bytes`` /
	            ``bytearray``, or a filesystem path.

	    Returns:
	        The text of all pages joined in page order; an empty string when
	        no page yields any text.
	    """
	    # PyMuPDF takes in-memory data through ``stream=``; a file-like object
	    # passed as the positional filename argument is not accepted.
	    if isinstance(uploaded_file, (bytes, bytearray)):
	        pdf_bytes = bytes(uploaded_file)
	    elif hasattr(uploaded_file, "getvalue"):
	        # getvalue() ignores the current stream position, so a buffer that
	        # has already been read once still yields the whole file.
	        pdf_bytes = uploaded_file.getvalue()
	    elif hasattr(uploaded_file, "read"):
	        pdf_bytes = uploaded_file.read()
	    else:
	        pdf_bytes = None  # treat the argument as a path
	    if pdf_bytes is None:
	        pdf_document = fitz.open(uploaded_file)
	    else:
	        pdf_document = fitz.open(stream=pdf_bytes, filetype="pdf")
	    # The context manager closes the document even if a page fails to parse.
	    with pdf_document:
	        return "".join(page.get_text() for page in pdf_document)

	# Language Detection Function
	def is_sindhi(text):
	    """Return True when *text* looks like Sindhi written in Arabic script.

	    Two signals are used, in order:

	    1. A script heuristic: the share of characters that belong to the
	       Sindhi alphabet but not to the Arabic, Persian or Urdu alphabets.
	    2. ``langdetect.detect`` reporting the ISO 639-1 code ``"sd"``.

	    Args:
	        text: The text to classify. ``None`` and blank strings are treated
	            as "not Sindhi".

	    Returns:
	        ``True`` if either signal identifies the text as Sindhi, else
	        ``False``. Detection failures are reported as ``False``.
	    """
	    if not text or not text.strip():
	        return False
	    # NOTE(review): the stock langdetect profiles do not include "sd", so the
	    # detect() check below cannot succeed on its own; the heuristic supplies
	    # the positive signal. Letters here are Sindhi additions to the Arabic
	    # script plus the two Unicode "ARABIC SIGN SINDHI ..." characters.
	    sindhi_only_chars = frozenset(
	        "\u067a\u067b\u067d\u067f\u0680\u0683\u0684\u0687"
	        "\u068a\u068c\u068d\u068f\u0699\u06a6\u06aa\u06b1"
	        "\u06b3\u06bb\u06fd\u06fe"
	    )
	    # Heuristic threshold: share of non-space characters that must be
	    # Sindhi-specific. TODO confirm against representative documents.
	    min_marker_ratio = 0.02
	    marker_count = sum(ch in sindhi_only_chars for ch in text)
	    visible_count = sum(not ch.isspace() for ch in text)
	    if marker_count / visible_count >= min_marker_ratio:
	        return True
	    try:
	        language = detect(text)
	    except Exception:
	        # Best-effort: detection errors (e.g. text with no usable features)
	        # mean "not Sindhi", but KeyboardInterrupt/SystemExit now propagate.
	        return False
	    return language == "sd"  # Sindhi language code

	# Streamlit UI
	st.title("School Assistant - PDF Query and Language Detection")

	# File Upload Section
	uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

	# Question Input Section
	question = st.text_input("Ask a question related to the PDF content:")


	@st.cache_resource
	def load_qa_pipeline():
	    """Build the Hugging Face question-answering pipeline once and reuse it.

	    Streamlit re-executes the script on each widget interaction; caching
	    the pipeline as a resource avoids reloading the model on every rerun.
	    """
	    return pipeline("question-answering")


	# Initialize Hugging Face QA pipeline (cached across reruns)
	qa_pipeline = load_qa_pipeline()

	if uploaded_file is not None:
	    # Extract text from the uploaded PDF
	    pdf_text = extract_text_from_pdf(uploaded_file)
	    if not pdf_text.strip():
	        # With no text there is nothing to classify or to use as QA
	        # context, so stop here instead of passing an empty context on.
	        st.warning("No extractable text was found in this PDF.")
	    else:
	        # Check if the extracted text is in Sindhi
	        if is_sindhi(pdf_text):
	            st.write("The document appears to be in Sindhi.")
	        else:
	            st.write("The document is not in Sindhi.")
	        # Show the extracted text preview (first 1000 characters only)
	        st.text_area("Extracted Text Preview", pdf_text[:1000], height=200)
	        # Ignore whitespace-only questions rather than querying the model
	        if question and question.strip():
	            # Query the model for an answer
	            answer = qa_pipeline(question=question, context=pdf_text)
	            st.write("Answer: ", answer['answer'])