Spaces:

Deevyankar
/

Handouts

Sleeping

App Files Files Community

Handouts / appv1.py

Deevyankar

Rename app.py to appv1.py

357d8c6 verified 3 months ago

raw

history blame contribute delete

2.59 kB



	import gradio as gr
	import fitz # PyMuPDF
	from docx import Document
	import io

	def extract_text_from_pdf(uploaded_file):
	    """Return the text of an uploaded PDF, page after page.

	    Args:
	        uploaded_file: Raw PDF content as ``bytes``/``bytearray``, or a
	            binary file-like object exposing ``read()``. ``None`` (nothing
	            uploaded) is accepted.

	    Returns:
	        The text of every page that has non-whitespace text, joined with
	        newlines and stripped. An empty string when nothing was uploaded,
	        the upload is empty, or no page yields text. If reading or parsing
	        fails, a message starting with ``"Error extracting text: "`` is
	        returned instead of raising.
	    """
	    if uploaded_file is None:
	        return ""
	    try:
	        if isinstance(uploaded_file, (bytes, bytearray)):
	            file_bytes = bytes(uploaded_file)
	        else:
	            file_bytes = uploaded_file.read()
	        if not file_bytes:
	            # Nothing to parse; report "no text" rather than a parser error.
	            return ""
	        # The context manager closes the document even if a page fails.
	        with fitz.open(stream=file_bytes, filetype="pdf") as doc:
	            page_texts = [page.get_text() for page in doc]
	        # Skip pages without visible text so they add no blank separators.
	        return "\n".join(t for t in page_texts if t.strip()).strip()
	    except Exception as e:
	        # Boundary for the UI: report the failure as text, never raise.
	        return f"Error extracting text: {e}"

	def extract_los(lo_file):
	    """Load learning outcomes from an uploaded ``.txt`` or ``.docx`` file.

	    Args:
	        lo_file: Raw file content as ``bytes``/``bytearray``, or a binary
	            file-like object exposing ``read()`` and, optionally, ``name``.
	            ``None`` means no file was supplied.

	    Returns:
	        A list with one stripped, non-empty string per line (``.txt``) or
	        per paragraph (``.docx``). An empty list when no file was supplied
	        or the file name has an unsupported extension. If loading fails, a
	        single-element list ``["Error loading LOs: ..."]`` is returned
	        instead of raising.
	    """
	    if lo_file is None:
	        # No file simply means there are no outcomes to list.
	        return []
	    try:
	        if isinstance(lo_file, (bytes, bytearray)):
	            file_bytes = bytes(lo_file)
	        else:
	            file_bytes = lo_file.read()

	        name = str(getattr(lo_file, "name", "") or "")
	        if name:
	            ext = name.lower().split('.')[-1]
	        else:
	            # Raw bytes carry no file name, so sniff the content instead:
	            # a .docx is a ZIP archive and starts with the "PK\x03\x04"
	            # local-file-header signature; anything else is read as text.
	            ext = "docx" if file_bytes.startswith(b"PK\x03\x04") else "txt"

	        if ext == "txt":
	            # "utf-8-sig" drops a leading BOM that would otherwise end up
	            # glued to the first outcome.
	            text = file_bytes.decode("utf-8-sig")
	            return [line.strip() for line in text.splitlines() if line.strip()]
	        if ext == "docx":
	            doc = Document(io.BytesIO(file_bytes))
	            return [p.text.strip() for p in doc.paragraphs if p.text.strip()]
	        return []
	    except Exception as e:
	        # Boundary for the UI: report the failure as an entry, never raise.
	        return [f"Error loading LOs: {e}"]

	def compare_handouts(old_pdf, new_pdf, lo_file):
	    """Summarise line-level differences between two handout PDFs.

	    The comparison works on the sets of unique text lines of each PDF, so
	    line order and repeated lines are ignored.

	    Args:
	        old_pdf: The previous handout, as accepted by
	            ``extract_text_from_pdf``.
	        new_pdf: The updated handout, as accepted by
	            ``extract_text_from_pdf``.
	        lo_file: Optional learning-outcomes file, as accepted by
	            ``extract_los``; ``None`` when not supplied.

	    Returns:
	        A text report with the number of added and removed lines, the change
	        percentage (added + removed over all distinct lines) and the list of
	        learning outcomes, or a message starting with "❗" when the PDFs
	        cannot be compared.
	    """
	    if old_pdf is None or new_pdf is None:
	        return "❗ Please upload both the old and the new handout PDF."

	    old_text = extract_text_from_pdf(old_pdf)
	    new_text = extract_text_from_pdf(new_pdf)

	    # Extraction failures come back as a non-empty message, not an
	    # exception; surface them instead of diffing the message as content.
	    error_prefix = "Error extracting text:"
	    for label, text in (("Old handout", old_text), ("New handout", new_text)):
	        if text.startswith(error_prefix):
	            return f"❗ {label}: {text}"

	    if not old_text or not new_text:
	        return "❗ One or both PDFs may not contain extractable text."

	    # The learning-outcomes file is optional; only parse it when present.
	    los = extract_los(lo_file) if lo_file is not None else []

	    old_lines = set(old_text.splitlines())
	    new_lines = set(new_text.splitlines())

	    added = new_lines - old_lines
	    removed = old_lines - new_lines
	    # max(..., 1) keeps the division safe.
	    total_lines = max(len(old_lines | new_lines), 1)
	    change_percent = (len(added) + len(removed)) / total_lines * 100

	    if los:
	        lo_summary = "\n".join(f"• {lo}" for lo in los)
	    else:
	        lo_summary = "No learning outcomes detected."

	    return (
	        "🔍 Change Summary:\n"
	        f"- Added lines: {len(added)}\n"
	        f"- Removed lines: {len(removed)}\n"
	        f"- Change %: {change_percent:.2f}%\n\n"
	        f"📘 Learning Outcomes:\n{lo_summary}"
	    )

	# Gradio front end: three uploads (old PDF, new PDF, learning-outcomes
	# file), all requested with type="binary", feeding compare_handouts; the
	# returned report is displayed as plain text.
	iface = gr.Interface(
	    compare_handouts,
	    [
	        gr.File(label="📤 Old Handout PDF", type="binary"),
	        gr.File(label="📥 New Handout PDF", type="binary"),
	        gr.File(label="📚 Learning Outcomes (.docx or .txt)", type="binary"),
	    ],
	    "text",
	    title="📊 Handout Comparison & Learning Outcome Checker",
	    description="Upload old & new PDFs with optional LOs to see update percentage and coverage.",
	)

	# Launch the interface as soon as this module is executed.
	iface.launch()