Handouts / appv1.py
Deevyankar's picture
Rename app.py to appv1.py
357d8c6 verified
import gradio as gr
import fitz # PyMuPDF
from docx import Document
import io
def extract_text_from_pdf(uploaded_file):
    """Return the text of every non-blank page of a PDF, joined by newlines.

    Args:
        uploaded_file: Raw PDF bytes, or an object whose ``read()`` method
            returns them. ``None`` means no file was supplied.

    Returns:
        The extracted text with surrounding whitespace stripped (an empty
        string when no page contains text). This function never raises:
        on failure it returns a string starting with
        ``"Error extracting text: "``.
    """
    if uploaded_file is None:
        # Give a readable message instead of the AttributeError text that
        # calling ``None.read()`` would otherwise produce.
        return "Error extracting text: no file provided"
    try:
        # Handle both raw bytes and file-like objects.
        file_bytes = (
            uploaded_file
            if isinstance(uploaded_file, bytes)
            else uploaded_file.read()
        )
        # Open as a context manager so the document is closed even if a
        # page fails to parse.
        with fitz.open(stream=file_bytes, filetype="pdf") as doc:
            page_texts = [page.get_text() for page in doc]
        # Skip pages that contain only whitespace.
        return "\n".join(t for t in page_texts if t.strip()).strip()
    except Exception as e:
        return f"Error extracting text: {str(e)}"
def extract_los(lo_file):
    """Load learning outcomes from an optional ``.txt`` or ``.docx`` upload.

    Args:
        lo_file: Raw file bytes, an object with ``read()`` (and optionally a
            ``name`` attribute carrying the filename), or ``None`` when no
            file was supplied.

    Returns:
        A list of non-empty, stripped lines (``.txt``) or paragraphs
        (``.docx``). An empty list is returned for ``None``, for empty
        content and for unsupported extensions. This function never raises:
        on failure it returns a one-element list whose entry starts with
        ``"Error loading LOs: "``.
    """
    if lo_file is None:
        # The learning-outcomes file is optional; absence is not an error.
        return []
    try:
        is_raw = isinstance(lo_file, (bytes, bytearray))
        file_bytes = bytes(lo_file) if is_raw else lo_file.read()

        name = getattr(lo_file, "name", "")
        if not isinstance(name, str):
            name = ""
        ext = name.lower().rpartition(".")[2] if "." in name else ""

        if not ext:
            # Raw bytes carry no filename, so sniff the content instead:
            # a .docx is a ZIP archive and therefore starts with "PK".
            ext = "docx" if file_bytes.startswith(b"PK") else "txt"

        if ext == "txt":
            # "utf-8-sig" also accepts plain UTF-8 and drops a leading BOM.
            text = file_bytes.decode("utf-8-sig")
            return [line.strip() for line in text.splitlines() if line.strip()]
        if ext == "docx":
            doc = Document(io.BytesIO(file_bytes))
            return [p.text.strip() for p in doc.paragraphs if p.text.strip()]
        return []
    except Exception as e:
        return [f"Error loading LOs: {str(e)}"]
def compare_handouts(old_pdf, new_pdf, lo_file):
    """Summarise the line-level differences between two handout PDFs.

    Args:
        old_pdf: The previous handout, in any form accepted by
            ``extract_text_from_pdf``.
        new_pdf: The updated handout, same form as ``old_pdf``.
        lo_file: Optional learning-outcomes file, passed to ``extract_los``.

    Returns:
        A text report with the number of distinct lines added and removed,
        the change percentage (changed lines over all distinct lines in
        both documents) and the list of learning outcomes. If a PDF cannot
        be read or holds no text, a single ``❗`` message is returned.
    """
    old_text = extract_text_from_pdf(old_pdf)
    new_text = extract_text_from_pdf(new_pdf)

    # extract_text_from_pdf reports failures as a string with this prefix.
    # Surface it here so the error message is not diffed as handout text.
    error_prefix = "Error extracting text:"
    for label, text in (("Old handout", old_text), ("New handout", new_text)):
        if text.startswith(error_prefix):
            return f"❗ {label}: {text}"

    if not old_text or not new_text:
        return "❗ One or both PDFs may not contain extractable text."

    los = extract_los(lo_file)

    # Lines are compared as sets, so duplicates and ordering are ignored.
    old_lines = set(old_text.splitlines())
    new_lines = set(new_text.splitlines())
    added = new_lines - old_lines
    removed = old_lines - new_lines
    total_lines = max(len(old_lines | new_lines), 1)  # guard against /0
    change_percent = ((len(added) + len(removed)) / total_lines) * 100

    if los:
        lo_summary = "\n".join(f"• {lo}" for lo in los)
    else:
        lo_summary = "No learning outcomes detected."

    return (
        "🔍 **Change Summary:**\n"
        f"- Added lines: {len(added)}\n"
        f"- Removed lines: {len(removed)}\n"
        f"- Change %: {change_percent:.2f}%\n\n"
        "📘 **Learning Outcomes:**\n"
        f"{lo_summary}"
    )
# Gradio UI: three file uploads feed compare_handouts and the report is shown
# as plain text. type="binary" makes each upload arrive as raw bytes.
# NOTE(review): the description mentions "coverage", but compare_handouts only
# lists the learning outcomes — confirm whether a coverage check is intended.
iface = gr.Interface(
    fn=compare_handouts,
    inputs=[
        gr.File(label="📤 Old Handout PDF", type="binary"),
        gr.File(label="📥 New Handout PDF", type="binary"),
        gr.File(label="📚 Learning Outcomes (.docx or .txt)", type="binary"),
    ],
    outputs="text",
    title="📊 Handout Comparison & Learning Outcome Checker",
    description="Upload old & new PDFs with optional LOs to see update percentage and coverage.",
)
iface.launch()