"""Gradio app that diffs two handout PDFs and lists learning outcomes."""

import io

import fitz  # PyMuPDF
import gradio as gr
from docx import Document

# Leading bytes of a ZIP local-file header; a .docx file is a ZIP archive.
_ZIP_SIGNATURE = b"PK\x03\x04"


def _read_bytes(uploaded_file):
    """Return the raw content of an upload given as bytes or a file-like object."""
    if isinstance(uploaded_file, (bytes, bytearray)):
        return bytes(uploaded_file)
    return uploaded_file.read()


def _pdf_text(uploaded_file):
    """Return the text of every non-blank page of a PDF.

    Unlike ``extract_text_from_pdf`` this lets exceptions propagate, so a
    caller can tell a failed extraction apart from real document text.
    """
    # The document is used as a context manager so it is closed even when
    # reading a page fails.
    with fitz.open(stream=_read_bytes(uploaded_file), filetype="pdf") as doc:
        page_texts = [page.get_text() for page in doc]
    return "".join(f"{text}\n" for text in page_texts if text.strip()).strip()


def extract_text_from_pdf(uploaded_file):
    """Extract the text of a PDF supplied as bytes or a file-like object.

    Args:
        uploaded_file: Raw PDF bytes, or an object with a ``read()`` method.

    Returns:
        The concatenated text of all non-blank pages, stripped of surrounding
        whitespace. On any failure, a string starting with
        ``"Error extracting text: "`` is returned instead of raising.
    """
    try:
        return _pdf_text(uploaded_file)
    except Exception as e:
        return f"Error extracting text: {str(e)}"


def extract_los(lo_file):
    """Load learning outcomes from a ``.docx`` or ``.txt`` upload.

    Args:
        lo_file: Raw bytes, an object with ``read()`` (and optionally a
            ``name`` attribute), or ``None`` when nothing was uploaded.

    Returns:
        A list of non-empty, stripped lines (``.txt``) or paragraphs
        (``.docx``). An empty list is returned when no file was given or a
        named file has an unsupported extension. On failure the list holds a
        single ``"Error loading LOs: ..."`` message.
    """
    if lo_file is None:
        return []
    try:
        file_bytes = _read_bytes(lo_file)
        name = getattr(lo_file, "name", "")
        if isinstance(name, str) and name:
            ext = name.lower().split(".")[-1]
        else:
            # Raw bytes carry no file name, so detect the format from the
            # content: ZIP signature means .docx, anything else is plain text.
            ext = "docx" if file_bytes.startswith(_ZIP_SIGNATURE) else "txt"

        if ext == "txt":
            # "utf-8-sig" also accepts files saved with a byte-order mark.
            lines = file_bytes.decode("utf-8-sig").splitlines()
        elif ext == "docx":
            lines = [p.text for p in Document(io.BytesIO(file_bytes)).paragraphs]
        else:
            return []
        # Both formats drop blank entries so no empty bullets are rendered.
        return [line.strip() for line in lines if line.strip()]
    except Exception as e:
        return [f"Error loading LOs: {str(e)}"]


def compare_handouts(old_pdf, new_pdf, lo_file=None):
    """Summarise line-level changes between two PDFs and list learning outcomes.

    Lines are compared as sets, so duplicates and ordering are ignored. The
    change percentage is (added + removed) over the number of distinct lines
    found in either document.

    Args:
        old_pdf: The previous handout, as bytes or a file-like object.
        new_pdf: The updated handout, as bytes or a file-like object.
        lo_file: Optional learning-outcomes upload (``.docx`` or ``.txt``).

    Returns:
        A Markdown-formatted summary string, or a message describing why the
        comparison could not be made.
    """
    if old_pdf is None or new_pdf is None:
        return "❗ Please upload both the old and the new handout PDF."

    # Extraction failures must stop here; otherwise the error message itself
    # would be diffed as if it were document text.
    try:
        old_text = _pdf_text(old_pdf)
        new_text = _pdf_text(new_pdf)
    except Exception as e:
        return f"Error extracting text: {e}"

    if not old_text or not new_text:
        return "❗ One or both PDFs may not contain extractable text."

    los = extract_los(lo_file)

    old_lines = set(old_text.splitlines())
    new_lines = set(new_text.splitlines())
    added = new_lines - old_lines
    removed = old_lines - new_lines

    # max(..., 1) guards the division below.
    total_lines = max(len(old_lines | new_lines), 1)
    change_percent = (len(added) + len(removed)) / total_lines * 100

    if los:
        lo_summary = "\n".join(f"• {lo}" for lo in los)
    else:
        lo_summary = "No learning outcomes detected."

    return (
        f"🔍 **Change Summary:**\n"
        f"- Added lines: {len(added)}\n"
        f"- Removed lines: {len(removed)}\n"
        f"- Change %: {change_percent:.2f}%\n\n"
        f"📘 **Learning Outcomes:**\n{lo_summary}"
    )


# type="binary" makes Gradio pass each upload to the callback as raw bytes.
iface = gr.Interface(
    fn=compare_handouts,
    inputs=[
        gr.File(label="📤 Old Handout PDF", type="binary"),
        gr.File(label="📄 New Handout PDF", type="binary"),
        gr.File(label="📚 Learning Outcomes (.docx or .txt)", type="binary"),
    ],
    outputs="text",
    title="📊 Handout Comparison & Learning Outcome Checker",
    description="Upload old & new PDFs with optional LOs to see update percentage and coverage.",
)

iface.launch()