Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import fitz # PyMuPDF | |
| from docx import Document | |
| import io | |
def extract_text_from_pdf(uploaded_file):
    """Extract the text of a PDF as one newline-joined string.

    Args:
        uploaded_file: Raw PDF bytes, or a file-like object exposing
            ``read()``. ``None`` (nothing uploaded) is accepted.

    Returns:
        The text of every non-blank page, joined with newlines and stripped.
        An empty string when nothing was uploaded, the upload is empty, or
        no page has extractable text. If extraction fails, a message
        starting with ``"Error extracting text: "`` is returned instead of
        raising, so callers must check for that prefix.
    """
    # A missing upload is "no text", not an AttributeError message that the
    # caller would otherwise treat as document content.
    if uploaded_file is None:
        return ""
    try:
        if isinstance(uploaded_file, (bytes, bytearray)):
            file_bytes = bytes(uploaded_file)
        else:
            file_bytes = uploaded_file.read()
        if not file_bytes:
            return ""
        # The context manager closes the document even if a page fails.
        with fitz.open(stream=file_bytes, filetype="pdf") as doc:
            page_texts = [page.get_text() for page in doc]
        # Skip pages that contain only whitespace (e.g. scanned images).
        return "\n".join(t for t in page_texts if t.strip()).strip()
    except Exception as e:
        # Best-effort boundary for the UI: report the failure as text.
        return f"Error extracting text: {str(e)}"
def extract_los(lo_file):
    """Load learning outcomes (LOs) from an uploaded .txt or .docx file.

    Args:
        lo_file: Raw file bytes, or a file-like object exposing ``read()``
            and optionally a ``name`` attribute. ``None`` means no LO file
            was uploaded.

    Returns:
        A list of non-empty, stripped lines (.txt) or paragraphs (.docx).
        An empty list when nothing was uploaded or the file extension is
        not supported. If loading fails, a single-element list whose item
        starts with ``"Error loading LOs: "`` is returned instead of
        raising.
    """
    # The LO file is optional; no upload simply means no outcomes.
    if lo_file is None:
        return []
    try:
        if isinstance(lo_file, (bytes, bytearray)):
            file_bytes = bytes(lo_file)
        else:
            file_bytes = lo_file.read()

        name = getattr(lo_file, "name", "")
        if not isinstance(name, str):
            name = ""
        ext = name.lower().split(".")[-1] if name else ""

        if not ext:
            # Raw bytes carry no filename. A .docx is a ZIP container and
            # therefore starts with the "PK" signature; anything else is
            # treated as plain text.
            ext = "docx" if file_bytes.startswith(b"PK") else "txt"

        if ext == "txt":
            # utf-8-sig drops a leading BOM so it does not end up in the
            # first outcome.
            text = file_bytes.decode("utf-8-sig")
            return [line.strip() for line in text.splitlines() if line.strip()]
        if ext == "docx":
            doc = Document(io.BytesIO(file_bytes))
            return [p.text.strip() for p in doc.paragraphs if p.text.strip()]
        return []
    except Exception as e:
        # Best-effort boundary for the UI: report the failure as an item.
        return [f"Error loading LOs: {str(e)}"]
def compare_handouts(old_pdf, new_pdf, lo_file):
    """Summarise line-level changes between two handout PDFs and list LOs.

    Args:
        old_pdf: Upload of the previous handout, passed straight to
            ``extract_text_from_pdf``.
        new_pdf: Upload of the revised handout, passed straight to
            ``extract_text_from_pdf``.
        lo_file: Upload of the learning-outcomes file, passed straight to
            ``extract_los``.

    Returns:
        A report string with the number of added and removed distinct
        lines, the change percentage relative to all distinct lines, and
        the learning outcomes. A warning string is returned when either PDF
        yields no text, and the extraction error message is returned when
        either PDF could not be read.
    """
    old_text = extract_text_from_pdf(old_pdf)
    new_text = extract_text_from_pdf(new_pdf)

    if not old_text or not new_text:
        return "β One or both PDFs may not contain extractable text."

    # extract_text_from_pdf reports failures as a message string; surface it
    # instead of diffing the message as if it were handout content.
    for text in (old_text, new_text):
        if text.startswith("Error extracting text:"):
            return text

    los = extract_los(lo_file)

    # Compare distinct, whitespace-trimmed, non-blank lines so that blank
    # lines and indentation differences do not inflate the numbers.
    old_lines = {line.strip() for line in old_text.splitlines() if line.strip()}
    new_lines = {line.strip() for line in new_text.splitlines() if line.strip()}
    added = new_lines - old_lines
    removed = old_lines - new_lines

    # max(..., 1) guards the division should both sets ever be empty.
    total_lines = max(len(old_lines | new_lines), 1)
    change_percent = ((len(added) + len(removed)) / total_lines) * 100

    if los:
        lo_summary = "\n".join(f"• {lo}" for lo in los)
    else:
        lo_summary = "No learning outcomes detected."

    # NOTE(review): the leading "π" / "β" glyphs in these messages look like
    # mis-decoded emoji — confirm the intended characters.
    return (
        f"π **Change Summary:**\n"
        f"- Added lines: {len(added)}\n"
        f"- Removed lines: {len(removed)}\n"
        f"- Change %: {change_percent:.2f}%\n\n"
        f"π **Learning Outcomes:**\n{lo_summary}"
    )
# Gradio UI: two PDF uploads plus a learning-outcomes upload go in as raw
# bytes (type="binary"); the report from compare_handouts comes out as text.
# NOTE(review): the leading "π…" glyphs in the labels and title look like
# mis-decoded emoji — confirm the intended characters.
iface = gr.Interface(
    fn=compare_handouts,
    inputs=[
        gr.File(label="π€ Old Handout PDF", type="binary"),
        gr.File(label="π₯ New Handout PDF", type="binary"),
        gr.File(label="π Learning Outcomes (.docx or .txt)", type="binary"),
    ],
    outputs="text",
    title="π Handout Comparison & Learning Outcome Checker",
    description="Upload old & new PDFs with optional LOs to see update percentage and coverage.",
)

# Launch only when executed as a script, so importing this module (for
# example to reuse or test the helpers) does not start a web server.
if __name__ == "__main__":
    iface.launch()