Spaces:

Deevyankar
/

Handouts

Sleeping

App Files Files Community

Deevyankar committed on Sep 20, 2025

Commit

89ad98d

verified ·

1 Parent(s): 357d8c6

Create app.py

Browse files

Files changed (1) hide show

app.py +102 -0

app.py ADDED Viewed

	@@ -0,0 +1,102 @@

+import gradio as gr
+import fitz  # PyMuPDF
+from docx import Document
+import io
+import difflib
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
def extract_text_from_pdf(uploaded_file):
    """Extract the plain text of a PDF supplied as bytes or a file-like object.

    Args:
        uploaded_file: Raw PDF bytes, an object exposing ``read()`` that
            returns them, or ``None`` when nothing was uploaded.

    Returns:
        The text of every non-blank page, pages separated by a newline and
        the whole result stripped. ``""`` when ``uploaded_file`` is ``None``
        or no page yields text. On any failure, a string of the form
        ``"Error extracting text: <reason>"``; no exception is raised.
    """
    # A missing upload is "no text", not an extraction failure; returning ""
    # lets a caller's emptiness check catch it.
    if uploaded_file is None:
        return ""
    try:
        file_bytes = (
            uploaded_file
            if isinstance(uploaded_file, bytes)
            else uploaded_file.read()
        )
        # The context manager closes the document even if reading a page fails.
        with fitz.open(stream=file_bytes, filetype="pdf") as doc:
            page_texts = [page.get_text() for page in doc]
        return "\n".join(t for t in page_texts if t.strip()).strip()
    except Exception as e:
        # Deliberate best-effort boundary: failures are reported as text.
        return f"Error extracting text: {str(e)}"
def extract_los(lo_file):
    """Load learning outcomes (LOs) from an uploaded ``.txt`` or ``.docx`` file.

    Args:
        lo_file: Raw file bytes, an object exposing ``read()`` (and optionally
            ``name``), or ``None`` when nothing was uploaded.

    Returns:
        A list of stripped, non-blank lines (``.txt``) or paragraphs
        (``.docx``). ``[]`` for a missing upload or an unsupported extension.
        On failure, the one-element list ``["Error loading LOs: <reason>"]``;
        no exception is raised.
    """
    if lo_file is None:
        return []
    try:
        file_bytes = lo_file if isinstance(lo_file, bytes) else lo_file.read()
        name = getattr(lo_file, "name", "")
        if name:
            ext = name.lower().split(".")[-1]
        else:
            # Raw bytes carry no filename, so sniff the content instead of
            # assuming .docx: a .docx is a ZIP archive and starts with b"PK".
            ext = "docx" if file_bytes[:2] == b"PK" else "txt"
        if ext == "txt":
            # utf-8-sig decodes plain UTF-8 too and drops a leading BOM.
            lines = file_bytes.decode("utf-8-sig").splitlines()
            # Strip and drop blanks so both formats yield the same shape.
            return [line.strip() for line in lines if line.strip()]
        if ext == "docx":
            doc = Document(io.BytesIO(file_bytes))
            return [p.text.strip() for p in doc.paragraphs if p.text.strip()]
        return []
    except Exception as e:
        # Deliberate best-effort boundary: failures are reported as data.
        return [f"Error loading LOs: {str(e)}"]
def calculate_similarity(text, los):
    """Score how closely *text* matches the learning outcomes, as a percentage.

    The text and the LOs (joined into one document) are converted to TF-IDF
    vectors and compared by cosine similarity.

    Args:
        text: Handout text.
        los: List of learning-outcome strings.

    Returns:
        The cosine similarity multiplied by 100. ``0.0`` when there are no
        LOs, when *text* is empty or blank, or when the vectorizer cannot
        build a vocabulary from the inputs.
    """
    if not los or not text or not text.strip():
        return 0.0
    combined_los = " ".join(los)
    try:
        tfidf = TfidfVectorizer().fit_transform([text, combined_los])
    except ValueError:
        # Raised for an empty vocabulary (e.g. only punctuation or
        # one-character tokens); nothing to compare means no similarity.
        return 0.0
    return cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0] * 100
def quality_check(new_text):
    """Return a quality verdict for *new_text* based purely on its word count.

    More than 300 whitespace-separated words yields the green "improved"
    message; anything else yields the yellow "needs enhancement" message.
    """
    word_count = len(new_text.split())
    if word_count > 300:
        return "🟢 Content quality seems improved."
    return "🟡 Content quality needs enhancement."
def visual_diff(old_text, new_text):
    """Render a unified diff of two texts, compared line by line.

    Hunks carry three lines of context. Diff lines are produced without a
    line terminator and joined with a newline; identical texts give ``""``.
    """
    before = old_text.splitlines()
    after = new_text.splitlines()
    diff_lines = list(difflib.unified_diff(before, after, n=3, lineterm=""))
    return "\n".join(diff_lines)
def compare_handouts(old_pdf, new_pdf, lo_file):
    """Compare two handout PDFs and score the new one against learning outcomes.

    Args:
        old_pdf: Old handout, in any form ``extract_text_from_pdf`` accepts.
        new_pdf: New handout, in any form ``extract_text_from_pdf`` accepts.
        lo_file: Learning-outcomes file, in any form ``extract_los`` accepts.

    Returns:
        A 4-tuple of strings: change summary, learning-outcome list, LO
        similarity line, and quality verdict followed by a unified diff.
        When a PDF cannot be read or has no text, the first element carries
        the message and the other three are empty.
    """
    old_text = extract_text_from_pdf(old_pdf)
    new_text = extract_text_from_pdf(new_pdf)
    los = extract_los(lo_file)

    # The extractors report failures as strings instead of raising. Surface
    # them here rather than diffing an error message as if it were content.
    pdf_errors = [
        t for t in dict.fromkeys((old_text, new_text))
        if t.startswith("Error extracting text:")
    ]
    if pdf_errors:
        return "❗ " + "\n".join(pdf_errors), "", "", ""
    if not old_text or not new_text:
        return "❗ One or both PDFs may not contain extractable text.", "", "", ""

    # Same convention for the LO loader: show the error, but do not score
    # the handout against the error text.
    lo_error = ""
    if los and los[0].startswith("Error loading LOs:"):
        lo_error = los[0]
        los = []

    # Change metric is computed over the sets of distinct lines.
    old_lines = set(old_text.splitlines())
    new_lines = set(new_text.splitlines())
    added = new_lines - old_lines
    removed = old_lines - new_lines
    total_lines = max(len(old_lines | new_lines), 1)  # avoid division by zero
    change_percent = ((len(added) + len(removed)) / total_lines) * 100

    similarity_score = calculate_similarity(new_text, los)
    quality_statement = quality_check(new_text)
    diff_output = visual_diff(old_text, new_text)

    summary = f"🔍 **Change Summary:**\n- Added lines: {len(added)}\n- Removed lines: {len(removed)}\n- Change %: {change_percent:.2f}%"
    if lo_error:
        lo_output = f"❗ {lo_error}"
    elif los:
        lo_output = "\n".join([f"• {lo}" for lo in los])
    else:
        lo_output = "No learning outcomes detected."
    sim_output = f"📐 **LO Similarity Score:** {similarity_score:.2f}%"
    return summary, lo_output, sim_output, quality_statement + "\n\n📄 Visual Diff:\n" + diff_output
# Gradio UI: three binary file uploads feed compare_handouts, whose four
# returned strings fill the four text boxes in order.
iface = gr.Interface(
    fn=compare_handouts,
    inputs=[
        gr.File(label="📤 Old Handout PDF", type="binary"),
        gr.File(label="📥 New Handout PDF", type="binary"),
        gr.File(label="📚 Learning Outcomes (.docx or .txt)", type="binary"),
    ],
    outputs=[
        gr.Textbox(label="🧾 Change Summary", lines=4),
        gr.Textbox(label="🎯 Learning Outcomes", lines=6),
        gr.Textbox(label="📐 LO Semantic Similarity", lines=2),
        gr.Textbox(label="📊 Visual Diff + Content Quality", lines=20),
    ],
    title="📊 Smart Handout Comparator with LO Matching & Quality Insights",
    description="Upload PDFs and LO file to analyze update percentage, learning outcome alignment, and content improvement insights. Now with visual diff!",
)

# Start the server only when run as a script (e.g. `python app.py`), so the
# module can be imported for tests or reuse without launching a web server.
if __name__ == "__main__":
    iface.launch()