Spaces:

Deevyankar
/

Handouts

Sleeping

App Files Community

Deevyankar committed on Sep 19, 2025

Commit

d0c3e02

verified ·

1 Parent(s): 2135a5e

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -42

app.py CHANGED Viewed

@@ -5,83 +5,96 @@ from sentence_transformers import SentenceTransformer, util
 from docx import Document
 import io
-# Load the sentence-transformer model
 model = SentenceTransformer('all-MiniLM-L6-v2')
-# Extract text from PDF using PyMuPDF
 def extract_text_from_pdf(pdf_file):
-    doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
-    full_text = ""
-    for page in doc:
-        full_text += page.get_text()
-    return full_text
-# Extract Learning Outcomes from .txt or .docx
 def extract_los(lo_file):
-    if lo_file.name.endswith('.txt'):
-        return lo_file.read().decode('utf-8').splitlines()
-    elif lo_file.name.endswith('.docx'):
-        file_bytes = io.BytesIO(lo_file.read())
-        doc = Document(file_bytes)
-        return [para.text.strip() for para in doc.paragraphs if para.text.strip()]
-    else:
         return []
-# Main app logic
 def compare_and_assess(old_pdf, new_pdf, lo_file):
     if not old_pdf or not new_pdf or not lo_file:
         return "❌ Please upload all three files."
-    # Extract text
     old_text = extract_text_from_pdf(old_pdf)
     new_text = extract_text_from_pdf(new_pdf)
-    if len(old_text.strip()) < 50 or len(new_text.strip()) < 50:
         return "⚠️ One of the PDFs may be empty or unreadable."
-    # Diff analysis
     old_lines = old_text.splitlines()
     new_lines = new_text.splitlines()
-    diff = list(difflib.unified_diff(old_lines, new_lines))
-    added = [line for line in diff if line.startswith('+') and not line.startswith('+++')]
-    removed = [line for line in diff if line.startswith('-') and not line.startswith('---')]
     percent_change = (len(added) + len(removed)) / max(len(old_lines), 1) * 100
-    # LO analysis
     los = extract_los(lo_file)
-    if not los:
-        return "⚠️ No valid Learning Outcomes found in the file."
-    new_emb = model.encode(new_text, convert_to_tensor=True)
     lo_scores = []
-    for lo in los:
-        lo_emb = model.encode(lo, convert_to_tensor=True)
-        sim = util.cos_sim(new_emb, lo_emb).max().item()
-        lo_scores.append(f"• {lo[:80]}: {sim*100:.1f}% relevant")
-    # Output
-    summary = f"📈 Content Updated: {percent_change:.2f}%\n"
-    summary += f"🔼 Added Lines: {len(added)}\n🔽 Removed Lines: {len(removed)}\n\n"
-    summary += "🎯 Learning Outcome Coverage:\n" + "\n".join(lo_scores[:10])
-    # Debug logs (can be viewed in Hugging Face Logs tab)
-    print("✅ PDFs compared successfully.")
-    print("LOs evaluated:", len(lo_scores))
-    return summary
-# Gradio interface
 iface = gr.Interface(
     fn=compare_and_assess,
     inputs=[
         gr.File(label="Upload Old PDF", type="binary"),
         gr.File(label="Upload New PDF", type="binary"),
-        gr.File(label="Upload Learning Outcomes (.txt or .docx)", type="binary")
     ],
     outputs="text",
     title="📚 Course Handout Comparator + LO Evaluator",
-    description="Compare two PDF handouts (old + new) and a Learning Outcome file. Calculates % updated and checks how well the new content aligns with your course outcomes."
 )
 iface.launch()

 from docx import Document
 import io
+# Load model
 model = SentenceTransformer('all-MiniLM-L6-v2')
+# PDF text extraction
 def extract_text_from_pdf(pdf_file):
+    try:
+        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
+        text = ""
+        for page in doc:
+            text += page.get_text()
+        return text
+    except Exception as e:
+        print(f"[PDF ERROR] {e}")
+        return ""
+# Extract LO from .txt or .docx
 def extract_los(lo_file):
+    try:
+        if lo_file.name.endswith(".txt"):
+            return lo_file.read().decode("utf-8").splitlines()
+        elif lo_file.name.endswith(".docx"):
+            file_bytes = io.BytesIO(lo_file.read())
+            doc = Document(file_bytes)
+            return [para.text.strip() for para in doc.paragraphs if para.text.strip()]
+        else:
+            return []
+    except Exception as e:
+        print(f"[LO ERROR] {e}")
         return []
+# Main function
 def compare_and_assess(old_pdf, new_pdf, lo_file):
     if not old_pdf or not new_pdf or not lo_file:
         return "❌ Please upload all three files."
     old_text = extract_text_from_pdf(old_pdf)
     new_text = extract_text_from_pdf(new_pdf)
+    if len(old_text.strip()) < 20 or len(new_text.strip()) < 20:
         return "⚠️ One of the PDFs may be empty or unreadable."
+    # Compare content
     old_lines = old_text.splitlines()
     new_lines = new_text.splitlines()
+    diff = list(difflib.unified_diff(old_lines, new_lines))
+    added = [l for l in diff if l.startswith("+") and not l.startswith("+++")]
+    removed = [l for l in diff if l.startswith("-") and not l.startswith("---")]
     percent_change = (len(added) + len(removed)) / max(len(old_lines), 1) * 100
+    # LO Coverage
     los = extract_los(lo_file)
     lo_scores = []
+    if los:
+        new_emb = model.encode(new_text, convert_to_tensor=True)
+        for lo in los:
+            lo_emb = model.encode(lo, convert_to_tensor=True)
+            sim = util.cos_sim(new_emb, lo_emb).max().item()
+            lo_scores.append((lo, sim))
+        lo_scores = sorted(lo_scores, key=lambda x: x[1], reverse=True)
+        lo_summary = "\n".join([f"• {lo[:90]} — {score*100:.1f}%" for lo, score in lo_scores[:10]])
+    else:
+        lo_summary = "⚠️ No valid Learning Outcomes found."
+    # Final output
+    result = f"📊 **Comparison Summary**\n"
+    result += f"- 🧾 Added lines: {len(added)}\n"
+    result += f"- 🗑️ Removed lines: {len(removed)}\n"
+    result += f"- 🔄 Overall update: {percent_change:.2f}%\n\n"
+    result += f"📌 **Top Learning Outcome Coverage:**\n{lo_summary}"
+    # Debug logs
+    print("✅ Comparison done.")
+    print(f"LOs analyzed: {len(lo_scores)}")
+    return result
+# Gradio UI
 iface = gr.Interface(
     fn=compare_and_assess,
     inputs=[
         gr.File(label="Upload Old PDF", type="binary"),
         gr.File(label="Upload New PDF", type="binary"),
+        gr.File(label="Upload Learning Outcomes (.txt or .docx)", type="binary"),
     ],
     outputs="text",
     title="📚 Course Handout Comparator + LO Evaluator",
+    description="Compare two PDF handouts and check how well the new version matches your Learning Outcomes. Supports .txt and .docx LO files.",
 )
 iface.launch()