Spaces:

Deevyankar
/

Handouts

Sleeping

App Files Community

Deevyankar committed on Sep 19, 2025

Commit

39ec5fb

verified ·

1 Parent(s): 843f763

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -34

app.py CHANGED Viewed

@@ -3,12 +3,12 @@ import fitz  # PyMuPDF
 import difflib
 from sentence_transformers import SentenceTransformer, util
 from docx import Document
-# Load the AI model for semantic similarity
 model = SentenceTransformer('all-MiniLM-L6-v2')
-# Extract raw text from PDF
 def extract_text_from_pdf(pdf_file):
     doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
     full_text = ""
@@ -16,8 +16,7 @@ def extract_text_from_pdf(pdf_file):
         full_text += page.get_text()
     return full_text
-import io
 def extract_los(lo_file):
     if lo_file.name.endswith('.txt'):
         return lo_file.read().decode('utf-8').splitlines()
@@ -28,54 +27,51 @@ def extract_los(lo_file):
     else:
         return []
-""""# Extract lines from uploaded LO file (.txt or .docx)
-def extract_los(lo_file):
-    if lo_file.name.endswith('.txt'):
-        return lo_file.read().decode('utf-8').splitlines()
-    elif lo_file.name.endswith('.docx'):
-        doc = Document(lo_file)
-        return [para.text.strip() for para in doc.paragraphs if para.text.strip()]
-    else:
-        return []"""
-# Main function to compare PDFs and assess LO coverage
 def compare_and_assess(old_pdf, new_pdf, lo_file):
-    # Extract content
     old_text = extract_text_from_pdf(old_pdf)
     new_text = extract_text_from_pdf(new_pdf)
-    # Compare versions
     old_lines = old_text.splitlines()
     new_lines = new_text.splitlines()
     diff = list(difflib.unified_diff(old_lines, new_lines))
     added = [line for line in diff if line.startswith('+') and not line.startswith('+++')]
     removed = [line for line in diff if line.startswith('-') and not line.startswith('---')]
     percent_change = (len(added) + len(removed)) / max(len(old_lines), 1) * 100
-    # Learning Outcome Analysis
     los = extract_los(lo_file)
     lo_scores = []
-    if los:
-        new_emb = model.encode(new_text, convert_to_tensor=True)
-        for lo in los:
-            lo_emb = model.encode(lo, convert_to_tensor=True)
-            sim = util.cos_sim(new_emb, lo_emb).max().item()
-            lo_scores.append(f"• {lo[:80]}: {sim*100:.1f}% relevant")
     # Output
     summary = f"📈 Content Updated: {percent_change:.2f}%\n"
     summary += f"🔼 Added Lines: {len(added)}\n🔽 Removed Lines: {len(removed)}\n\n"
-    if lo_scores:
-        summary += "🎯 Learning Outcome Coverage:\n" + "\n".join(lo_scores[:10])
-    else:
-        summary += "⚠️ No valid Learning Outcome file uploaded."
-    return summary
-# Define Gradio interface
 iface = gr.Interface(
     fn=compare_and_assess,
     inputs=[
@@ -85,7 +81,8 @@ iface = gr.Interface(
     ],
     outputs="text",
     title="📚 Course Handout Comparator + LO Evaluator",
-    description="Upload two PDF handouts (old + new) and a Learning Outcome file. The app compares content, calculates % updated, and checks how well the new handout meets your course learning outcomes."
 )
 iface.launch()

 import difflib
 from sentence_transformers import SentenceTransformer, util
 from docx import Document
+import io
+# Load the sentence-transformer model
 model = SentenceTransformer('all-MiniLM-L6-v2')
+# Extract text from PDF using PyMuPDF
 def extract_text_from_pdf(pdf_file):
     doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
     full_text = ""
         full_text += page.get_text()
     return full_text
+# Extract Learning Outcomes from .txt or .docx
 def extract_los(lo_file):
     if lo_file.name.endswith('.txt'):
         return lo_file.read().decode('utf-8').splitlines()
     else:
         return []
+# Main app logic
 def compare_and_assess(old_pdf, new_pdf, lo_file):
+    if not old_pdf or not new_pdf or not lo_file:
+        return "❌ Please upload all three files."
+    # Extract text
     old_text = extract_text_from_pdf(old_pdf)
     new_text = extract_text_from_pdf(new_pdf)
+    if len(old_text.strip()) < 50 or len(new_text.strip()) < 50:
+        return "⚠️ One of the PDFs may be empty or unreadable."
+    # Diff analysis
     old_lines = old_text.splitlines()
     new_lines = new_text.splitlines()
     diff = list(difflib.unified_diff(old_lines, new_lines))
     added = [line for line in diff if line.startswith('+') and not line.startswith('+++')]
     removed = [line for line in diff if line.startswith('-') and not line.startswith('---')]
     percent_change = (len(added) + len(removed)) / max(len(old_lines), 1) * 100
+    # LO analysis
     los = extract_los(lo_file)
+    if not los:
+        return "⚠️ No valid Learning Outcomes found in the file."
+    new_emb = model.encode(new_text, convert_to_tensor=True)
     lo_scores = []
+    for lo in los:
+        lo_emb = model.encode(lo, convert_to_tensor=True)
+        sim = util.cos_sim(new_emb, lo_emb).max().item()
+        lo_scores.append(f"• {lo[:80]}: {sim*100:.1f}% relevant")
     # Output
     summary = f"📈 Content Updated: {percent_change:.2f}%\n"
     summary += f"🔼 Added Lines: {len(added)}\n🔽 Removed Lines: {len(removed)}\n\n"
+    summary += "🎯 Learning Outcome Coverage:\n" + "\n".join(lo_scores[:10])
+    # Debug logs (can be viewed in Hugging Face Logs tab)
+    print("✅ PDFs compared successfully.")
+    print("LOs evaluated:", len(lo_scores))
+    return summary
+# Gradio interface
 iface = gr.Interface(
     fn=compare_and_assess,
     inputs=[
     ],
     outputs="text",
     title="📚 Course Handout Comparator + LO Evaluator",
+    description="Compare two PDF handouts (old + new) and a Learning Outcome file. Calculates % updated and checks how well the new content aligns with your course outcomes."
 )
 iface.launch()