Spaces:

Deevyankar
/

Handouts

Sleeping

App Files Files Community

Deevyankar committed on Sep 18, 2025

Commit

0a99a54

verified ·

1 Parent(s): e17d93e

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -35

app.py CHANGED Viewed

@@ -1,22 +1,31 @@
-# app.py
 import gradio as gr
 import difflib
-from docx import Document
-import os
-def extract_text(file):
-    ext = os.path.splitext(file.name)[1]
-    if ext == ".txt":
-        return file.read().decode("utf-8")
-    elif ext == ".docx":
-        doc = Document(file)
-        return "\n".join([para.text for para in doc.paragraphs])
     else:
-        return "Unsupported file type. Please upload a .txt or .docx file."
-def compare_documents(old_file, new_file):
-    old_text = extract_text(old_file)
-    new_text = extract_text(new_file)
     old_lines = old_text.splitlines()
     new_lines = new_text.splitlines()
@@ -24,24 +33,20 @@ def compare_documents(old_file, new_file):
     diff = list(difflib.unified_diff(old_lines, new_lines))
     added = [line for line in diff if line.startswith('+') and not line.startswith('+++')]
     removed = [line for line in diff if line.startswith('-') and not line.startswith('---')]
     percent_change = (len(added) + len(removed)) / max(len(old_lines), 1) * 100
-    summary = f"📈 Updated Content: {percent_change:.2f}%\n\n"
-    summary += f"🔼 Added: {len(added)} lines\n🔽 Removed: {len(removed)} lines\n\n"
-    preview = "\n".join(diff[:100]) or "No differences found."
-    return summary + preview
-# Gradio Interface
-iface = gr.Interface(
-    fn=compare_documents,
-    inputs=[
-        gr.File(label="Upload Old Document (.txt or .docx)"),
-        gr.File(label="Upload New Document (.txt or .docx)")
-    ],
-    outputs="text",
-    title="📄 Document Version Comparator",
-    description="Upload two versions of a document to compare updates and get change percentage. Supports .txt and .docx files."
-)
-iface.launch()

 import gradio as gr
+import fitz  # PyMuPDF
 import difflib
+from sentence_transformers import SentenceTransformer, util
+model = SentenceTransformer('all-MiniLM-L6-v2')
+def extract_text_from_pdf(pdf_file):
+    """Return the concatenated text of every page of an uploaded PDF.
+
+    Args:
+        pdf_file: Binary file-like object whose ``read()`` returns the raw
+            PDF bytes.
+
+    Returns:
+        The text of all pages joined in page order.
+    """
+    # Open the document as a context manager so it is closed even when
+    # text extraction raises part-way through.
+    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
+        return "".join(page.get_text() for page in doc)
+def extract_los(lo_file):
+    """Read learning outcomes (one per line/paragraph) from an uploaded file.
+
+    Args:
+        lo_file: File-like object with a ``name`` attribute; ``.txt`` files
+            are read as UTF-8 text, ``.docx`` files are parsed with
+            python-docx.
+
+    Returns:
+        The non-blank lines (``.txt``) or non-blank paragraphs (``.docx``)
+        in file order, or an empty list for any other extension.
+    """
+    # Compare on a lower-cased name so ".TXT" / ".DOCX" uploads are accepted.
+    filename = lo_file.name.lower()
+    if filename.endswith('.txt'):
+        # "utf-8-sig" drops a leading BOM that would otherwise be glued to
+        # the first outcome.
+        lines = lo_file.read().decode('utf-8-sig').splitlines()
+        # Skip blank lines, matching the .docx branch, so empty outcomes
+        # are never scored or displayed.
+        return [line for line in lines if line.strip()]
+    elif filename.endswith('.docx'):
+        from docx import Document
+        doc = Document(lo_file)
+        return [para.text for para in doc.paragraphs if para.text.strip()]
     else:
+        return []
+def compare_and_assess(old_pdf, new_pdf, lo_file):
+    """Report how much a PDF changed and how relevant it is to each learning outcome.
+
+    Args:
+        old_pdf: Binary file-like object holding the previous PDF version.
+        new_pdf: Binary file-like object holding the updated PDF version.
+        lo_file: Uploaded learning-outcome file, passed to ``extract_los``.
+
+    Returns:
+        A text report containing the percentage of changed lines, the
+        added/removed line counts, and a relevance score for at most the
+        first 10 learning outcomes.
+    """
+    # Compare PDFs
+    old_text = extract_text_from_pdf(old_pdf)
+    new_text = extract_text_from_pdf(new_pdf)
     old_lines = old_text.splitlines()
     new_lines = new_text.splitlines()
     diff = list(difflib.unified_diff(old_lines, new_lines))
     added = [line for line in diff if line.startswith('+') and not line.startswith('+++')]
     removed = [line for line in diff if line.startswith('-') and not line.startswith('---')]
+    # NOTE: added + removed lines are measured against the old line count
+    # only, so this value can exceed 100 when the new version is much longer.
     percent_change = (len(added) + len(removed)) / max(len(old_lines), 1) * 100
+    # LO analysis
+    # Only the first 10 outcomes are reported, so only those are embedded.
+    shown_los = extract_los(lo_file)[:10]
+    lo_scores = []
+    if shown_los:
+        # NOTE(review): the whole document is embedded as a single text;
+        # presumably the model truncates long inputs, so later pages may
+        # not influence the score -- confirm against the model's limits.
+        new_emb = model.encode(new_text, convert_to_tensor=True)
+        # Encode all outcomes in one batched call instead of one call each.
+        lo_embs = model.encode(shown_los, convert_to_tensor=True)
+        # cos_sim yields a 1 x len(shown_los) matrix; row 0 holds the scores.
+        scores = util.cos_sim(new_emb, lo_embs)[0].tolist()
+        lo_scores = [
+            f"• {lo[:80]}: {score*100:.1f}% relevance"
+            for lo, score in zip(shown_los, scores)
+        ]
+    # Format Output
+    summary = f"📈 Content Updated: {percent_change:.2f}%\n"
+    summary += f"🔼 Added Lines: {len(added)} | 🔽 Removed Lines: {len(removed)}\n\n"
+    summary += "🎯 Learning Outcome Coverage:\n" + "\n".join(lo_scores)
+    return summary