Spaces:

Deevyankar
/

Handouts

Sleeping

App Files Files Community

Deevyankar committed on Sep 21, 2025

Commit

4c891c6

verified ·

1 Parent(s): 3c108e3

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -21

app.py CHANGED Viewed

@@ -1,13 +1,11 @@
 import gradio as gr
-import fitz  # PyMuPDF
 from sentence_transformers import SentenceTransformer, util
 import matplotlib.pyplot as plt
 import numpy as np
-import os
-# Load transformer model once
 model = SentenceTransformer("all-MiniLM-L6-v2")
 def extract_text_pdf(file_obj):
@@ -17,7 +15,7 @@ def extract_text_pdf(file_obj):
             for page in doc:
                 text += page.get_text()
             return text if text.strip() else None
-    except Exception as e:
         return None
 def semantic_similarity(text1, text2):
@@ -25,40 +23,77 @@ def semantic_similarity(text1, text2):
     emb2 = model.encode([text2], convert_to_tensor=True)
     return float(util.pytorch_cos_sim(emb1, emb2)[0][0])
def compare_docs(old_pdf, new_pdf):
    """Summarise how much the new handout differs from the old one.

    Both uploads are run through ``extract_text_pdf``; the two texts are then
    scored with ``semantic_similarity`` and the score is turned into a
    "percent changed" figure plus a one-line verdict.

    Returns:
        A ``(summary, None)`` tuple. The second slot is always ``None``.
        If either PDF yields no text, the summary is an error message.
    """
    before, after = extract_text_pdf(old_pdf), extract_text_pdf(new_pdf)
    if not (before and after):
        return "❌ Could not extract text from one or both PDFs.", None

    similarity = semantic_similarity(before, after)
    pct_changed = round((1 - similarity) * 100, 2)

    # Bucket the change percentage into a human-readable verdict.
    if pct_changed < 10:
        verdict = "✅ Minor updates detected, mostly similar content."
    elif pct_changed < 40:
        verdict = "🔄 Moderate content updates detected."
    else:
        verdict = "🆕 Major revisions and new content identified."

    report = "".join([
        f"📈 Estimated Content Change: {pct_changed}%\n\n",
        "🧠 Semantic Similarity Score: {:.2f}\n".format(similarity),
        verdict,
    ])
    return report, None
 # Two PDF uploads in, a text summary and a plot slot out.
 iface = gr.Interface(
     title="📘 Course Handout Comparator with Semantic AI",
     description="Upload old and new PDFs to see how much content has changed. Uses transformer model for expert-like judgment.",
     fn=compare_docs,
     inputs=[
         gr.File(label=caption, file_types=[".pdf"])
         for caption in (
             "Upload Old Handout (PDF)",
             "Upload New Handout (PDF)",
         )
     ],
     outputs=[
         gr.Textbox(label="Comparison Summary"),
         gr.Plot(label="(Coming Soon) Visual Summary"),
     ],
 )
 iface.launch()

 import gradio as gr
+import fitz
 from sentence_transformers import SentenceTransformer, util
 import matplotlib.pyplot as plt
+import pandas as pd
 import numpy as np
 model = SentenceTransformer("all-MiniLM-L6-v2")
 def extract_text_pdf(file_obj):
             for page in doc:
                 text += page.get_text()
             return text if text.strip() else None
+    except:
         return None
 def semantic_similarity(text1, text2):
     emb2 = model.encode([text2], convert_to_tensor=True)
     return float(util.pytorch_cos_sim(emb1, emb2)[0][0])
def compare_with_los(text, lo_list):
    """Score how closely ``text`` matches each learning outcome.

    Args:
        text: Document text to compare against the outcomes.
        lo_list: Iterable of learning-outcome strings.

    Returns:
        A list with one entry per outcome, in input order. Each entry is the
        cosine similarity between the outcome's embedding and the text's
        embedding, multiplied by 100 and rounded to two decimals. An empty
        ``lo_list`` yields an empty list.
    """
    outcomes = list(lo_list)
    if not outcomes:
        return []

    # The document embedding is the same for every outcome, so compute it
    # once instead of re-encoding the full text on each iteration.
    text_embedding = model.encode(text, convert_to_tensor=True)

    scores = []
    for outcome in outcomes:
        outcome_embedding = model.encode(outcome, convert_to_tensor=True)
        similarity = util.cos_sim(outcome_embedding, text_embedding)[0][0].item()
        scores.append(round(similarity * 100, 2))
    return scores
def _read_learning_outcomes(lo_file):
    """Return the non-blank, stripped lines of the learning-outcomes upload."""
    if lo_file is None:
        return []

    # NOTE(review): depending on the Gradio version/configuration the upload
    # may arrive as an open file object or as a filesystem path; both are
    # accepted here. Confirm against the deployed Gradio version.
    if hasattr(lo_file, "read"):
        raw = lo_file.read()
    else:
        with open(lo_file, "rb") as handle:
            raw = handle.read()

    if isinstance(raw, bytes):
        # "utf-8-sig" decodes plain UTF-8 and also drops a leading BOM.
        raw = raw.decode("utf-8-sig")
    else:
        raw = raw.lstrip("\ufeff")

    # Skip blank lines so they do not show up as empty learning outcomes.
    return [line.strip() for line in raw.splitlines() if line.strip()]


def compare_all(old_pdf, new_pdf, lo_file):
    """Compare two handouts overall and against a list of learning outcomes.

    Args:
        old_pdf: Upload for the old handout, passed to ``extract_text_pdf``.
        new_pdf: Upload for the new handout, passed to ``extract_text_pdf``.
        lo_file: Upload of a text file with one learning outcome per line.

    Returns:
        A ``(summary, figure, table_html)`` tuple:

        * ``summary``: text with the change percentage, the similarity score
          and a one-line verdict.
        * ``figure``: matplotlib figure with grouped bars of old and new match
          scores per outcome, or ``None`` when no comparison could be made.
        * ``table_html``: HTML table of per-outcome old/new scores and their
          difference, or ``None`` when no comparison could be made.
    """
    old_text = extract_text_pdf(old_pdf)
    new_text = extract_text_pdf(new_pdf)
    if not old_text or not new_text:
        return "❌ Could not extract text from one or both PDFs.", None, None

    # Overall semantic similarity
    sim_score = semantic_similarity(old_text, new_text)
    change_percent = round((1 - sim_score) * 100, 2)
    summary = f"📈 Content Change: {change_percent}%\n🧠 Similarity Score: {sim_score:.2f}\n\n"
    if change_percent < 10:
        summary += "✅ Minor content update."
    elif change_percent < 40:
        summary += "🔄 Moderate update."
    else:
        summary += "🆕 Significant changes detected."

    # LO comparison
    los = _read_learning_outcomes(lo_file)
    if not los:
        # Without outcomes there is nothing to chart or tabulate; still
        # return the overall summary instead of failing.
        summary += "\n\nNo learning outcomes provided; LO comparison skipped."
        return summary, None, None

    old_scores = compare_with_los(old_text, los)
    new_scores = compare_with_los(new_text, los)
    score_diff = [round(new - old, 2) for old, new in zip(old_scores, new_scores)]

    df = pd.DataFrame({
        "Learning Outcome": los,
        "Old Match (%)": old_scores,
        "New Match (%)": new_scores,
        "Change (%)": score_diff,
    })
    table_html = df.to_html(index=False)

    # Bar chart: old and new bars side by side for each outcome.
    fig, ax = plt.subplots(figsize=(10, 4))
    index = np.arange(len(los))
    bar_width = 0.35
    ax.bar(index, old_scores, bar_width, label='Old')
    ax.bar(index + bar_width, new_scores, bar_width, label='New')
    ax.set_xlabel('Learning Outcomes')
    ax.set_ylabel('Match Score (%)')
    ax.set_title('LO-wise Semantic Match')
    # Centre each tick between its pair of bars.
    ax.set_xticks(index + bar_width / 2)
    ax.set_xticklabels([f"LO{i}" for i in range(1, len(los) + 1)], rotation=45)
    ax.legend()
    fig.tight_layout()
    # pyplot keeps every figure it creates registered until it is closed, so
    # a long-running app would accumulate one per request. Closing only
    # deregisters it; the Figure object is still returned for display.
    plt.close(fig)

    return summary, fig, table_html
 # UI wiring: two handout PDFs plus a plain-text learning-outcomes file in;
 # text summary, bar chart and HTML table out (matching compare_all's
 # three return values).
 iface = gr.Interface(
     fn=compare_all,
     inputs=[
         # Restrict the handout pickers to PDFs, in the same way the
         # learning-outcomes picker is restricted to .txt.
         gr.File(label="Old Handout (PDF)", file_types=[".pdf"]),
         gr.File(label="New Handout (PDF)", file_types=[".pdf"]),
         gr.File(label="Learning Outcomes (.txt)", file_types=[".txt"]),
     ],
     outputs=[
         gr.Textbox(label="Summary"),
         gr.Plot(label="LO-wise Bar Chart"),
         gr.HTML(label="LO-wise Comparison Table"),
     ],
     title="📘 Semantic Handout Comparator with LO Alignment",
     description="Compare course handouts for overall change and LO alignment using transformer models.",
 )
 iface.launch()