Spaces:

Deevyankar
/

Handouts

Sleeping

App Files Files Community

Deevyankar committed on Sep 21, 2025

Commit

a4cd443

verified ·

1 Parent(s): 7a8b10d

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -28

app.py CHANGED Viewed

@@ -1,12 +1,17 @@
 import gradio as gr
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
 import fitz  # PyMuPDF
 import docx
 import matplotlib.pyplot as plt
 import pandas as pd
 def extract_text_from_pdf(file):
     text = ""
     try:
@@ -22,52 +27,45 @@ def extract_text_from_docx(file):
     return "\n".join([para.text for para in doc.paragraphs])
 def semantic_match(lo_texts, content):
-    vectorizer = TfidfVectorizer().fit_transform([content] + lo_texts)
-    vectors = vectorizer.toarray()
-    content_vector = vectors[0]
-    lo_vectors = vectors[1:]
-    similarities = cosine_similarity([content_vector], lo_vectors)[0]
     return similarities.tolist()
 def compare_handouts(old_pdf, new_pdf, lo_file):
-    # Extract text from handouts
     old_text = extract_text_from_pdf(old_pdf)
     new_text = extract_text_from_pdf(new_pdf)
     if not old_text.strip() or not new_text.strip():
-        return "Could not extract text from one or both PDFs.", None, None
-    # Extract Learning Outcomes
     lo_text = extract_text_from_docx(lo_file)
     lo_list = [line.strip() for line in lo_text.split("\n") if line.strip()]
     if not lo_list:
-        return "No learning outcomes detected.", None, None
-    # Match scores
     old_scores = semantic_match(lo_list, old_text)
     new_scores = semantic_match(lo_list, new_text)
-    # Calculate overall change
     avg_old = sum(old_scores) / len(old_scores)
     avg_new = sum(new_scores) / len(new_scores)
     change = round(((avg_new - avg_old) / avg_old) * 100, 2) if avg_old != 0 else 100.0
-    # Summary
     matched = sum([1 for o, n in zip(old_scores, new_scores) if n >= o])
     summary = f"📈 Content Change: {change:.2f}%\n🎯 Matched LOs: {matched} of {len(lo_list)}"
     if change > 10:
         summary += "\n🟢 New content appears more detailed and informative."
     elif change < -10:
-        summary += "\n🔴 Some content may have been removed or simplified."
     else:
-        summary += "\n🟡 Minor updates detected."
-    # LO-wise chart and table
     los = [f"LO{i+1}" for i in range(len(lo_list))]
     percentage_change = [round(((n - o) / o) * 100, 2) if o else 100.0 for o, n in zip(old_scores, new_scores)]
     df = pd.DataFrame({
         "Learning Outcome": los,
-        "Old Score": old_scores,
-        "New Score": new_scores,
         "% Change": percentage_change
     })
@@ -83,7 +81,7 @@ def compare_handouts(old_pdf, new_pdf, lo_file):
     plt.savefig(table_path, bbox_inches='tight', dpi=300)
     plt.close()
-    # Chart image
     fig, ax = plt.subplots(figsize=(10, 4))
     bar_width = 0.35
     index = range(len(los))
@@ -91,10 +89,10 @@ def compare_handouts(old_pdf, new_pdf, lo_file):
     ax.bar([i + bar_width for i in index], new_scores, bar_width, label='New', alpha=0.7)
     ax.set_xticks([i + bar_width / 2 for i in index])
     ax.set_xticklabels(los)
-    ax.set_ylabel('Match Score (0-1)')
-    ax.set_title('LO-wise Match Score Comparison')
     ax.legend()
-    chart_path = "/mnt/data/lo_comparison_chart.png"
     plt.tight_layout()
     plt.savefig(chart_path)
     plt.close()
@@ -103,15 +101,15 @@ def compare_handouts(old_pdf, new_pdf, lo_file):
 # Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# 📘 Handout Change Analyzer with LO Mapping")
     with gr.Row():
-        old_pdf_input = gr.File(label="Upload Old Handout (PDF)", file_types=[".pdf"])
-        new_pdf_input = gr.File(label="Upload New Handout (PDF)", file_types=[".pdf"])
-        lo_input = gr.File(label="Upload Learning Outcomes (DOCX)", file_types=[".docx"])
-    submit_btn = gr.Button("🔍 Analyze Changes")
     summary_output = gr.Textbox(label="Summary")
-    lo_table_output = gr.Image(label="📋 LO Comparison Table")
-    lo_chart_output = gr.Image(label="📈 LO Score Chart")
     submit_btn.click(fn=compare_handouts, inputs=[old_pdf_input, new_pdf_input, lo_input],
                      outputs=[summary_output, lo_table_output, lo_chart_output])

 import gradio as gr
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
+from sentence_transformers import SentenceTransformer, util
 import fitz  # PyMuPDF
 import docx
 import matplotlib.pyplot as plt
 import pandas as pd
+# Load transformer model
+model = SentenceTransformer("all-MiniLM-L6-v2")
 def extract_text_from_pdf(file):
     text = ""
     try:
     return "\n".join([para.text for para in doc.paragraphs])
 def semantic_match(lo_texts, content, chunk_words=128):
     """Score how well ``content`` covers each learning outcome.
 
     The content is split into consecutive windows of ``chunk_words``
     whitespace-separated words, and every window is embedded separately.
     Encoding the whole document as a single input would let the sentence
     encoder truncate it to its maximum sequence length, so only the first
     few hundred tokens of a handout would influence the scores.
 
     Args:
         lo_texts: Learning-outcome strings, one per outcome.
         content: Full text of the handout to compare against.
         chunk_words: Words per window. The default is meant to stay under
             the encoder's token limit -- NOTE(review): confirm against
             ``model.max_seq_length`` if the model is changed.
 
     Returns:
         A list of floats, one per entry of ``lo_texts`` and in the same
         order: the highest cosine similarity between that outcome and any
         window of ``content``. Empty when ``lo_texts`` is empty.
     """
     if not lo_texts:
         return []
     words = content.split()
     # Fall back to the raw string so there is always one window to embed.
     chunks = [
         " ".join(words[start:start + chunk_words])
         for start in range(0, len(words), chunk_words)
     ] or [content]
     # One encode call keeps windows and outcomes in the same batch.
     embeddings = model.encode(chunks + list(lo_texts), convert_to_tensor=True)
     chunk_embeddings = embeddings[:len(chunks)]
     lo_embeddings = embeddings[len(chunks):]
     # Rows are learning outcomes, columns are content windows.
     similarities = util.pytorch_cos_sim(lo_embeddings, chunk_embeddings)
     # An outcome counts as covered if any window matches it well.
     return similarities.max(dim=1).values.tolist()
 def compare_handouts(old_pdf, new_pdf, lo_file):
     old_text = extract_text_from_pdf(old_pdf)
     new_text = extract_text_from_pdf(new_pdf)
     if not old_text.strip() or not new_text.strip():
+        return "❌ Could not extract text from one or both PDFs.", None, None
     lo_text = extract_text_from_docx(lo_file)
     lo_list = [line.strip() for line in lo_text.split("\n") if line.strip()]
     if not lo_list:
+        return "⚠️ No learning outcomes detected in uploaded DOCX file.", None, None
     old_scores = semantic_match(lo_list, old_text)
     new_scores = semantic_match(lo_list, new_text)
     avg_old = sum(old_scores) / len(old_scores)
     avg_new = sum(new_scores) / len(new_scores)
     change = round(((avg_new - avg_old) / avg_old) * 100, 2) if avg_old != 0 else 100.0
     matched = sum([1 for o, n in zip(old_scores, new_scores) if n >= o])
     summary = f"📈 Content Change: {change:.2f}%\n🎯 Matched LOs: {matched} of {len(lo_list)}"
     if change > 10:
         summary += "\n🟢 New content appears more detailed and informative."
     elif change < -10:
+        summary += "\n🔴 Content may have been reduced or simplified."
     else:
+        summary += "\n🟡 Only minor updates detected."
     los = [f"LO{i+1}" for i in range(len(lo_list))]
     percentage_change = [round(((n - o) / o) * 100, 2) if o else 100.0 for o, n in zip(old_scores, new_scores)]
     df = pd.DataFrame({
         "Learning Outcome": los,
+        "Old Score": [round(s, 3) for s in old_scores],
+        "New Score": [round(s, 3) for s in new_scores],
         "% Change": percentage_change
     })
     plt.savefig(table_path, bbox_inches='tight', dpi=300)
     plt.close()
+    # Bar chart
     fig, ax = plt.subplots(figsize=(10, 4))
     bar_width = 0.35
     index = range(len(los))
     ax.bar([i + bar_width for i in index], new_scores, bar_width, label='New', alpha=0.7)
     ax.set_xticks([i + bar_width / 2 for i in index])
     ax.set_xticklabels(los)
+    ax.set_ylabel('Semantic Match (0-1)')
+    ax.set_title('Learning Outcome Comparison')
     ax.legend()
+    chart_path = "/mnt/data/lo_score_chart.png"
     plt.tight_layout()
     plt.savefig(chart_path)
     plt.close()
 # Gradio interface
 with gr.Blocks() as demo:
+    gr.Markdown("## 🧠 Transformer-Based Course Content Comparator")
     with gr.Row():
+        old_pdf_input = gr.File(label="📂 Old Handout (PDF)", file_types=[".pdf"])
+        new_pdf_input = gr.File(label="📂 New Handout (PDF)", file_types=[".pdf"])
+        lo_input = gr.File(label="📄 Learning Outcomes (DOCX)", file_types=[".docx"])
+    submit_btn = gr.Button("🔍 Analyze")
     summary_output = gr.Textbox(label="Summary")
+    lo_table_output = gr.Image(label="📋 LO Change Table")
+    lo_chart_output = gr.Image(label="📈 LO Match Chart")
     submit_btn.click(fn=compare_handouts, inputs=[old_pdf_input, new_pdf_input, lo_input],
                      outputs=[summary_output, lo_table_output, lo_chart_output])