Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,112 +1,122 @@
|
|
|
|
|
| 1 |
import gradio as gr
|
|
|
|
|
|
|
| 2 |
import fitz # PyMuPDF
|
| 3 |
import docx
|
| 4 |
-
import io
|
| 5 |
-
import re
|
| 6 |
-
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 7 |
-
from sentence_transformers import SentenceTransformer, util
|
| 8 |
import matplotlib.pyplot as plt
|
| 9 |
-
import
|
| 10 |
-
|
| 11 |
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
def extract_text_from_pdf(pdf_file):
|
| 15 |
try:
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
text += page.get_text()
|
| 20 |
-
pdf_reader.close()
|
| 21 |
-
return text.strip()
|
| 22 |
except Exception as e:
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
try:
|
| 39 |
-
lo_embed = model.encode(lo, convert_to_tensor=True)
|
| 40 |
-
content_embed = model.encode(content, convert_to_tensor=True)
|
| 41 |
-
sim = util.pytorch_cos_sim(lo_embed, content_embed).item()
|
| 42 |
-
scores.append(round(sim, 2))
|
| 43 |
-
except:
|
| 44 |
-
scores.append(0.0)
|
| 45 |
-
return scores
|
| 46 |
-
|
| 47 |
-
def content_change_score(text1, text2):
|
| 48 |
-
try:
|
| 49 |
-
sim = SequenceMatcher(None, normalize_text(text1), normalize_text(text2)).ratio()
|
| 50 |
-
return round((1 - sim) * 100, 2)
|
| 51 |
-
except:
|
| 52 |
-
return 100.0
|
| 53 |
|
| 54 |
def compare_handouts(old_pdf, new_pdf, lo_file):
|
|
|
|
| 55 |
old_text = extract_text_from_pdf(old_pdf)
|
| 56 |
new_text = extract_text_from_pdf(new_pdf)
|
|
|
|
|
|
|
| 57 |
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
if not lo_list:
|
| 63 |
-
return "
|
| 64 |
|
|
|
|
| 65 |
old_scores = semantic_match(lo_list, old_text)
|
| 66 |
new_scores = semantic_match(lo_list, new_text)
|
| 67 |
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
if
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
else:
|
| 77 |
-
summary += "
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
ax.set_ylabel('Match Score (0-1)')
|
| 85 |
-
ax.set_title('LO-wise Match Score
|
| 86 |
-
ax.set_xticks(x)
|
| 87 |
-
ax.set_xticklabels([f"LO{i+1}" for i in range(len(lo_list))], rotation=45)
|
| 88 |
ax.legend()
|
|
|
|
| 89 |
plt.tight_layout()
|
|
|
|
|
|
|
| 90 |
|
| 91 |
-
return summary,
|
| 92 |
|
|
|
|
| 93 |
with gr.Blocks() as demo:
|
| 94 |
-
gr.Markdown("π
|
| 95 |
-
gr.Markdown("Upload 2 handouts and a .docx file of Learning Outcomes to compare changes and alignment.")
|
| 96 |
-
|
| 97 |
with gr.Row():
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
btn.click(fn=compare_handouts, inputs=[old_pdf, new_pdf, lo_file], outputs=[output_text, output_plot])
|
| 110 |
-
clear_btn.click(fn=lambda: ("", None), inputs=[], outputs=[output_text, output_plot])
|
| 111 |
|
| 112 |
demo.launch()
|
|
|
|
| 1 |
+
|
| 2 |
import gradio as gr
|
| 3 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 4 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 5 |
import fitz # PyMuPDF
|
| 6 |
import docx
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
import matplotlib.pyplot as plt
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import tempfile
|
| 10 |
|
| 11 |
+
def extract_text_from_pdf(file):
    """Extract all page text from a PDF uploaded via Gradio.

    Accepts the shapes a `gr.File` input can deliver — a filepath string,
    a tempfile wrapper exposing `.name`, or raw PDF bytes — and returns
    the concatenated text of every page. Returns "" (possibly partial
    text) if extraction fails; the error is printed, not raised.
    """
    text = ""
    try:
        # BUGFIX: fitz.open(stream=...) requires bytes, but Gradio file
        # inputs normally supply a path string or a wrapper with .name.
        if isinstance(file, (bytes, bytearray)):
            doc = fitz.open(stream=file, filetype="pdf")
        else:
            path = getattr(file, "name", file)  # wrapper -> path, str -> str
            doc = fitz.open(path)
        with doc:
            for page in doc:
                text += page.get_text()
    except Exception as e:
        # Best-effort by design: report and fall through to return text.
        print(f"Error extracting PDF text: {e}")
    return text
|
| 20 |
+
|
| 21 |
+
def extract_text_from_docx(file):
    """Return the full text of a .docx file, one paragraph per line."""
    document = docx.Document(file)
    paragraph_texts = (para.text for para in document.paragraphs)
    return "\n".join(paragraph_texts)
|
| 25 |
+
|
| 26 |
+
def semantic_match(lo_texts, content):
    """Score each learning outcome against a document via TF-IDF cosine similarity.

    Parameters
    ----------
    lo_texts : list[str]
        Learning-outcome strings to score.
    content : str
        The document text all outcomes are compared against.

    Returns
    -------
    list[float]
        One similarity in [0, 1] per entry of `lo_texts`; [] if empty.
    """
    if not lo_texts:
        # Guard: sklearn raises on an empty comparison set.
        return []
    # Fit content + LOs together so they share one vocabulary.
    tfidf = TfidfVectorizer().fit_transform([content] + lo_texts)
    # Compare on the sparse matrix directly — the original .toarray()
    # densified the whole TF-IDF matrix for no benefit.
    similarities = cosine_similarity(tfidf[0:1], tfidf[1:])[0]
    return similarities.tolist()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
def compare_handouts(old_pdf, new_pdf, lo_file):
    """Compare two handout PDFs against a DOCX list of learning outcomes.

    Parameters
    ----------
    old_pdf, new_pdf
        Uploaded PDF handouts (whatever shape `gr.File` delivers).
    lo_file
        Uploaded DOCX whose non-blank lines are learning outcomes.

    Returns
    -------
    tuple[str, str | None, str | None]
        (summary text, table PNG path, chart PNG path), or
        (error message, None, None) when extraction/parsing fails.
    """
    # Extract text from handouts
    old_text = extract_text_from_pdf(old_pdf)
    new_text = extract_text_from_pdf(new_pdf)
    if not old_text.strip() or not new_text.strip():
        return "Could not extract text from one or both PDFs.", None, None

    # Extract Learning Outcomes: one outcome per non-blank line
    lo_text = extract_text_from_docx(lo_file)
    lo_list = [line.strip() for line in lo_text.split("\n") if line.strip()]
    if not lo_list:
        return "No learning outcomes detected.", None, None

    # Match scores for each LO against each handout
    old_scores = semantic_match(lo_list, old_text)
    new_scores = semantic_match(lo_list, new_text)

    # Overall relative change; defined as 100% when the old average is zero
    avg_old = sum(old_scores) / len(old_scores)
    avg_new = sum(new_scores) / len(new_scores)
    change = round(((avg_new - avg_old) / avg_old) * 100, 2) if avg_old != 0 else 100.0

    # Summary: an LO counts as "matched" when its score did not drop
    matched = sum(1 for o, n in zip(old_scores, new_scores) if n >= o)
    summary = f"π Content Change: {change:.2f}%\nπ― Matched LOs: {matched} of {len(lo_list)}"
    if change > 10:
        summary += "\nπ’ New content appears more detailed and informative."
    elif change < -10:
        summary += "\nπ΄ Some content may have been removed or simplified."
    else:
        summary += "\nπ‘ Minor updates detected."

    # LO-wise per-outcome percentage change
    los = [f"LO{i+1}" for i in range(len(lo_list))]
    percentage_change = [round(((n - o) / o) * 100, 2) if o else 100.0
                         for o, n in zip(old_scores, new_scores)]
    df = pd.DataFrame({
        "Learning Outcome": los,
        "Old Score": old_scores,
        "New Score": new_scores,
        "% Change": percentage_change
    })

    # Table image. BUGFIX: write to a temp file instead of the hard-coded
    # /mnt/data/... path, which does not exist on a typical deployment host
    # (this is also what the file-level `import tempfile` was for).
    fig, ax = plt.subplots(figsize=(9, 3))
    ax.axis('tight')
    ax.axis('off')
    table = ax.table(cellText=df.values, colLabels=df.columns, cellLoc='center', loc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1.2, 1.2)
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        table_path = tmp.name
    plt.savefig(table_path, bbox_inches='tight', dpi=300)
    plt.close()

    # Chart image: grouped bars, old vs. new score per LO
    fig, ax = plt.subplots(figsize=(10, 4))
    bar_width = 0.35
    index = range(len(los))
    ax.bar(index, old_scores, bar_width, label='Old', alpha=0.7)
    ax.bar([i + bar_width for i in index], new_scores, bar_width, label='New', alpha=0.7)
    ax.set_xticks([i + bar_width / 2 for i in index])
    ax.set_xticklabels(los)
    ax.set_ylabel('Match Score (0-1)')
    ax.set_title('LO-wise Match Score Comparison')
    ax.legend()
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        chart_path = tmp.name
    plt.tight_layout()
    plt.savefig(chart_path)
    plt.close()

    return summary, table_path, chart_path
|
| 106 |
|
| 107 |
+
# Gradio interface
|
| 108 |
with gr.Blocks() as demo:
    gr.Markdown("# π Handout Change Analyzer with LO Mapping")

    # Three uploads side by side: the two handout revisions and the LO list.
    with gr.Row():
        old_handout = gr.File(label="Upload Old Handout (PDF)", file_types=[".pdf"])
        new_handout = gr.File(label="Upload New Handout (PDF)", file_types=[".pdf"])
        lo_docx = gr.File(label="Upload Learning Outcomes (DOCX)", file_types=[".docx"])

    analyze_button = gr.Button("π Analyze Changes")
    result_summary = gr.Textbox(label="Summary")
    table_image = gr.Image(label="π LO Comparison Table")
    chart_image = gr.Image(label="π LO Score Chart")

    # Wire the button to the comparison pipeline.
    analyze_button.click(
        fn=compare_handouts,
        inputs=[old_handout, new_handout, lo_docx],
        outputs=[result_summary, table_image, chart_image],
    )

demo.launch()
|