Spaces:
Sleeping
Sleeping
File size: 4,103 Bytes
624ba1c 62adbdf e1fb09a 624ba1c 8751b01 62adbdf bc78525 9af4462 ef62d76 62adbdf 624ba1c 62adbdf 624ba1c b0754aa 624ba1c 9af4462 8751b01 9af4462 8751b01 bc78525 9af4462 8751b01 98a66f5 3378b7b 4c891c6 624ba1c 9af4462 624ba1c b0754aa 624ba1c 9af4462 ac4d1e6 8751b01 624ba1c 9af4462 3378b7b 9af4462 624ba1c 4c891c6 b0754aa 8751b01 624ba1c 9af4462 8751b01 9af4462 3378b7b 9af4462 3378b7b 8751b01 9af4462 8751b01 3378b7b 9af4462 3378b7b 3c108e3 9af4462 3c108e3 4c891c6 3c108e3 624ba1c b50e58b 3c108e3 9af4462 3c108e3 9af4462 3c108e3 bc78525 9af4462 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
import gradio as gr
from PyPDF2 import PdfReader
import io
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import re
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF supplied as raw bytes.

    Args:
        pdf_file: The PDF content as a bytes-like object. ``None`` or empty
            input yields an empty string.

    Returns:
        The text of all pages that produced any text, joined with newlines
        and stripped of leading/trailing whitespace. Returns ``""`` when the
        input is empty or the PDF cannot be parsed.
    """
    if not pdf_file:
        return ""
    try:
        reader = PdfReader(io.BytesIO(pdf_file))
        page_texts = (page.extract_text() for page in reader.pages)
        # Join with a newline so the last word of one page does not fuse
        # with the first word of the next; callers count and tokenize words.
        return "\n".join(text for text in page_texts if text).strip()
    except Exception as e:
        # Deliberately broad: any parsing failure is reported and mapped to
        # "", which callers treat as "could not extract text".
        print("PDF extraction error:", e)
        return ""
def semantic_match(lo_list, content):
    """Score how closely each learning outcome matches a document.

    A single TF-IDF vocabulary is fitted on the document together with the
    learning outcomes, and each outcome is compared with the document by
    cosine similarity.

    Args:
        lo_list: Iterable of learning-outcome strings. Entries that are empty
            or whitespace-only are skipped, so the result only lines up
            index-for-index with ``lo_list`` when it contains no such entries.
        content: The document text to compare against.

    Returns:
        A list of float similarity scores, one per non-blank learning
        outcome, in input order. Empty when there are no usable outcomes.
    """
    lo_texts = [lo for lo in lo_list if lo.strip()]
    if not lo_texts:
        return []
    tfidf_matrix = TfidfVectorizer().fit_transform([content] + lo_texts)
    # Row 0 is the document; the remaining rows are the outcomes. One
    # pairwise call on the sparse matrix replaces a dense conversion plus a
    # separate call per outcome.
    similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:])
    return similarities.ravel().tolist()
def compare_all(old_pdf, new_pdf, lo_file):
    """Compare two handout PDFs and score both against learning outcomes.

    Args:
        old_pdf: Raw bytes of the old handout PDF.
        new_pdf: Raw bytes of the new handout PDF.
        lo_file: Raw bytes of a text file with one learning outcome per
            line. It is decoded as UTF-8 (undecodable bytes are dropped) and
            blank lines are ignored.

    Returns:
        A 4-tuple ``(summary, table, figure, status)``:
        summary is a text report, table is a pandas DataFrame with one row
        per learning outcome, figure is a Matplotlib bar chart of old vs new
        scores, and status is a short status message. On failure the tuple
        is ``(error_message, None, None, None)``.
    """
    # NOTE(review): the emoji in this function's strings were mis-decoded in
    # the copy this was reviewed from. The check mark and target could be
    # recovered from the surviving bytes; the cross mark on error messages is
    # the presumed original; the "π" glyphs in the summary could not be
    # recovered and are left exactly as found.
    try:
        lo_lines = lo_file.decode("utf-8", errors="ignore").splitlines()
    except (AttributeError, UnicodeError):
        # lo_file is not a bytes object (e.g. None).
        return "❌ Could not read learning outcomes file.", None, None, None
    los = [line.strip() for line in lo_lines if line.strip()]
    if not los:
        # Without outcomes the chart and table would be empty and the
        # alignment verdict meaningless.
        return "❌ No learning outcomes found in the file.", None, None, None

    old_text = extract_text_from_pdf(old_pdf)
    new_text = extract_text_from_pdf(new_pdf)
    if not old_text or not new_text:
        return "❌ Could not extract text from one or both PDFs.", None, None, None

    old_scores = semantic_match(los, old_text)
    new_scores = semantic_match(los, new_text)
    labels = [f"LO{i}" for i in range(1, len(los) + 1)]
    x = np.arange(len(labels))

    # Plot. A standalone Figure is used instead of plt.subplots() so that
    # pyplot does not keep a reference to a new figure on every request.
    from matplotlib.figure import Figure

    fig = Figure()
    ax = fig.subplots()
    ax.bar(x - 0.2, old_scores, width=0.4, label="Old", align="center")
    ax.bar(x + 0.2, new_scores, width=0.4, label="New", align="center")
    ax.set_xticks(x)
    ax.set_xticklabels(labels, rotation=45)
    ax.set_ylabel("Semantic Match Score")
    ax.set_title("Learning Outcomes Comparison")
    ax.legend()

    # Table: scores expressed as percentages, plus the new-minus-old change.
    df = pd.DataFrame(
        {
            "Learning Outcome": labels,
            "Old Match (%)": [round(s * 100, 2) for s in old_scores],
            "New Match (%)": [round(s * 100, 2) for s in new_scores],
            "Change (%)": [
                round((new - old) * 100, 2)
                for new, old in zip(new_scores, old_scores)
            ],
        }
    )

    # Content similarity between the two handouts, reported as a difference.
    tfidf = TfidfVectorizer().fit_transform([old_text, new_text])
    cosine_sim = cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0] * 100
    content_diff = 100 - round(cosine_sim, 2)

    # Text size change, measured in \w+ tokens.
    len_old = len(re.findall(r"\w+", old_text))
    len_new = len(re.findall(r"\w+", new_text))
    if len_old:
        word_change_percent = round(((len_new - len_old) / len_old) * 100, 2)
        sign = "+" if word_change_percent >= 0 else ""
        length_change = f"{sign}{word_change_percent:.2f}%"
    else:
        # Old text is non-empty but has no word tokens; a relative change is
        # undefined, so report that instead of dividing by zero.
        length_change = "N/A (old handout contains no words)"

    match_threshold = 0.5  # a score at or above this counts as a "match"
    matched = sum(1 for s in new_scores if s >= match_threshold)
    alignment = "more" if sum(new_scores) > sum(old_scores) else "less"

    summary = "\n".join(
        [
            "π **Summary of Comparison**",
            f"π **Overall Content Change**: {content_diff:.2f}%",
            "π This is based on TF-IDF cosine similarity between old and new handouts.",
            f"π **Text Length Difference**: {length_change}",
            "Compared by total number of words in both handouts.",
            f"🎯 **Learning Outcome Matches**: {matched} of {len(los)}",
            f"✅ New handout appears {alignment} aligned with stated outcomes.",
        ]
    )
    return summary, df, fig, "✅ Comparison completed successfully."
# Gradio UI: three file uploads feed compare_all, whose four return values
# map, in order, onto the four output components below.
# NOTE(review): the emoji in these labels were mis-decoded in the copy this
# was reviewed from. The Status label's information sign could be recovered
# from the surviving bytes; the "π" glyphs could not and are left as found.
iface = gr.Interface(
    fn=compare_all,
    inputs=[
        # type='binary' matches compare_all, which wraps the PDFs in BytesIO
        # and calls .decode() on the learning-outcomes file.
        gr.File(label="Old Handout PDF", type="binary"),
        gr.File(label="New Handout PDF", type="binary"),
        gr.File(label="Learning Outcomes (Text File)", type="binary"),
    ],
    outputs=[
        gr.Textbox(label="π Summary & Insights", lines=20, max_lines=25),
        gr.Dataframe(label="π LO-wise Comparison Table"),
        gr.Plot(label="π LO Visual Comparison"),
        gr.Textbox(label="ℹ️ Status", lines=1),
    ],
    title="π Handout Comparator + LO Analyzer",
    description="Upload OLD and NEW handouts in PDF format along with a TXT file of Learning Outcomes. The app compares content changes and evaluates alignment with LOs visually and in table format.",
)
iface.launch()
|