Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from PyPDF2 import PdfReader | |
| import io | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import matplotlib.pyplot as plt | |
| import pandas as pd | |
| import numpy as np | |
| import re | |
def extract_text_from_pdf(pdf_file) -> str:
    """Extract the text of every page of an in-memory PDF.

    Args:
        pdf_file: Raw bytes of the PDF document. ``None`` or empty bytes
            (nothing uploaded) yields an empty string.

    Returns:
        The text of all pages that produced any text, joined by newlines and
        stripped of surrounding whitespace. Returns ``""`` when the input is
        missing, the PDF cannot be parsed, or no page yields text.
    """
    if not pdf_file:
        # Nothing to parse; skip the reader entirely.
        return ""
    try:
        reader = PdfReader(io.BytesIO(pdf_file))
        page_texts = [page.extract_text() for page in reader.pages]
    except Exception as e:
        # Deliberately best-effort: any parsing failure is reported on stdout
        # and signalled to the caller as "no text".
        print("PDF extraction error:", e)
        return ""
    # Join with a newline so the last word of one page does not fuse with the
    # first word of the next (which would distort tokenisation and word counts).
    return "\n".join(text for text in page_texts if text).strip()
def semantic_match(lo_list, content):
    """Score how closely each learning outcome matches a document.

    The document and the non-blank learning outcomes are vectorised together
    with TF-IDF, and each outcome is compared to the document by cosine
    similarity.

    Args:
        lo_list: Iterable of learning-outcome strings. Entries that are empty
            or whitespace-only are skipped.
        content: Text of the document to compare against.

    Returns:
        A list of float similarity scores, one per non-blank learning outcome
        in input order. Empty when there are no non-blank outcomes.
    """
    lo_texts = [lo for lo in lo_list if lo.strip()]
    if not lo_texts:
        # Nothing to score; avoid fitting a vectorizer on the content alone.
        return []
    try:
        tfidf = TfidfVectorizer().fit_transform([content] + lo_texts)
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when none of the
        # texts yields a usable token; no shared terms means no similarity.
        return [0.0] * len(lo_texts)
    # Row 0 is the document, the remaining rows are the outcomes. One call on
    # the sparse matrix replaces densifying it and looping row by row.
    return cosine_similarity(tfidf[0:1], tfidf[1:]).ravel().tolist()
def compare_all(old_pdf, new_pdf, lo_file):
    """Compare two handout PDFs against a list of learning outcomes.

    Args:
        old_pdf: Raw bytes of the old handout PDF.
        new_pdf: Raw bytes of the new handout PDF.
        lo_file: Raw bytes of a text file with one learning outcome per line.

    Returns:
        A 4-tuple ``(summary, table, figure, status)``:
        the summary text, a DataFrame of per-outcome scores, a matplotlib
        figure with the grouped bar chart, and a status message. On failure
        the first element carries the error message and the other three are
        ``None``.
    """
    try:
        lo_lines = lo_file.decode("utf-8", errors="ignore").splitlines()
    except AttributeError:
        # lo_file has no .decode (e.g. it is None because nothing was uploaded).
        return "β Could not read learning outcomes file.", None, None, None
    los = [line.strip() for line in lo_lines if line.strip()]
    if not los:
        # Without outcomes the chart and table would be silently empty.
        return "β No learning outcomes found in the uploaded file.", None, None, None

    old_text = extract_text_from_pdf(old_pdf)
    new_text = extract_text_from_pdf(new_pdf)
    if not old_text or not new_text:
        return "β Could not extract text from one or both PDFs.", None, None, None

    old_scores = semantic_match(los, old_text)
    new_scores = semantic_match(los, new_text)
    labels = [f"LO{number}" for number in range(1, len(los) + 1)]
    x = np.arange(len(labels))

    # Plot: old and new bars side by side for each learning outcome.
    fig, ax = plt.subplots()
    ax.bar(x - 0.2, old_scores, width=0.4, label="Old", align='center')
    ax.bar(x + 0.2, new_scores, width=0.4, label="New", align='center')
    ax.set_xticks(x)
    ax.set_xticklabels(labels, rotation=45)
    ax.set_ylabel("Semantic Match Score")
    ax.set_title("Learning Outcomes Comparison")
    ax.legend()
    # Detach the figure from pyplot's global registry so repeated requests do
    # not accumulate open figures; the Figure object itself stays usable.
    plt.close(fig)

    # Table: scores as percentages plus the new-minus-old difference.
    data = {
        "Learning Outcome": labels,
        "Old Match (%)": [round(s * 100, 2) for s in old_scores],
        "New Match (%)": [round(s * 100, 2) for s in new_scores],
        "Change (%)": [round((new - old) * 100, 2) for new, old in zip(new_scores, old_scores)],
    }
    df = pd.DataFrame(data)

    # Content similarity between the two handouts, reported as "change".
    tfidf = TfidfVectorizer().fit_transform([old_text, new_text])
    cosine_sim = cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0] * 100
    # Clamp at zero: float error can push the similarity a hair above 100.
    content_diff = max(0.0, 100 - round(cosine_sim, 2))

    # Text size change, measured in word tokens.
    len_old = len(re.findall(r'\w+', old_text))
    len_new = len(re.findall(r'\w+', new_text))
    if len_old:
        word_change_percent = round(((len_new - len_old) / len_old) * 100, 2)
        sign = '+' if word_change_percent >= 0 else ''
        length_change = f"{sign}{word_change_percent:.2f}%"
    else:
        # The old text can be non-empty yet contain no word tokens; a relative
        # change is undefined then, so report that instead of dividing by zero.
        length_change = "n/a (old handout contains no words)"

    matched = sum(1 for s in new_scores if s >= 0.5)
    alignment = 'more' if sum(new_scores) > sum(old_scores) else 'less'
    summary = "\n".join([
        "π **Summary of Comparison**",
        f"π **Overall Content Change**: {content_diff:.2f}%",
        "π This is based on TF-IDF cosine similarity between old and new handouts.",
        f"π **Text Length Difference**: {length_change}",
        "Compared by total number of words in both handouts.",
        f"π― **Learning Outcome Matches**: {matched} of {len(los)}",
        f"β New handout appears {alignment} aligned with stated outcomes.",
    ])
    return summary, df, fig, "β Comparison completed successfully."
# Build the Gradio UI around compare_all: three file uploads in, and a summary
# textbox, a table, a plot and a one-line status textbox out. The output order
# mirrors the 4-tuple returned by compare_all.
iface = gr.Interface(
    fn=compare_all,
    # Each upload is requested with type "binary"; compare_all handles the
    # values as raw bytes.
    inputs=[
        gr.File(label=file_label, type="binary")
        for file_label in (
            "Old Handout PDF",
            "New Handout PDF",
            "Learning Outcomes (Text File)",
        )
    ],
    outputs=[
        gr.Textbox(label="π Summary & Insights", lines=20, max_lines=25),
        gr.Dataframe(label="π LO-wise Comparison Table"),
        gr.Plot(label="π LO Visual Comparison"),
        gr.Textbox(label="βΉοΈ Status", lines=1),
    ],
    title="π Handout Comparator + LO Analyzer",
    description=(
        "Upload OLD and NEW handouts in PDF format along with a TXT file of "
        "Learning Outcomes. The app compares content changes and evaluates "
        "alignment with LOs visually and in table format."
    ),
)
# Start the web server as soon as the module is executed.
iface.launch()