Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from PyPDF2 import PdfReader | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from transformers import pipeline | |
| import matplotlib.pyplot as plt | |
| import pandas as pd | |
| import io | |
# Hugging Face feature-extraction pipeline providing per-token MiniLM
# embeddings; loaded once at import time (downloads the model on first run).
semantic_pipeline = pipeline("feature-extraction", model="sentence-transformers/all-MiniLM-L6-v2")
def extract_text_from_pdf(pdf_file):
    """Extract the concatenated text of every page of a PDF.

    Parameters
    ----------
    pdf_file : bytes
        Raw PDF file contents (the Gradio inputs use type='binary').

    Returns
    -------
    str
        All extractable page text joined together and stripped of
        surrounding whitespace; empty string if the PDF cannot be parsed.
    """
    try:
        reader = PdfReader(io.BytesIO(pdf_file))
        # "".join avoids quadratic += string accumulation across many pages;
        # extract_text() may return None/"" for image-only pages, so filter.
        page_texts = (page.extract_text() for page in reader.pages)
        return "".join(text for text in page_texts if text).strip()
    except Exception as e:
        # Best effort: a corrupt or unreadable PDF yields "" instead of a crash;
        # the caller treats "" as an extraction failure.
        print("Error reading PDF:", e)
        return ""
def tfidf_similarity(text1, text2):
    """Return the cosine similarity of the two texts' TF-IDF vectors."""
    # Fit both documents together so they share a single vocabulary.
    matrix = TfidfVectorizer().fit_transform([text1, text2])
    row_a, row_b = matrix[0:1], matrix[1:2]
    return cosine_similarity(row_a, row_b)[0][0]
def transformer_similarity(text1, text2):
    """Semantic similarity of two texts via mean-pooled transformer embeddings.

    NOTE(review): each text is cut to its first 512 *characters*, not the
    model's 512-*token* limit — longer documents are simply truncated.
    """
    def _mean_pooled(text):
        # The pipeline yields one vector per token ([tokens][dims]);
        # average across tokens to get a single sentence-level vector.
        token_vectors = semantic_pipeline(text[:512])[0]
        return [sum(dim_vals) / len(dim_vals) for dim_vals in zip(*token_vectors)]

    vec_a = _mean_pooled(text1)
    vec_b = _mean_pooled(text2)
    return cosine_similarity([vec_a], [vec_b])[0][0]
def semantic_match(lo_list, content):
    """Score each learning outcome against the content using TF-IDF cosine.

    Parameters
    ----------
    lo_list : list[str]
        Learning-outcome strings, one per outcome.
    content : str
        Document text the outcomes are matched against.

    Returns
    -------
    list[float]
        One cosine-similarity score per learning outcome, in input order.
    """
    # Fit content + LOs together so they share one vocabulary; row 0 is the
    # content, rows 1..n are the outcomes.
    tfidf = TfidfVectorizer().fit_transform([content] + lo_list)
    # One vectorized call on the sparse matrix replaces the previous per-LO
    # Python loop and the dense .toarray() copy; same scores, same order.
    return list(cosine_similarity(tfidf[0:1], tfidf[1:])[0])
def compare_all(old_pdf, new_pdf, lo_file):
    """Compare two handout PDFs against a list of learning outcomes.

    Parameters
    ----------
    old_pdf, new_pdf : bytes
        Raw bytes of the old and new handout PDFs.
    lo_file : bytes
        Raw bytes of a UTF-8 text file with one learning outcome per line.

    Returns
    -------
    tuple
        (markdown summary, comparison DataFrame, matplotlib Figure,
        full text of the new handout); on error the last three are None.
    """
    try:
        los = lo_file.decode("utf-8", errors="ignore").splitlines()
        los = [lo.strip() for lo in los if lo.strip()]
    except Exception:
        # BUG FIX: error paths previously returned 6 values while the Gradio
        # interface declares only 4 outputs (and the success path returns 4);
        # return exactly 4 so Gradio can render the error message.
        return "β Could not read learning outcomes file.", None, None, None
    old_text = extract_text_from_pdf(old_pdf)
    new_text = extract_text_from_pdf(new_pdf)
    if not old_text or not new_text:
        return "β Could not extract text from one or both PDFs.", None, None, None

    old_scores = semantic_match(los, old_text)
    new_scores = semantic_match(los, new_text)

    # Grouped bar chart: old vs new match score per learning outcome.
    labels = [f"LO{i+1}" for i in range(len(los))]
    x = range(len(labels))
    fig, ax = plt.subplots()
    ax.bar(x, old_scores, width=0.4, label="Old", align='center')
    ax.bar([i + 0.4 for i in x], new_scores, width=0.4, label="New", align='center')
    ax.set_xticks([i + 0.2 for i in x])
    ax.set_xticklabels(labels, rotation=45)
    ax.set_ylabel("Semantic Match Score")
    ax.set_title("Learning Outcomes Comparison")
    ax.legend()

    df = pd.DataFrame({
        "Learning Outcome": labels,
        "Old Match (%)": [round(s*100, 2) for s in old_scores],
        "New Match (%)": [round(s*100, 2) for s in new_scores],
        "Change (%)": [round((n - o)*100, 2) for o, n in zip(old_scores, new_scores)],
    })

    tfidf_sim = tfidf_similarity(old_text, new_text)
    transformer_sim = transformer_similarity(old_text, new_text)
    # old_text is non-empty here (guarded above), so no ZeroDivisionError.
    text_growth = (len(new_text) - len(old_text)) / len(old_text) * 100

    # BUG FIX: these lines were concatenated with no separators, so the
    # Markdown rendered as one run-on line; join them with blank lines.
    summary_lines = [
        "π **Summary of Comparison**",
        f"π **TF-IDF Content Similarity**: {round(tfidf_sim * 100, 2)}%",
        f"π€ **Transformer-based Similarity**: {round(transformer_sim * 100, 2)}%",
        f"π **Text Growth**: {'+' if text_growth >= 0 else ''}{round(text_growth, 2)}% more content in new handout",
        f"π― **LOs Matched (New β₯ 0.5)**: {sum(1 for s in new_scores if s >= 0.5)} of {len(los)}",
        f"π **Insight**: New content appears {'more' if sum(new_scores) > sum(old_scores) else 'less'} aligned with outcomes.",
    ]
    explanation_lines = [
        "---",
        "π **Explanation of Methods**:",
        "- **TF-IDF Similarity** checks how often important words appear in both documents. It gives a quick idea of textual overlap.",
        "- **Transformer Similarity** uses AI to understand meaning beyond words. It compares the 'sense' of the documents like a human would.",
    ]
    summary = "\n\n".join(summary_lines + explanation_lines)
    return summary, df, fig, new_text
# Gradio UI wiring: three binary file uploads in, four result panels out.
app_inputs = [
    gr.File(label="Old Handout PDF", type='binary'),
    gr.File(label="New Handout PDF", type='binary'),
    gr.File(label="Learning Outcomes (Text File)", type='binary'),
]
app_outputs = [
    gr.Markdown(label="π Summary & Insights"),
    gr.Dataframe(label="π LO-wise Comparison Table"),
    gr.Plot(label="π Visual LO Change Chart"),
    gr.Textbox(label="π New Handout Preview (Full Text)", lines=20, interactive=False),
]
iface = gr.Interface(
    fn=compare_all,
    inputs=app_inputs,
    outputs=app_outputs,
    title="π Handout Comparison + LO Semantic Analysis",
    description="Upload two handouts (old and new) and a text file of Learning Outcomes (LOs). This tool compares content using TF-IDF and Transformers, visualizes LO changes, and explains results in simple terms.",
)
iface.launch()