# NOTE(review): the lines "Spaces: / Sleeping / Sleeping" were Hugging Face
# Space page-status residue from scraping, not code; preserved here as a comment.
| import gradio as gr | |
| from PyPDF2 import PdfReader | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from sentence_transformers import SentenceTransformer, util | |
| import matplotlib.pyplot as plt | |
| import pandas as pd | |
| import io | |
# Sentence-transformer checkpoint used for every semantic-similarity score
# in this app (document-vs-document and LO-vs-document comparisons).
model = SentenceTransformer("paraphrase-MiniLM-L6-v2")
def extract_text_from_pdf(file_bytes):
    """Return the concatenated text of every page of a PDF given as raw bytes.

    Pages with no extractable text contribute an empty string. Best-effort:
    any parsing/extraction failure is printed and an empty string is returned
    instead of raising.
    """
    try:
        document = PdfReader(io.BytesIO(file_bytes))
        page_texts = (page.extract_text() or "" for page in document.pages)
        return " ".join(page_texts).strip()
    except Exception as e:
        print("Error extracting text:", e)
        return ""
def tfidf_similarity(text1, text2):
    """Lexical similarity of two texts: cosine of their TF-IDF vectors.

    Returns a float in [0, 1]; 1 means identical term distributions.
    """
    tfidf_matrix = TfidfVectorizer().fit_transform([text1, text2])
    first_row = tfidf_matrix[0:1]
    second_row = tfidf_matrix[1:2]
    return cosine_similarity(first_row, second_row)[0][0]
def transformer_similarity(text1, text2):
    """Semantic similarity of two texts via sentence-transformer embeddings.

    Encodes both texts with the module-level `model` and returns the cosine
    similarity of the two embeddings as a Python float.
    """
    emb_a, emb_b = model.encode([text1, text2], convert_to_tensor=True)
    return util.pytorch_cos_sim(emb_a, emb_b).item()
def bloom_level(term):
    """Classify a learning-outcome phrase into a Bloom's taxonomy level.

    The phrase is lower-cased and scanned against level keyword lists in
    fixed order from lowest (Remember) to highest (Create); the first level
    with any keyword occurring as a substring wins.

    Args:
        term: Learning-outcome text to classify.

    Returns:
        The capitalized level name, or "Unknown" when no keyword matches.
    """
    lowered = term.lower()
    # Order matters: earlier (lower) levels take precedence on multiple hits.
    taxonomy = (
        ("remember", ("define", "list", "recall", "identify")),
        ("understand", ("explain", "describe", "summarize")),
        ("apply", ("apply", "demonstrate", "use")),
        ("analyze", ("analyze", "compare", "contrast")),
        ("evaluate", ("evaluate", "judge", "critique")),
        ("create", ("create", "design", "formulate")),
    )
    return next(
        (
            level.capitalize()
            for level, keywords in taxonomy
            if any(keyword in lowered for keyword in keywords)
        ),
        "Unknown",
    )
def lo_semantic_scores(los, content):
    """Score each learning outcome against *content*.

    Args:
        los: Iterable of learning-outcome strings.
        content: Document text to compare against.

    Returns:
        List of transformer cosine-similarity scores, one per outcome,
        in the same order as *los*.
    """
    return [transformer_similarity(outcome, content) for outcome in los]
def compare_all(old_pdf, new_pdf, lo_file):
    """Compare two handout PDFs and align both against learning outcomes.

    Args:
        old_pdf: Raw bytes of the previous handout PDF.
        new_pdf: Raw bytes of the updated handout PDF.
        lo_file: Learning outcomes, one per line (file-like object or bytes).

    Returns:
        (summary_markdown, comparison_dataframe, bar_chart_figure, preview)
        on success, or an error-message string plus three Nones when an
        input cannot be read.
    """
    # Accept either a file-like object or raw bytes for the LO list.
    try:
        raw = lo_file.read() if hasattr(lo_file, "read") else lo_file
        lo_content = raw.decode("utf-8", errors="ignore")
        los = [line.strip() for line in lo_content.splitlines() if line.strip()]
    except Exception:
        return "β Could not read learning outcomes file.", None, None, None

    old_text = extract_text_from_pdf(old_pdf)
    new_text = extract_text_from_pdf(new_pdf)
    if not (old_text and new_text):
        return "β Could not extract text from one or both PDFs.", None, None, None

    # Whole-document comparisons: lexical (TF-IDF), semantic, and raw length.
    tfidf_sim = tfidf_similarity(old_text, new_text)
    transformer_sim = transformer_similarity(old_text, new_text)
    text_growth = round(((len(new_text) - len(old_text)) / len(old_text)) * 100, 2)

    # Per-outcome semantic match against each document version.
    old_scores = lo_semantic_scores(los, old_text)
    new_scores = lo_semantic_scores(los, new_text)
    labels = [f"LO{i+1}" for i in range(len(los))]

    # Grouped bar chart: old vs new match score for every learning outcome.
    positions = list(range(len(labels)))
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.bar(positions, old_scores, width=0.4, label="Old", align='center')
    ax.bar([p + 0.4 for p in positions], new_scores, width=0.4, label="New", align='center')
    ax.set_xticks([p + 0.2 for p in positions])
    ax.set_xticklabels(labels, rotation=45)
    ax.set_ylabel("Semantic Match Score")
    ax.set_title("Learning Outcomes Comparison")
    ax.legend()

    df = pd.DataFrame({
        "Learning Outcome": labels,
        "LO Text": los,
        "Bloom Level": [bloom_level(lo) for lo in los],
        "Old Match": [round(s*100, 2) for s in old_scores],
        "New Match": [round(s*100, 2) for s in new_scores],
        "Change (%)": [round((n - o)*100, 2) for n, o in zip(new_scores, old_scores)],
    })

    summary = f"""π **Summary of Comparison**
π **TF-IDF Content Change**: {round((1 - tfidf_sim) * 100, 2)}%
π§ **Transformer-based Similarity**: {round(transformer_sim * 100, 2)}%
π **Content Length Change**: {text_growth}% {"π Reduced" if text_growth < 0 else "π Increased"}
π― **LO Matches**: {sum(1 for score in new_scores if score > 0.5)} of {len(los)}
π **Content appears {'more' if sum(new_scores) > sum(old_scores) else 'less'} aligned with learning outcomes.**
"""
    return summary, df, fig, new_text[:2000] + "..."
# Gradio UI wiring: three binary file inputs (two handout PDFs and a TXT of
# learning outcomes) mapped onto the four outputs of compare_all.
# Fix: removed the redundant `import gradio as gr` that re-imported a module
# already imported at the top of the file.
iface = gr.Interface(
    fn=compare_all,
    inputs=[
        gr.File(label="Old Handout PDF", type='binary'),
        gr.File(label="New Handout PDF", type='binary'),
        gr.File(label="Learning Outcomes (TXT)", type='binary'),
    ],
    outputs=[
        gr.Markdown(label="π Summary"),
        gr.Dataframe(label="π LO-wise Comparison Table"),
        gr.Plot(label="π LO Match Chart"),
        gr.Textbox(label="π Preview of New Content"),
    ],
    title="π AI Handout Comparator + LO Aligner",
    description="Compare two versions of handouts using both TF-IDF and Transformers. Analyze changes in content, alignment with Learning Outcomes, and Bloomβs taxonomy level."
)
iface.launch()