Spaces:

Deevyankar
/

Handouts

Sleeping

App Files Files Community

Handouts / app3.py

Deevyankar

Rename app.py to app3.py

042fd8d verified 3 months ago

raw

history blame contribute delete

4.14 kB

	import gradio as gr
	import fitz # PyMuPDF
	import docx
	import io
	import re
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sentence_transformers import SentenceTransformer, util
	import matplotlib.pyplot as plt
	import numpy as np
	from difflib import SequenceMatcher

	# Sentence-embedding model, created once at import time and reused by
	# semantic_match() for every encode call.
	model = SentenceTransformer('all-MiniLM-L6-v2')

	def extract_text_from_pdf(pdf_file):
	    """Return the plain text of a PDF supplied as raw bytes.

	    Args:
	        pdf_file: The PDF content as a bytes-like object, or ``None`` /
	            empty when nothing was provided.

	    Returns:
	        The text of all pages concatenated in page order, with leading and
	        trailing whitespace stripped. Returns ``""`` when the input is
	        empty or the document cannot be opened or read.
	    """
	    if not pdf_file:
	        # Nothing to parse; skip the PDF library entirely.
	        return ""
	    try:
	        # The context manager closes the document even when extracting a
	        # page raises, so a malformed PDF cannot leak an open handle.
	        with fitz.open(stream=pdf_file, filetype="pdf") as pdf_reader:
	            text = "".join(page.get_text() for page in pdf_reader)
	    except Exception:
	        # Deliberate best effort: callers treat an empty string as
	        # "no usable text" and report that to the user.
	        return ""
	    return text.strip()

	def normalize_text(text):
	    """Lower-case *text* and collapse each whitespace run to one space.

	    Leading and trailing whitespace is removed before collapsing, so the
	    result never starts or ends with a space.
	    """
	    trimmed = text.strip()
	    lowered = trimmed.lower()
	    return re.sub(r'\s+', ' ', lowered)

	def extract_text_from_docx(docx_file):
	    """Return the non-empty paragraphs of a .docx file supplied as bytes.

	    Args:
	        docx_file: The .docx content as a bytes-like object, or ``None`` /
	            empty when nothing was provided.

	    Returns:
	        A list with the stripped text of every paragraph that contains
	        something other than whitespace, in document order. Returns ``[]``
	        when the input is empty or cannot be parsed as a .docx file.
	    """
	    if not docx_file:
	        # Nothing to parse; skip the .docx library entirely.
	        return []
	    try:
	        doc = docx.Document(io.BytesIO(docx_file))
	        # NOTE(review): only ``doc.paragraphs`` is read, so text placed
	        # inside tables is presumably not picked up - confirm whether
	        # learning outcomes are ever delivered in table form.
	        stripped = (para.text.strip() for para in doc.paragraphs)
	        return [text for text in stripped if text]
	    except Exception:
	        # Deliberate best effort: an unreadable file yields no outcomes and
	        # the caller reports that. Unlike a bare ``except``, this lets
	        # KeyboardInterrupt and SystemExit propagate.
	        return []

	def semantic_match(lo_list, content):
	    """Score how closely *content* matches each learning outcome.

	    Args:
	        lo_list: Iterable of learning-outcome strings.
	        content: The document text to compare every outcome against.

	    Returns:
	        A list with one float per learning outcome, in input order: the
	        cosine similarity between the outcome's embedding and the
	        embedding of *content*, rounded to two decimals. An outcome whose
	        scoring fails gets ``0.0``; if *content* itself cannot be encoded
	        every outcome gets ``0.0``.
	    """
	    lo_list = list(lo_list)
	    if not lo_list:
	        return []

	    try:
	        # The document embedding does not depend on the outcome, so it is
	        # computed once instead of once per loop iteration.
	        # NOTE(review): sentence-transformers models truncate input beyond
	        # their maximum sequence length, so for a long handout probably
	        # only the beginning of ``content`` influences the scores -
	        # confirm, and consider chunking the text.
	        content_embed = model.encode(content, convert_to_tensor=True)
	    except Exception:
	        return [0.0] * len(lo_list)

	    scores = []
	    for lo in lo_list:
	        try:
	            lo_embed = model.encode(lo, convert_to_tensor=True)
	            sim = util.pytorch_cos_sim(lo_embed, content_embed).item()
	            scores.append(round(sim, 2))
	        except Exception:
	            # Keep one score per outcome so the result stays aligned with
	            # ``lo_list`` even when a single outcome cannot be scored.
	            scores.append(0.0)
	    return scores

	def content_change_score(text1, text2):
	    """Estimate how much two texts differ, as a percentage.

	    Both texts are lower-cased and split on whitespace, and the resulting
	    word sequences are compared with ``difflib.SequenceMatcher``. Case and
	    spacing differences therefore do not count as changes.

	    Args:
	        text1: The original text.
	        text2: The revised text.

	    Returns:
	        ``(1 - similarity ratio) * 100`` rounded to two decimals: ``0.0``
	        when the word sequences are identical, ``100.0`` when they share
	        nothing. ``100.0`` is also returned when either argument is not a
	        string.
	    """
	    try:
	        words1 = text1.lower().split()
	        words2 = text2.lower().split()
	    except (AttributeError, TypeError):
	        # Non-string input (e.g. None): report the texts as fully different.
	        return 100.0

	    # autojunk must be disabled. With the default heuristic, any element
	    # that makes up more than 1% of a sequence of 200+ items is ignored as
	    # "popular". Applied to the characters of a document that discards
	    # nearly every character, so a single inserted character could be
	    # reported as a 100% change. Comparing words rather than characters
	    # keeps the exact comparison fast enough for long documents.
	    matcher = SequenceMatcher(None, words1, words2, autojunk=False)
	    return round((1 - matcher.ratio()) * 100, 2)

	def compare_handouts(old_pdf, new_pdf, lo_file):
	    """Compare two handout PDFs against a list of learning outcomes.

	    Args:
	        old_pdf: Raw bytes of the previous handout PDF.
	        new_pdf: Raw bytes of the revised handout PDF.
	        lo_file: Raw bytes of a .docx file whose non-empty paragraphs are
	            taken as the learning outcomes.

	    Returns:
	        A ``(summary, figure)`` tuple. ``summary`` is the text report and
	        ``figure`` a Matplotlib bar chart of the per-outcome scores. When
	        the inputs cannot be analysed, ``summary`` holds a warning and
	        ``figure`` is ``None``.
	    """
	    old_text = extract_text_from_pdf(old_pdf)
	    new_text = extract_text_from_pdf(new_pdf)

	    # Fewer than 200 characters is treated as a failed extraction.
	    if len(old_text.strip()) < 200 or len(new_text.strip()) < 200:
	        return "⚠️ Could not extract meaningful content from one or both PDFs.", None

	    lo_list = extract_text_from_docx(lo_file)
	    if not lo_list:
	        return "⚠️ No learning outcomes detected.", None

	    old_scores = semantic_match(lo_list, old_text)
	    new_scores = semantic_match(lo_list, new_text)

	    change_percent = content_change_score(old_text, new_text)
	    score_pairs = list(zip(new_scores, old_scores))
	    improved_count = sum(new > old for new, old in score_pairs)
	    # Reported as "matched": outcomes whose score did not drop.
	    matched_los = sum(new >= old for new, old in score_pairs)

	    summary = f"📈 Content Change Estimate: {change_percent}%\n"
	    summary += f"🧠 LO Alignment: {matched_los} of {len(lo_list)} learning outcomes matched\n"
	    if improved_count > 0:
	        summary += "🟢 Summary: New handout has improved structure and added clarity."
	    else:
	        summary += "⚠️ Summary: No significant improvement in LO alignment."

	    return summary, _plot_lo_scores(old_scores, new_scores)


	def _plot_lo_scores(old_scores, new_scores):
	    """Build the grouped bar chart of old versus new score per outcome."""
	    # Imported here so this block carries its own dependency.
	    from matplotlib.figure import Figure

	    # The figure is built without pyplot on purpose: pyplot keeps every
	    # figure it creates registered until it is explicitly closed, which
	    # leaks memory in a long-running server, and its "current figure" is
	    # global state shared by concurrent requests.
	    fig = Figure()
	    ax = fig.subplots()

	    count = len(old_scores)
	    x = np.arange(count)
	    width = 0.35
	    ax.bar(x - width / 2, old_scores, width, label='Old')
	    ax.bar(x + width / 2, new_scores, width, label='New')
	    ax.set_ylabel('Match Score (0-1)')
	    ax.set_title('LO-wise Match Score: Old vs New')
	    ax.set_xticks(x)
	    ax.set_xticklabels([f"LO{i+1}" for i in range(count)], rotation=45)
	    ax.legend()
	    fig.tight_layout()
	    return fig

	with gr.Blocks() as demo:
	    # Page header and usage hint.
	    gr.Markdown("📘 Educational Content Comparator")
	    gr.Markdown("Upload 2 handouts and a .docx file of Learning Outcomes to compare changes and alignment.")

	    # Inputs: both handouts and the learning-outcome list, delivered to
	    # the handler as raw bytes.
	    with gr.Row():
	        old_pdf = gr.File(
	            label="📂 Upload Old PDF",
	            file_types=[".pdf"],
	            type="binary",
	        )
	        new_pdf = gr.File(
	            label="📂 Upload New PDF",
	            file_types=[".pdf"],
	            type="binary",
	        )
	        lo_file = gr.File(
	            label="📂 Upload Learning Outcomes (.docx)",
	            file_types=[".docx"],
	            type="binary",
	        )

	    # Actions.
	    with gr.Row():
	        btn = gr.Button("Submit")
	        clear_btn = gr.Button("Clear")

	    # Outputs: text report and per-outcome chart.
	    output_text = gr.Textbox(label="📋 Summary", lines=5, interactive=False)
	    output_plot = gr.Plot(label="📊 LO Match Chart")

	    def _reset_outputs():
	        """Return blank values for the summary box and the chart."""
	        return "", None

	    btn.click(
	        fn=compare_handouts,
	        inputs=[old_pdf, new_pdf, lo_file],
	        outputs=[output_text, output_plot],
	    )
	    clear_btn.click(
	        fn=_reset_outputs,
	        inputs=[],
	        outputs=[output_text, output_plot],
	    )

	demo.launch()