# Peter Organisciak
# Restore GloVe to dropdown; files uploading to HF
# e5d14d0
"""
OCS Semantic Scoring - Hugging Face Space
Scores creativity of divergent thinking responses using semantic distance
in word embedding space. Part of the Open Creativity Scoring project.
See: https://openscoring.du.edu
"""
import gradio as gr
import pandas as pd
import tempfile
import os
from scoring import SemanticScorer, download_model, ensure_spacy_model, MODELS, DEFAULT_MODEL
# Global scorer instances keyed by model name.
# Populated lazily by load_model(); lives for the process lifetime.
scorers = {}
# Name of the most recently loaded/selected model; mutated by load_model().
current_model = DEFAULT_MODEL
def get_scorer(model_name=None):
    """Return the cached scorer for *model_name* (default: current model), or None."""
    key = current_model if model_name is None else model_name
    return scorers.get(key)
def load_model(model_name=None, progress=gr.Progress()):
    """Download a model if needed, cache a scorer for it, and return a status string.

    Side effects: inserts into the module-level ``scorers`` cache and updates
    ``current_model``.
    """
    global current_model
    if model_name is None:
        model_name = DEFAULT_MODEL
    # Guard: oversized models are not hosted on this Space at all.
    hosted = MODELS.get(model_name, {}).get("hosted", True)
    if not hosted:
        return (
            f"{model_name} is not hosted on this Space (model files are too large). "
            "See the About tab for self-hosting instructions."
        )
    # Guard: already cached — just make it the active model.
    if model_name in scorers:
        current_model = model_name
        return f"{model_name} already loaded."
    progress(0, desc="Ensuring spaCy model is available...")
    ensure_spacy_model()
    progress(0.1, desc=f"Downloading {model_name} from Hugging Face Hub...")
    local_path = download_model(model_name)
    progress(0.5, desc="Loading model into memory (this may take a moment)...")
    new_scorer = SemanticScorer(model_name=model_name)
    new_scorer.load_model(local_path)
    scorers[model_name] = new_scorer
    current_model = model_name
    progress(1.0, desc="Ready!")
    return f"{model_name} loaded successfully."
def score_single(prompt, response, model_name, stopword, term_weighting, exclude_target,
                 normalize, elab_method, progress=gr.Progress()):
    """Score a single prompt-response pair.

    Returns a human-readable result string: originality as cosine distance
    (0-1), or on a 1-7 scale when ``normalize`` is set, plus an elaboration
    line when ``elab_method`` is not "none".
    """
    # Validate inputs BEFORE touching the model: loading can take minutes,
    # and an empty prompt/response would make that work pointless.
    if not prompt or not response:
        return "Please provide both a prompt and a response."
    scorer = get_scorer(model_name)
    if scorer is None:
        status = load_model(model_name, progress)
        scorer = get_scorer(model_name)
        if scorer is None:
            # Load failed (e.g. model not hosted on this Space) — report why
            # instead of crashing on scorer.originality below.
            return status
    orig = scorer.originality(
        prompt.strip(), response.strip(),
        stopword=stopword,
        term_weighting=term_weighting,
        exclude_target=exclude_target,
    )
    if orig is None:
        # No response word was found in the embedding vocabulary.
        result = "Could not score - no recognized words found in response."
    else:
        if normalize:
            import numpy as np
            # Map raw distance onto the model-specific 1-7 calibration.
            orig = scorer._scaler.transform(np.array([[orig]]))[0, 0]
            result = f"Originality: {orig:.1f} (on 1-7 scale)"
        else:
            result = f"Originality: {orig:.4f} (cosine distance, 0-1 scale)"
    # Elaboration is independent of originality, so report it even when
    # originality could not be computed.
    if elab_method and elab_method != "none":
        elab = scorer.elaboration(response.strip(), method=elab_method)
        result += f"\nElaboration ({elab_method}): {elab}"
    return result
def score_batch(file, model_name, stopword, term_weighting, exclude_target, normalize,
                elab_method, progress=gr.Progress()):
    """Score a CSV file of prompt-response pairs.

    Returns ``(output_csv_path, preview_text)``; on error the path is None
    and the preview carries the error message.
    """
    # Validate the upload BEFORE loading a model — loading can take minutes.
    if file is None:
        return None, "Please upload a CSV file."
    scorer = get_scorer(model_name)
    if scorer is None:
        status = load_model(model_name, progress)
        scorer = get_scorer(model_name)
        if scorer is None:
            # Load failed (e.g. model not hosted) — surface the status message
            # rather than raising AttributeError below.
            return None, status
    try:
        df = pd.read_csv(file.name)
    except Exception as e:
        return None, f"Error reading CSV: {e}"
    # Normalize column names
    df.columns = [c.strip().lower() for c in df.columns]
    if "prompt" not in df.columns or "response" not in df.columns:
        # Try to use first two columns
        if len(df.columns) >= 2:
            df.columns = ["prompt", "response"] + list(df.columns[2:])
        else:
            return None, "CSV must have at least two columns (prompt, response)."
    elab = elab_method if elab_method != "none" else None
    progress(0.2, desc=f"Scoring {len(df)} responses...")
    scored = scorer.score_batch(
        df, stopword=stopword, term_weighting=term_weighting,
        exclude_target=exclude_target, normalize=normalize,
        elab_method=elab,
    )
    progress(0.9, desc="Preparing output...")
    # Save to a UNIQUE temp file: a fixed name in the shared tempdir would be
    # clobbered when multiple users score files concurrently on the Space.
    fd, output_path = tempfile.mkstemp(prefix="scored_output_", suffix=".csv")
    os.close(fd)  # close the raw descriptor; pandas reopens by path
    scored.to_csv(output_path, index=False)
    return output_path, scored.head(20).to_string(index=False)
# Citation text — Markdown rendered at the bottom of the About tab.
CITATION_TEXT = """
**Citations:**
Dumas, D., Organisciak, P., & Doherty, M. D. (2020). Measuring divergent thinking
originality with human raters and text-mining models: A psychometric comparison of
methods. *Psychology of Aesthetics, Creativity, and the Arts*.
https://doi.org/10/ghcsqq
Organisciak, P., Acar, S., Dumas, D., & Berthiaume, K. (2023). Beyond semantic
distance: Automated scoring of divergent thinking greatly improves with large
language models. *Thinking Skills and Creativity*, 49, 101356.
**Note:** For LLM-based scoring (the newer, recommended approach), see
[openscoring.du.edu](https://openscoring.du.edu) and the
[ocsai library](https://github.com/massivetexts/ocsai).
"""
# Markdown body of the About tab: method overview, model list, option help.
ABOUT_TEXT = """
# OCS Semantic Scoring
Scores creativity of divergent thinking responses (e.g., Alternate Uses Task)
by measuring **semantic distance** between a prompt and response in word
embedding space.
**How it works:**
1. Looks up word vectors for the prompt and response in the selected embedding model
2. Computes cosine similarity between them
3. Subtracts from 1 to get a distance score (higher = more original)
**Available models:**
- **MOTES 100k** (default): Children's writing embeddings (ages 10–12) from the MOTES study
- **GloVe 840B**: General-purpose embeddings trained on 840B Common Crawl tokens (Pennington et al. 2014). Large vocabulary, good for adult responses. English only.
**Options:**
- **Stopword filtering**: Skip common functional words (the, and, etc.)
- **Term weighting**: Weight words by IDF (rarer words matter more)
- **Exclude target**: Don't count prompt words in the response
- **Normalize**: Map scores to a 1-7 scale (model-specific calibration)
- **Elaboration**: Measure response length/complexity
**Note:** These models are English-only. Non-English words (e.g. Chinese, Arabic) will not be found in the vocabulary and will be skipped. For multilingual scoring, use [Ocsai](https://openscoring.du.edu/ocsai).
"""
# OCS color theme — matches openscoring.du.edu semantic theme (vibrant green).
# A full c50-c950 ramp is required by gr.themes.colors.Color.
OCS_THEME = gr.themes.Default(
    primary_hue=gr.themes.colors.Color(
        name="ocs-green",
        c50="#f0faf0",
        c100="#dcf5dc",
        c200="#b8ecb8",
        c300="#85dc85",
        c400="#4cc44c",
        c500="#2ea82e",  # vibrant green — hsl(145, 65%, 42%) approx
        c600="#258c25",
        c700="#1d701d",
        c800="#165416",
        c900="#0e380e",
        c950="#071c07",
    ),
    neutral_hue=gr.themes.colors.slate,
    font=[gr.themes.GoogleFont("Inter"), "sans-serif"],
).set(
    # Primary buttons use the mid-green, darkening on hover.
    button_primary_background_fill="*primary_500",
    button_primary_background_fill_hover="*primary_600",
    block_label_text_size="sm",
)
# Build UI: a Blocks app with a shared model selector and three tabs
# (single score, batch CSV score, about).
with gr.Blocks(title="OCS Semantic Scoring", theme=OCS_THEME) as demo:
    gr.Markdown("# OCS Semantic Scoring")
    gr.Markdown(
        "Score divergent thinking originality using semantic distance in word embedding space. "
        "Part of [Open Creativity Scoring](https://openscoring.du.edu) · "
        "For higher accuracy, try [Ocsai LLM scoring](https://openscoring.du.edu/ocsai)."
    )
    # Model choices for dropdowns: (user-facing description, model key) pairs.
    model_choices = [(MODELS[k]["description"], k) for k in MODELS]
    # Load model controls — the selector is shared by both scoring tabs.
    with gr.Row():
        model_selector = gr.Dropdown(
            label="Model",
            choices=model_choices,
            value=DEFAULT_MODEL,
        )
        load_btn = gr.Button("Load Model", variant="primary")
    load_status = gr.Textbox(label="Model Status", value="Model not loaded yet. Click 'Load Model' or score something to auto-load.", interactive=False)
    load_btn.click(fn=load_model, inputs=model_selector, outputs=load_status)
    with gr.Tabs():
        with gr.TabItem("Single Score"):
            with gr.Row():
                with gr.Column():
                    prompt_input = gr.Textbox(label="Prompt (object)", placeholder="e.g., brick", lines=1)
                    response_input = gr.Textbox(label="Response", placeholder="e.g., modern art sculpture", lines=2)
                    with gr.Row():
                        stopword = gr.Checkbox(label="Stopword filtering", value=True)
                        term_weight = gr.Checkbox(label="Term weighting (IDF)", value=True)
                    with gr.Row():
                        exclude_tgt = gr.Checkbox(label="Exclude target words", value=True)
                        norm = gr.Checkbox(label="Normalize (1-7)", value=False)
                    elab = gr.Dropdown(
                        label="Elaboration method",
                        choices=["none", "whitespace", "stoplist", "idf", "pos"],
                        value="none",
                    )
                    score_btn = gr.Button("Score", variant="primary")
                with gr.Column():
                    result_output = gr.Textbox(label="Result", lines=4, interactive=False)
            # Wire the button to score_single; model comes from the shared selector.
            score_btn.click(
                fn=score_single,
                inputs=[prompt_input, response_input, model_selector, stopword, term_weight, exclude_tgt, norm, elab],
                outputs=result_output,
            )
            # Clickable example pairs that prefill the two textboxes.
            gr.Examples(
                examples=[
                    ["brick", "doorstop"],
                    ["brick", "modern art sculpture displayed in a gallery"],
                    ["paperclip", "emergency lockpick for escaping a submarine"],
                    ["shoe", "flower pot for a tiny cactus"],
                ],
                inputs=[prompt_input, response_input],
            )
        with gr.TabItem("Batch Score (CSV)"):
            gr.Markdown(
                "Upload a CSV with `prompt` and `response` columns. "
                "If no headers, the first two columns are used."
            )
            with gr.Row():
                with gr.Column():
                    file_input = gr.File(label="Upload CSV", file_types=[".csv"])
                    # Batch options mirror the single-score options (b_ prefix).
                    with gr.Row():
                        b_stopword = gr.Checkbox(label="Stopword filtering", value=True)
                        b_term_weight = gr.Checkbox(label="Term weighting (IDF)", value=True)
                    with gr.Row():
                        b_exclude_tgt = gr.Checkbox(label="Exclude target words", value=True)
                        b_norm = gr.Checkbox(label="Normalize (1-7)", value=False)
                    b_elab = gr.Dropdown(
                        label="Elaboration method",
                        choices=["none", "whitespace", "stoplist", "idf", "pos"],
                        value="none",
                    )
                    batch_btn = gr.Button("Score File", variant="primary")
                with gr.Column():
                    file_output = gr.File(label="Download scored CSV")
                    preview = gr.Textbox(label="Preview (first 20 rows)", lines=10, interactive=False)
            batch_btn.click(
                fn=score_batch,
                inputs=[file_input, model_selector, b_stopword, b_term_weight, b_exclude_tgt, b_norm, b_elab],
                outputs=[file_output, preview],
            )
        with gr.TabItem("About"):
            gr.Markdown(ABOUT_TEXT)
            gr.Markdown(CITATION_TEXT)
if __name__ == "__main__":
    demo.launch()