# Peter Organisciak
# Restore GloVe to dropdown; files uploading to HF
# e5d14d0
"""
OCS Semantic Scoring - Hugging Face Space
Scores creativity of divergent thinking responses using semantic distance
in word embedding space. Part of the Open Creativity Scoring project.
See: https://openscoring.du.edu
"""
import gradio as gr
import pandas as pd
import tempfile
import os
from scoring import SemanticScorer, download_model, ensure_spacy_model, MODELS, DEFAULT_MODEL
# Global scorer instances keyed by model name.
# Populated lazily by load_model(); lives for the process lifetime.
scorers = {}
# Name of the most recently loaded/selected model; mutated by load_model().
current_model = DEFAULT_MODEL
def get_scorer(model_name=None):
    """Return the cached scorer for *model_name* (default: current model), or None."""
    key = current_model if model_name is None else model_name
    return scorers.get(key)
def load_model(model_name=None, progress=gr.Progress()):
    """Download a model if needed, cache a scorer for it, and return a status string.

    Side effects: inserts into the module-level ``scorers`` cache and updates
    ``current_model``.
    """
    global current_model
    if model_name is None:
        model_name = DEFAULT_MODEL
    # Guard: oversized models are not hosted on this Space at all.
    hosted = MODELS.get(model_name, {}).get("hosted", True)
    if not hosted:
        return (
            f"{model_name} is not hosted on this Space (model files are too large). "
            "See the About tab for self-hosting instructions."
        )
    # Guard: already cached — just make it the active model.
    if model_name in scorers:
        current_model = model_name
        return f"{model_name} already loaded."
    progress(0, desc="Ensuring spaCy model is available...")
    ensure_spacy_model()
    progress(0.1, desc=f"Downloading {model_name} from Hugging Face Hub...")
    local_path = download_model(model_name)
    progress(0.5, desc="Loading model into memory (this may take a moment)...")
    new_scorer = SemanticScorer(model_name=model_name)
    new_scorer.load_model(local_path)
    scorers[model_name] = new_scorer
    current_model = model_name
    progress(1.0, desc="Ready!")
    return f"{model_name} loaded successfully."
def score_single(prompt, response, model_name, stopword, term_weighting, exclude_target,
                 normalize, elab_method, progress=gr.Progress()):
    """Score a single prompt-response pair.

    Returns a human-readable result string: originality as cosine distance
    (0-1), or on a 1-7 scale when ``normalize`` is set, plus an elaboration
    line when ``elab_method`` is not "none".
    """
    # Validate inputs BEFORE touching the model: loading can take minutes,
    # and an empty prompt/response would make that work pointless.
    if not prompt or not response:
        return "Please provide both a prompt and a response."
    scorer = get_scorer(model_name)
    if scorer is None:
        status = load_model(model_name, progress)
        scorer = get_scorer(model_name)
        if scorer is None:
            # Load failed (e.g. model not hosted on this Space) — report why
            # instead of crashing on scorer.originality below.
            return status
    orig = scorer.originality(
        prompt.strip(), response.strip(),
        stopword=stopword,
        term_weighting=term_weighting,
        exclude_target=exclude_target,
    )
    if orig is None:
        # No response word was found in the embedding vocabulary.
        result = "Could not score - no recognized words found in response."
    else:
        if normalize:
            import numpy as np
            # Map raw distance onto the model-specific 1-7 calibration.
            orig = scorer._scaler.transform(np.array([[orig]]))[0, 0]
            result = f"Originality: {orig:.1f} (on 1-7 scale)"
        else:
            result = f"Originality: {orig:.4f} (cosine distance, 0-1 scale)"
    # Elaboration is independent of originality, so report it even when
    # originality could not be computed.
    if elab_method and elab_method != "none":
        elab = scorer.elaboration(response.strip(), method=elab_method)
        result += f"\nElaboration ({elab_method}): {elab}"
    return result
def score_batch(file, model_name, stopword, term_weighting, exclude_target, normalize,
                elab_method, progress=gr.Progress()):
    """Score a CSV file of prompt-response pairs.

    Returns ``(output_csv_path, preview_text)``; on error the path is None
    and the preview carries the error message.
    """
    # Validate the upload BEFORE loading a model — loading can take minutes.
    if file is None:
        return None, "Please upload a CSV file."
    scorer = get_scorer(model_name)
    if scorer is None:
        status = load_model(model_name, progress)
        scorer = get_scorer(model_name)
        if scorer is None:
            # Load failed (e.g. model not hosted) — surface the status message
            # rather than raising AttributeError below.
            return None, status
    try:
        df = pd.read_csv(file.name)
    except Exception as e:
        return None, f"Error reading CSV: {e}"
    # Normalize column names
    df.columns = [c.strip().lower() for c in df.columns]
    if "prompt" not in df.columns or "response" not in df.columns:
        # Try to use first two columns
        if len(df.columns) >= 2:
            df.columns = ["prompt", "response"] + list(df.columns[2:])
        else:
            return None, "CSV must have at least two columns (prompt, response)."
    elab = elab_method if elab_method != "none" else None
    progress(0.2, desc=f"Scoring {len(df)} responses...")
    scored = scorer.score_batch(
        df, stopword=stopword, term_weighting=term_weighting,
        exclude_target=exclude_target, normalize=normalize,
        elab_method=elab,
    )
    progress(0.9, desc="Preparing output...")
    # Save to a UNIQUE temp file: a fixed name in the shared tempdir would be
    # clobbered when multiple users score files concurrently on the Space.
    fd, output_path = tempfile.mkstemp(prefix="scored_output_", suffix=".csv")
    os.close(fd)  # close the raw descriptor; pandas reopens by path
    scored.to_csv(output_path, index=False)
    return output_path, scored.head(20).to_string(index=False)
# Citation text — Markdown rendered at the bottom of the About tab.
CITATION_TEXT = """
**Citations:**
Dumas, D., Organisciak, P., & Doherty, M. D. (2020). Measuring divergent thinking
originality with human raters and text-mining models: A psychometric comparison of
methods. *Psychology of Aesthetics, Creativity, and the Arts*.
https://doi.org/10/ghcsqq
Organisciak, P., Acar, S., Dumas, D., & Berthiaume, K. (2023). Beyond semantic
distance: Automated scoring of divergent thinking greatly improves with large
language models. *Thinking Skills and Creativity*, 49, 101356.
**Note:** For LLM-based scoring (the newer, recommended approach), see
[openscoring.du.edu](https://openscoring.du.edu) and the
[ocsai library](https://github.com/massivetexts/ocsai).
"""
# Markdown body of the About tab: method overview, model list, option help.
ABOUT_TEXT = """
# OCS Semantic Scoring
Scores creativity of divergent thinking responses (e.g., Alternate Uses Task)
by measuring **semantic distance** between a prompt and response in word
embedding space.
**How it works:**
1. Looks up word vectors for the prompt and response in the selected embedding model
2. Computes cosine similarity between them
3. Subtracts from 1 to get a distance score (higher = more original)
**Available models:**
- **MOTES 100k** (default): Children's writing embeddings (ages 10–12) from the MOTES study
- **GloVe 840B**: General-purpose embeddings trained on 840B Common Crawl tokens (Pennington et al. 2014). Large vocabulary, good for adult responses. English only.
**Options:**
- **Stopword filtering**: Skip common functional words (the, and, etc.)
- **Term weighting**: Weight words by IDF (rarer words matter more)
- **Exclude target**: Don't count prompt words in the response
- **Normalize**: Map scores to a 1-7 scale (model-specific calibration)
- **Elaboration**: Measure response length/complexity
**Note:** These models are English-only. Non-English words (e.g. Chinese, Arabic) will not be found in the vocabulary and will be skipped. For multilingual scoring, use [Ocsai](https://openscoring.du.edu/ocsai).
"""
# OCS color theme — matches openscoring.du.edu semantic theme (vibrant green).
# A full c50-c950 ramp is required by gr.themes.colors.Color.
OCS_THEME = gr.themes.Default(
    primary_hue=gr.themes.colors.Color(
        name="ocs-green",
        c50="#f0faf0",
        c100="#dcf5dc",
        c200="#b8ecb8",
        c300="#85dc85",
        c400="#4cc44c",
        c500="#2ea82e",  # vibrant green — hsl(145, 65%, 42%) approx
        c600="#258c25",
        c700="#1d701d",
        c800="#165416",
        c900="#0e380e",
        c950="#071c07",
    ),
    neutral_hue=gr.themes.colors.slate,
    font=[gr.themes.GoogleFont("Inter"), "sans-serif"],
).set(
    # Primary buttons use the mid-green, darkening on hover.
    button_primary_background_fill="*primary_500",
    button_primary_background_fill_hover="*primary_600",
    block_label_text_size="sm",
)
# Build UI: a Blocks app with a shared model selector and three tabs
# (single score, batch CSV score, about).
with gr.Blocks(title="OCS Semantic Scoring", theme=OCS_THEME) as demo:
    gr.Markdown("# OCS Semantic Scoring")
    gr.Markdown(
        "Score divergent thinking originality using semantic distance in word embedding space. "
        "Part of [Open Creativity Scoring](https://openscoring.du.edu) · "
        "For higher accuracy, try [Ocsai LLM scoring](https://openscoring.du.edu/ocsai)."
    )
    # Model choices for dropdowns: (user-facing description, model key) pairs.
    model_choices = [(MODELS[k]["description"], k) for k in MODELS]
    # Load model controls — the selector is shared by both scoring tabs.
    with gr.Row():
        model_selector = gr.Dropdown(
            label="Model",
            choices=model_choices,
            value=DEFAULT_MODEL,
        )
        load_btn = gr.Button("Load Model", variant="primary")
    load_status = gr.Textbox(label="Model Status", value="Model not loaded yet. Click 'Load Model' or score something to auto-load.", interactive=False)
    load_btn.click(fn=load_model, inputs=model_selector, outputs=load_status)
    with gr.Tabs():
        with gr.TabItem("Single Score"):
            with gr.Row():
                with gr.Column():
                    prompt_input = gr.Textbox(label="Prompt (object)", placeholder="e.g., brick", lines=1)
                    response_input = gr.Textbox(label="Response", placeholder="e.g., modern art sculpture", lines=2)
                    with gr.Row():
                        stopword = gr.Checkbox(label="Stopword filtering", value=True)
                        term_weight = gr.Checkbox(label="Term weighting (IDF)", value=True)
                    with gr.Row():
                        exclude_tgt = gr.Checkbox(label="Exclude target words", value=True)
                        norm = gr.Checkbox(label="Normalize (1-7)", value=False)
                    elab = gr.Dropdown(
                        label="Elaboration method",
                        choices=["none", "whitespace", "stoplist", "idf", "pos"],
                        value="none",
                    )
                    score_btn = gr.Button("Score", variant="primary")
                with gr.Column():
                    result_output = gr.Textbox(label="Result", lines=4, interactive=False)
            # Wire the button to score_single; model comes from the shared selector.
            score_btn.click(
                fn=score_single,
                inputs=[prompt_input, response_input, model_selector, stopword, term_weight, exclude_tgt, norm, elab],
                outputs=result_output,
            )
            # Clickable example pairs that prefill the two textboxes.
            gr.Examples(
                examples=[
                    ["brick", "doorstop"],
                    ["brick", "modern art sculpture displayed in a gallery"],
                    ["paperclip", "emergency lockpick for escaping a submarine"],
                    ["shoe", "flower pot for a tiny cactus"],
                ],
                inputs=[prompt_input, response_input],
            )
        with gr.TabItem("Batch Score (CSV)"):
            gr.Markdown(
                "Upload a CSV with `prompt` and `response` columns. "
                "If no headers, the first two columns are used."
            )
            with gr.Row():
                with gr.Column():
                    file_input = gr.File(label="Upload CSV", file_types=[".csv"])
                    # Batch options mirror the single-score options (b_ prefix).
                    with gr.Row():
                        b_stopword = gr.Checkbox(label="Stopword filtering", value=True)
                        b_term_weight = gr.Checkbox(label="Term weighting (IDF)", value=True)
                    with gr.Row():
                        b_exclude_tgt = gr.Checkbox(label="Exclude target words", value=True)
                        b_norm = gr.Checkbox(label="Normalize (1-7)", value=False)
                    b_elab = gr.Dropdown(
                        label="Elaboration method",
                        choices=["none", "whitespace", "stoplist", "idf", "pos"],
                        value="none",
                    )
                    batch_btn = gr.Button("Score File", variant="primary")
                with gr.Column():
                    file_output = gr.File(label="Download scored CSV")
                    preview = gr.Textbox(label="Preview (first 20 rows)", lines=10, interactive=False)
            batch_btn.click(
                fn=score_batch,
                inputs=[file_input, model_selector, b_stopword, b_term_weight, b_exclude_tgt, b_norm, b_elab],
                outputs=[file_output, preview],
            )
        with gr.TabItem("About"):
            gr.Markdown(ABOUT_TEXT)
            gr.Markdown(CITATION_TEXT)
if __name__ == "__main__":
    demo.launch()