| """ |
| OCS Semantic Scoring - Hugging Face Space |
| |
| Scores creativity of divergent thinking responses using semantic distance |
| in word embedding space. Part of the Open Creativity Scoring project. |
| |
| See: https://openscoring.du.edu |
| """ |
|
|
| import gradio as gr |
| import pandas as pd |
| import tempfile |
| import os |
| from scoring import SemanticScorer, download_model, ensure_spacy_model, MODELS, DEFAULT_MODEL |
|
|
| |
# Cache of loaded scorer instances, keyed by model name, so each model is
# downloaded/loaded at most once per process.
scorers = {}
# Name of the most recently loaded/selected model; used as the fallback
# when no explicit model name is passed to get_scorer().
current_model = DEFAULT_MODEL
|
|
|
|
def get_scorer(model_name=None):
    """Return the cached scorer for *model_name*, or ``None`` if not loaded.

    Falls back to the currently selected model when no name is given.
    """
    key = current_model if model_name is None else model_name
    return scorers.get(key)
|
|
|
|
def load_model(model_name=None, progress=gr.Progress()):
    """Download (if needed) and load a model, making it the active scorer.

    Returns a human-readable status string for display in the UI.
    """
    global current_model
    model_name = DEFAULT_MODEL if model_name is None else model_name

    # Some models are too large to host on this Space; refuse them early.
    hosted = MODELS.get(model_name, {}).get("hosted", True)
    if not hosted:
        return (
            f"{model_name} is not hosted on this Space (model files are too large). "
            "See the About tab for self-hosting instructions."
        )

    # Already cached: just switch the active model, no work needed.
    if model_name in scorers:
        current_model = model_name
        return f"{model_name} already loaded."

    progress(0, desc="Ensuring spaCy model is available...")
    ensure_spacy_model()

    progress(0.1, desc=f"Downloading {model_name} from Hugging Face Hub...")
    local_path = download_model(model_name)

    progress(0.5, desc="Loading model into memory (this may take a moment)...")
    new_scorer = SemanticScorer(model_name=model_name)
    new_scorer.load_model(local_path)
    scorers[model_name] = new_scorer
    current_model = model_name

    progress(1.0, desc="Ready!")
    return f"{model_name} loaded successfully."
|
|
|
|
def score_single(prompt, response, model_name, stopword, term_weighting, exclude_target,
                 normalize, elab_method, progress=gr.Progress()):
    """Score a single prompt-response pair.

    Returns a formatted result string (or an error message) for the UI.
    """
    # Validate inputs BEFORE triggering a potentially slow model download.
    if not prompt or not response:
        return "Please provide both a prompt and a response."

    scorer = get_scorer(model_name)
    if scorer is None:
        # Auto-load on first use. Keep the status message so failures
        # (e.g. a model not hosted on this Space) are surfaced to the user
        # instead of crashing with an AttributeError on a None scorer.
        status = load_model(model_name, progress)
        scorer = get_scorer(model_name)
        if scorer is None:
            return status

    orig = scorer.originality(
        prompt.strip(), response.strip(),
        stopword=stopword,
        term_weighting=term_weighting,
        exclude_target=exclude_target,
    )

    if orig is None:
        result = "Could not score - no recognized words found in response."
    else:
        if normalize:
            import numpy as np
            # The scorer's scaler maps raw cosine distances onto the 1-7
            # human-rating scale (private API — assumed fitted at load time).
            orig = scorer._scaler.transform(np.array([[orig]]))[0, 0]
            result = f"Originality: {orig:.1f} (on 1-7 scale)"
        else:
            result = f"Originality: {orig:.4f} (cosine distance, 0-1 scale)"

    # Optionally append an elaboration (length/complexity) measure.
    if elab_method and elab_method != "none":
        elab = scorer.elaboration(response.strip(), method=elab_method)
        result += f"\nElaboration ({elab_method}): {elab}"

    return result
|
|
|
|
def score_batch(file, model_name, stopword, term_weighting, exclude_target, normalize,
                elab_method, progress=gr.Progress()):
    """Score a CSV file of prompt-response pairs.

    Returns ``(output_path, preview_text)``; ``output_path`` is None on error.
    """
    # Validate the upload BEFORE triggering a potentially slow model download.
    if file is None:
        return None, "Please upload a CSV file."

    scorer = get_scorer(model_name)
    if scorer is None:
        # Auto-load on first use. Surface load failures (e.g. non-hosted
        # model) instead of crashing with an AttributeError on a None scorer.
        status = load_model(model_name, progress)
        scorer = get_scorer(model_name)
        if scorer is None:
            return None, status

    try:
        df = pd.read_csv(file.name)
    except Exception as e:
        return None, f"Error reading CSV: {e}"

    # Normalize headers so variants like "Prompt " / "RESPONSE" still match.
    df.columns = [c.strip().lower() for c in df.columns]

    if "prompt" not in df.columns or "response" not in df.columns:
        # No recognizable headers: assume the first two columns are
        # prompt and response, in that order.
        if len(df.columns) >= 2:
            df.columns = ["prompt", "response"] + list(df.columns[2:])
        else:
            return None, "CSV must have at least two columns (prompt, response)."

    elab = elab_method if elab_method != "none" else None

    progress(0.2, desc=f"Scoring {len(df)} responses...")
    scored = scorer.score_batch(
        df, stopword=stopword, term_weighting=term_weighting,
        exclude_target=exclude_target, normalize=normalize,
        elab_method=elab,
    )
    progress(0.9, desc="Preparing output...")

    # Write to the system temp dir so Gradio can offer the file for download.
    output_path = os.path.join(tempfile.gettempdir(), "scored_output.csv")
    scored.to_csv(output_path, index=False)

    return output_path, scored.head(20).to_string(index=False)
|
|
|
|
| |
# Markdown rendered at the bottom of the About tab (runtime string — do not edit casually).
CITATION_TEXT = """
**Citations:**

Dumas, D., Organisciak, P., & Doherty, M. D. (2020). Measuring divergent thinking
originality with human raters and text-mining models: A psychometric comparison of
methods. *Psychology of Aesthetics, Creativity, and the Arts*.
https://doi.org/10/ghcsqq

Organisciak, P., Acar, S., Dumas, D., & Berthiaume, K. (2023). Beyond semantic
distance: Automated scoring of divergent thinking greatly improves with large
language models. *Thinking Skills and Creativity*, 49, 101356.

**Note:** For LLM-based scoring (the newer, recommended approach), see
[openscoring.du.edu](https://openscoring.du.edu) and the
[ocsai library](https://github.com/massivetexts/ocsai).
"""
|
|
# Markdown rendered at the top of the About tab (runtime string — do not edit casually).
ABOUT_TEXT = """
# OCS Semantic Scoring

Scores creativity of divergent thinking responses (e.g., Alternate Uses Task)
by measuring **semantic distance** between a prompt and response in word
embedding space.

**How it works:**
1. Looks up word vectors for the prompt and response in the selected embedding model
2. Computes cosine similarity between them
3. Subtracts from 1 to get a distance score (higher = more original)

**Available models:**
- **MOTES 100k** (default): Children's writing embeddings (ages 10–12) from the MOTES study
- **GloVe 840B**: General-purpose embeddings trained on 840B Common Crawl tokens (Pennington et al. 2014). Large vocabulary, good for adult responses. English only.

**Options:**
- **Stopword filtering**: Skip common functional words (the, and, etc.)
- **Term weighting**: Weight words by IDF (rarer words matter more)
- **Exclude target**: Don't count prompt words in the response
- **Normalize**: Map scores to a 1-7 scale (model-specific calibration)
- **Elaboration**: Measure response length/complexity

**Note:** These models are English-only. Non-English words (e.g. Chinese, Arabic) will not be found in the vocabulary and will be skipped. For multilingual scoring, use [Ocsai](https://openscoring.du.edu/ocsai).
"""
|
|
|
|
| |
# Custom Gradio theme: an OCS-brand green primary palette (c50 lightest →
# c950 darkest), slate neutrals, and Inter as the UI font.
OCS_THEME = gr.themes.Default(
    primary_hue=gr.themes.colors.Color(
        name="ocs-green",
        c50="#f0faf0",
        c100="#dcf5dc",
        c200="#b8ecb8",
        c300="#85dc85",
        c400="#4cc44c",
        c500="#2ea82e",
        c600="#258c25",
        c700="#1d701d",
        c800="#165416",
        c900="#0e380e",
        c950="#071c07",
    ),
    neutral_hue=gr.themes.colors.slate,
    font=[gr.themes.GoogleFont("Inter"), "sans-serif"],
).set(
    # Primary buttons use the mid-green, darkening on hover.
    button_primary_background_fill="*primary_500",
    button_primary_background_fill_hover="*primary_600",
    block_label_text_size="sm",
)
|
|
|
|
| |
# UI layout. Component creation order matters in Gradio Blocks — do not
# reorder declarations without checking the rendered layout.
with gr.Blocks(title="OCS Semantic Scoring", theme=OCS_THEME) as demo:
    gr.Markdown("# OCS Semantic Scoring")
    gr.Markdown(
        "Score divergent thinking originality using semantic distance in word embedding space. "
        "Part of [Open Creativity Scoring](https://openscoring.du.edu) · "
        "For higher accuracy, try [Ocsai LLM scoring](https://openscoring.du.edu/ocsai)."
    )

    # Dropdown shows the human-readable description; its value is the model key.
    model_choices = [(MODELS[k]["description"], k) for k in MODELS]

    # Model selection/loading controls, shared by both scoring tabs.
    with gr.Row():
        model_selector = gr.Dropdown(
            label="Model",
            choices=model_choices,
            value=DEFAULT_MODEL,
        )
        load_btn = gr.Button("Load Model", variant="primary")
    load_status = gr.Textbox(label="Model Status", value="Model not loaded yet. Click 'Load Model' or score something to auto-load.", interactive=False)
    load_btn.click(fn=load_model, inputs=model_selector, outputs=load_status)

    with gr.Tabs():
        # --- Tab 1: score one prompt/response pair interactively ---
        with gr.TabItem("Single Score"):
            with gr.Row():
                with gr.Column():
                    prompt_input = gr.Textbox(label="Prompt (object)", placeholder="e.g., brick", lines=1)
                    response_input = gr.Textbox(label="Response", placeholder="e.g., modern art sculpture", lines=2)

                    # Scoring options (defaults mirror the batch tab below).
                    with gr.Row():
                        stopword = gr.Checkbox(label="Stopword filtering", value=True)
                        term_weight = gr.Checkbox(label="Term weighting (IDF)", value=True)
                    with gr.Row():
                        exclude_tgt = gr.Checkbox(label="Exclude target words", value=True)
                        norm = gr.Checkbox(label="Normalize (1-7)", value=False)

                    elab = gr.Dropdown(
                        label="Elaboration method",
                        choices=["none", "whitespace", "stoplist", "idf", "pos"],
                        value="none",
                    )
                    score_btn = gr.Button("Score", variant="primary")

                with gr.Column():
                    result_output = gr.Textbox(label="Result", lines=4, interactive=False)

            # Input order must match score_single's parameter order.
            score_btn.click(
                fn=score_single,
                inputs=[prompt_input, response_input, model_selector, stopword, term_weight, exclude_tgt, norm, elab],
                outputs=result_output,
            )

            gr.Examples(
                examples=[
                    ["brick", "doorstop"],
                    ["brick", "modern art sculpture displayed in a gallery"],
                    ["paperclip", "emergency lockpick for escaping a submarine"],
                    ["shoe", "flower pot for a tiny cactus"],
                ],
                inputs=[prompt_input, response_input],
            )

        # --- Tab 2: batch-score an uploaded CSV ---
        with gr.TabItem("Batch Score (CSV)"):
            gr.Markdown(
                "Upload a CSV with `prompt` and `response` columns. "
                "If no headers, the first two columns are used."
            )
            with gr.Row():
                with gr.Column():
                    file_input = gr.File(label="Upload CSV", file_types=[".csv"])

                    # Same options as the single-score tab (separate widgets).
                    with gr.Row():
                        b_stopword = gr.Checkbox(label="Stopword filtering", value=True)
                        b_term_weight = gr.Checkbox(label="Term weighting (IDF)", value=True)
                    with gr.Row():
                        b_exclude_tgt = gr.Checkbox(label="Exclude target words", value=True)
                        b_norm = gr.Checkbox(label="Normalize (1-7)", value=False)

                    b_elab = gr.Dropdown(
                        label="Elaboration method",
                        choices=["none", "whitespace", "stoplist", "idf", "pos"],
                        value="none",
                    )
                    batch_btn = gr.Button("Score File", variant="primary")

                with gr.Column():
                    file_output = gr.File(label="Download scored CSV")
                    preview = gr.Textbox(label="Preview (first 20 rows)", lines=10, interactive=False)

            # Input order must match score_batch's parameter order.
            batch_btn.click(
                fn=score_batch,
                inputs=[file_input, model_selector, b_stopword, b_term_weight, b_exclude_tgt, b_norm, b_elab],
                outputs=[file_output, preview],
            )

        # --- Tab 3: static documentation and citations ---
        with gr.TabItem("About"):
            gr.Markdown(ABOUT_TEXT)
            gr.Markdown(CITATION_TEXT)
|
|
if __name__ == "__main__":
    # Start the Gradio server when run as a script.
    demo.launch()
|
|