""" OCS Semantic Scoring - Hugging Face Space Scores creativity of divergent thinking responses using semantic distance in word embedding space. Part of the Open Creativity Scoring project. See: https://openscoring.du.edu """ import gradio as gr import pandas as pd import tempfile import os from scoring import SemanticScorer, download_model, ensure_spacy_model, MODELS, DEFAULT_MODEL # Global scorer instances keyed by model name scorers = {} current_model = DEFAULT_MODEL def get_scorer(model_name=None): """Get or create a scorer for the given model.""" if model_name is None: model_name = current_model return scorers.get(model_name) def load_model(model_name=None, progress=gr.Progress()): """Download and load a model.""" global current_model if model_name is None: model_name = DEFAULT_MODEL if not MODELS.get(model_name, {}).get("hosted", True): return ( f"{model_name} is not hosted on this Space (model files are too large). " "See the About tab for self-hosting instructions." ) if model_name in scorers: current_model = model_name return f"{model_name} already loaded." progress(0, desc="Ensuring spaCy model is available...") ensure_spacy_model() progress(0.1, desc=f"Downloading {model_name} from Hugging Face Hub...") model_path = download_model(model_name) progress(0.5, desc="Loading model into memory (this may take a moment)...") scorer = SemanticScorer(model_name=model_name) scorer.load_model(model_path) scorers[model_name] = scorer current_model = model_name progress(1.0, desc="Ready!") return f"{model_name} loaded successfully." def score_single(prompt, response, model_name, stopword, term_weighting, exclude_target, normalize, elab_method, progress=gr.Progress()): """Score a single prompt-response pair.""" scorer = get_scorer(model_name) if scorer is None: load_model(model_name, progress) scorer = get_scorer(model_name) if not prompt or not response: return "Please provide both a prompt and a response." orig = scorer.originality( prompt.strip(), response.strip(), stopword=stopword, term_weighting=term_weighting, exclude_target=exclude_target, ) if orig is None: result = "Could not score - no recognized words found in response." else: if normalize: import numpy as np orig = scorer._scaler.transform(np.array([[orig]]))[0, 0] result = f"Originality: {orig:.1f} (on 1-7 scale)" else: result = f"Originality: {orig:.4f} (cosine distance, 0-1 scale)" if elab_method and elab_method != "none": elab = scorer.elaboration(response.strip(), method=elab_method) result += f"\nElaboration ({elab_method}): {elab}" return result def score_batch(file, model_name, stopword, term_weighting, exclude_target, normalize, elab_method, progress=gr.Progress()): """Score a CSV file of prompt-response pairs.""" scorer = get_scorer(model_name) if scorer is None: load_model(model_name, progress) scorer = get_scorer(model_name) if file is None: return None, "Please upload a CSV file." try: df = pd.read_csv(file.name) except Exception as e: return None, f"Error reading CSV: {e}" # Normalize column names df.columns = [c.strip().lower() for c in df.columns] if "prompt" not in df.columns or "response" not in df.columns: # Try to use first two columns if len(df.columns) >= 2: df.columns = ["prompt", "response"] + list(df.columns[2:]) else: return None, "CSV must have at least two columns (prompt, response)." elab = elab_method if elab_method != "none" else None progress(0.2, desc=f"Scoring {len(df)} responses...") scored = scorer.score_batch( df, stopword=stopword, term_weighting=term_weighting, exclude_target=exclude_target, normalize=normalize, elab_method=elab, ) progress(0.9, desc="Preparing output...") # Save to temp file for download output_path = os.path.join(tempfile.gettempdir(), "scored_output.csv") scored.to_csv(output_path, index=False) return output_path, scored.head(20).to_string(index=False) # Citation text CITATION_TEXT = """ **Citations:** Dumas, D., Organisciak, P., & Doherty, M. D. (2020). Measuring divergent thinking originality with human raters and text-mining models: A psychometric comparison of methods. *Psychology of Aesthetics, Creativity, and the Arts*. https://doi.org/10/ghcsqq Organisciak, P., Acar, S., Dumas, D., & Berthiaume, K. (2023). Beyond semantic distance: Automated scoring of divergent thinking greatly improves with large language models. *Thinking Skills and Creativity*, 49, 101356. **Note:** For LLM-based scoring (the newer, recommended approach), see [openscoring.du.edu](https://openscoring.du.edu) and the [ocsai library](https://github.com/massivetexts/ocsai). """ ABOUT_TEXT = """ # OCS Semantic Scoring Scores creativity of divergent thinking responses (e.g., Alternate Uses Task) by measuring **semantic distance** between a prompt and response in word embedding space. **How it works:** 1. Looks up word vectors for the prompt and response in the selected embedding model 2. Computes cosine similarity between them 3. Subtracts from 1 to get a distance score (higher = more original) **Available models:** - **MOTES 100k** (default): Children's writing embeddings (ages 10โ€“12) from the MOTES study - **GloVe 840B**: General-purpose embeddings trained on 840B Common Crawl tokens (Pennington et al. 2014). Large vocabulary, good for adult responses. English only. **Options:** - **Stopword filtering**: Skip common functional words (the, and, etc.) - **Term weighting**: Weight words by IDF (rarer words matter more) - **Exclude target**: Don't count prompt words in the response - **Normalize**: Map scores to a 1-7 scale (model-specific calibration) - **Elaboration**: Measure response length/complexity **Note:** These models are English-only. Non-English words (e.g. Chinese, Arabic) will not be found in the vocabulary and will be skipped. For multilingual scoring, use [Ocsai](https://openscoring.du.edu/ocsai). """ # OCS color theme โ€” matches openscoring.du.edu semantic theme (vibrant green) OCS_THEME = gr.themes.Default( primary_hue=gr.themes.colors.Color( name="ocs-green", c50="#f0faf0", c100="#dcf5dc", c200="#b8ecb8", c300="#85dc85", c400="#4cc44c", c500="#2ea82e", # vibrant green โ€” hsl(145, 65%, 42%) approx c600="#258c25", c700="#1d701d", c800="#165416", c900="#0e380e", c950="#071c07", ), neutral_hue=gr.themes.colors.slate, font=[gr.themes.GoogleFont("Inter"), "sans-serif"], ).set( button_primary_background_fill="*primary_500", button_primary_background_fill_hover="*primary_600", block_label_text_size="sm", ) # Build UI with gr.Blocks(title="OCS Semantic Scoring", theme=OCS_THEME) as demo: gr.Markdown("# OCS Semantic Scoring") gr.Markdown( "Score divergent thinking originality using semantic distance in word embedding space. " "Part of [Open Creativity Scoring](https://openscoring.du.edu) ยท " "For higher accuracy, try [Ocsai LLM scoring](https://openscoring.du.edu/ocsai)." ) # Model choices for dropdowns model_choices = [(MODELS[k]["description"], k) for k in MODELS] # Load model controls with gr.Row(): model_selector = gr.Dropdown( label="Model", choices=model_choices, value=DEFAULT_MODEL, ) load_btn = gr.Button("Load Model", variant="primary") load_status = gr.Textbox(label="Model Status", value="Model not loaded yet. Click 'Load Model' or score something to auto-load.", interactive=False) load_btn.click(fn=load_model, inputs=model_selector, outputs=load_status) with gr.Tabs(): with gr.TabItem("Single Score"): with gr.Row(): with gr.Column(): prompt_input = gr.Textbox(label="Prompt (object)", placeholder="e.g., brick", lines=1) response_input = gr.Textbox(label="Response", placeholder="e.g., modern art sculpture", lines=2) with gr.Row(): stopword = gr.Checkbox(label="Stopword filtering", value=True) term_weight = gr.Checkbox(label="Term weighting (IDF)", value=True) with gr.Row(): exclude_tgt = gr.Checkbox(label="Exclude target words", value=True) norm = gr.Checkbox(label="Normalize (1-7)", value=False) elab = gr.Dropdown( label="Elaboration method", choices=["none", "whitespace", "stoplist", "idf", "pos"], value="none", ) score_btn = gr.Button("Score", variant="primary") with gr.Column(): result_output = gr.Textbox(label="Result", lines=4, interactive=False) score_btn.click( fn=score_single, inputs=[prompt_input, response_input, model_selector, stopword, term_weight, exclude_tgt, norm, elab], outputs=result_output, ) gr.Examples( examples=[ ["brick", "doorstop"], ["brick", "modern art sculpture displayed in a gallery"], ["paperclip", "emergency lockpick for escaping a submarine"], ["shoe", "flower pot for a tiny cactus"], ], inputs=[prompt_input, response_input], ) with gr.TabItem("Batch Score (CSV)"): gr.Markdown( "Upload a CSV with `prompt` and `response` columns. " "If no headers, the first two columns are used." ) with gr.Row(): with gr.Column(): file_input = gr.File(label="Upload CSV", file_types=[".csv"]) with gr.Row(): b_stopword = gr.Checkbox(label="Stopword filtering", value=True) b_term_weight = gr.Checkbox(label="Term weighting (IDF)", value=True) with gr.Row(): b_exclude_tgt = gr.Checkbox(label="Exclude target words", value=True) b_norm = gr.Checkbox(label="Normalize (1-7)", value=False) b_elab = gr.Dropdown( label="Elaboration method", choices=["none", "whitespace", "stoplist", "idf", "pos"], value="none", ) batch_btn = gr.Button("Score File", variant="primary") with gr.Column(): file_output = gr.File(label="Download scored CSV") preview = gr.Textbox(label="Preview (first 20 rows)", lines=10, interactive=False) batch_btn.click( fn=score_batch, inputs=[file_input, model_selector, b_stopword, b_term_weight, b_exclude_tgt, b_norm, b_elab], outputs=[file_output, preview], ) with gr.TabItem("About"): gr.Markdown(ABOUT_TEXT) gr.Markdown(CITATION_TEXT) if __name__ == "__main__": demo.launch()