"""Space 1 — Debate Content Scorer (baseline).

This is the intentionally bad baseline. It uses distilgpt2, a small generic
text-generation model, and tries to "continue" a debate speech as if that
were a way of scoring or responding to an argument. It isn't. The point of
this Space in Prea's arc is to show what text-only, generator-based tools
*can't* do on a task that fundamentally depends on how speech is delivered.

See research-journal.md, Week 5, for the full write-up.
"""

import gradio as gr
from transformers import pipeline

# Loaded once at import time and shared by every request; device=-1 keeps the
# pipeline on CPU.
generator = pipeline("text-generation", model="distilbert/distilgpt2", device=-1)

# Each row is (prompt, temperature, max new tokens), in the same order as the
# Interface inputs below, so a row can be loaded straight into the form.
WSDC_EXAMPLES = [
    (
        "This house would ban single-use plastics in public schools. "
        "The first and most important reason is that",
        0.7,
        100,
    ),
    (
        "My opponent claims the cost of universal transit is prohibitive. "
        "This argument fails because",
        0.7,
        100,
    ),
    (
        "Madam Chair, the evidence is overwhelming. In 2023 alone,",
        1.0,
        120,
    ),
    (
        "We must act now, before it is too late. The reason is simple:",
        0.5,
        100,
    ),
]

# The textbox starts out pre-filled with the first example's prompt; reusing
# the row keeps the default and the example from drifting apart.
DEFAULT_PROMPT = WSDC_EXAMPLES[0][0]


def score_speech(prompt: str, temperature: float, max_tokens: int) -> str:
    """Continue a debate-speech opening with the distilgpt2 generator.

    Despite the name, no score is computed: the function returns the text the
    model generates for the prompt, which is the whole point of this baseline.

    Args:
        prompt: Opening line(s) of a debate speech. A missing, empty, or
            whitespace-only prompt is rejected with a short message.
        temperature: Sampling temperature; coerced to ``float``.
        max_tokens: Number of new tokens to generate; coerced to ``int``
            because slider widgets may deliver the value as a float.

    Returns:
        The ``generated_text`` of the first generated sequence, or the
        message ``"Please enter a debate prompt."`` when there is no prompt.
    """
    # Guard against None as well as blank input so a cleared textbox or a
    # direct API call cannot raise AttributeError on .strip().
    if not prompt or not prompt.strip():
        return "Please enter a debate prompt."

    result = generator(
        prompt,
        max_new_tokens=int(max_tokens),
        temperature=float(temperature),
        do_sample=True,
        # The model has a fixed context window. "hole" trims the left side of
        # an over-long prompt so that prompt + max_new_tokens still fits,
        # instead of overflowing the window during generation.
        handle_long_generation="hole",
    )
    return result[0]["generated_text"]


demo = gr.Interface(
    fn=score_speech,
    inputs=[
        gr.Textbox(
            label="Debate Speech Opening",
            placeholder="Enter the first line or two of a debate speech...",
            lines=4,
            value=DEFAULT_PROMPT,
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.5,
            value=0.7,
            step=0.1,
            label="Temperature",
            info="Lower = more predictable, Higher = more creative (and more likely to fabricate)",
        ),
        gr.Slider(
            minimum=40,
            maximum=200,
            value=100,
            step=10,
            label="Max New Tokens",
            info="How much text to generate",
        ),
    ],
    outputs=gr.Textbox(label="Model continuation", lines=10),
    title="Debate Content Scorer — Baseline (distilgpt2)",
    description=(
        "This is Prea's Space 1 — an intentionally weak baseline. "
        "It uses distilgpt2 (a small generic text-generation model) to 'continue' "
        "the opening of a debate speech. My original plan was to use this as a content "
        "scorer, but as soon as I tried it on real debate openings I realized that a "
        "text generator can't tell you whether an argument is good — it will happily "
        "fabricate evidence, misquote statistics, and wander off topic. That failure is "
        "the reason Spaces 2 and 3 exist. See research-journal.md, Week 5, for the full "
        "write-up, and try the examples below to watch it make up citations that don't exist."
    ),
    # gr.Interface takes each example as a list, so convert the tuples.
    examples=[list(ex) for ex in WSDC_EXAMPLES],
    theme=gr.themes.Soft(),
)

# Start the Gradio server for the interface defined above.
demo.launch()