Spaces:
Sleeping
Sleeping
| """ | |
| ParliaBench Demo β Hugging Face Space | |
| Interactive inference demo for LLM-generated UK parliamentary speeches. | |
| Based on: | |
| "ParliaBench: An Evaluation and Benchmarking Framework for | |
| LLM-Generated Parliamentary Speech" | |
| Argyro Tsipi, NTUA Diploma Thesis, October 2025 | |
| Repos: | |
| Models β argyro/parliabench-{model}-lora | |
| Dataset β argyro/parliabench-gb-processed | |
| Space β argyro/parliabench-demo | |
| """ | |
| import json | |
| import re | |
| import time | |
| import gradio as gr | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| from utils import ( | |
| PARTIES, EUROVOC_TOPICS, HOUSES, MODELS, MODEL_FAMILY, MODEL_CONFIG, | |
| DEFAULT_GEN_PARAMS, get_valid_houses, get_orientation, | |
| build_context_string, count_tokens_approx, validate_speech, | |
| ) | |
| from prompt_templates import build_full_prompt | |
# --- Model cache --------------------------------------------------------------
# Process-lifetime cache: model display name -> (model, tokenizer).
# Entries are never evicted, so each model is loaded from the Hub at most once
# per Space session (GPU memory permitting).
_model_cache: dict[str, tuple] = {}
def _load_model_and_tokenizer(model_display_name: str):
    """Return a cached (model, tokenizer) pair, loading it on first use.

    Fine-tuned variants ("fine-tuned" in the display name) are loaded as the
    family's base model plus a LoRA adapter via peft; baselines are loaded
    directly from their repository.
    """
    cached = _model_cache.get(model_display_name)
    if cached is not None:
        return cached

    repo_id = MODELS[model_display_name]
    family = MODEL_FAMILY[model_display_name]
    base_repo = MODEL_CONFIG[family]["base_model"]

    tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
    if tokenizer.pad_token is None:
        # Models without a dedicated pad token pad with EOS instead.
        tokenizer.pad_token = tokenizer.eos_token

    on_gpu = torch.cuda.is_available()
    load_kwargs = {
        "torch_dtype": torch.float16 if on_gpu else torch.float32,
        "device_map": "auto" if on_gpu else None,
        "trust_remote_code": True,
    }

    if "fine-tuned" in model_display_name:
        # LoRA checkpoint: load the base model, then attach the adapter.
        from peft import PeftModel
        base = AutoModelForCausalLM.from_pretrained(base_repo, **load_kwargs)
        model = PeftModel.from_pretrained(base, repo_id)
    else:
        model = AutoModelForCausalLM.from_pretrained(repo_id, **load_kwargs)

    model.eval()
    _model_cache[model_display_name] = (model, tokenizer)
    return model, tokenizer
# --- Speech extraction (mirrors extract_speech in speech_generator.py) --------
def _extract_speech(raw_text: str, family: str) -> str:
    """Extract clean speech from raw decoded model output.

    Applies, in this exact order: start/end-marker splitting, special-token
    removal, template-artefact truncation, meta-commentary prefix stripping,
    regex cleanup (reserved tokens, whitespace, leading punctuation, HTML,
    escape sequences, markdown), and a final-punctuation fix-up.
    The ordering matters; steps assume earlier cleanup already ran.
    """
    cfg = MODEL_CONFIG[family]
    # Find start marker: the decoded text still contains the prompt, so keep
    # only the text after the LAST occurrence of the assistant-turn marker.
    start = cfg["start_marker"]
    if start in raw_text:
        parts = raw_text.split(start)
        speech = parts[-1].lstrip("\n")
    else:
        speech = raw_text
    # Truncate at end marker (first one found wins).
    for em in cfg["end_markers"]:
        if em in speech:
            speech = speech.split(em)[0]
            break
    # Remove special tokens listed per model family.
    for tok in cfg["special_tokens_to_remove"]:
        speech = speech.replace(tok, "")
    # Remove template artefacts: if a prompt label leaked into the output,
    # truncate everything from the label onward.
    for art in ["Context:", "Instruction:", "EUROVOC TOPIC:", "SECTION:",
                "PARTY:", "POLITICAL ORIENTATION:", "HOUSE:",
                "\nuser", "\nassistant", "\nsystem"]:
        if art in speech:
            speech = speech.split(art)[0]
    # Strip meta-commentary prefixes (case-insensitive match on the first one).
    _strip_prefixes = [
        "Thank you for providing", "Thank you for your instruction",
        "Here is my speech:", "Here is my response:", "Response:",
        "Based on your specifications", "Based on the context provided",
    ]
    sl = speech.lower()
    for prefix in _strip_prefixes:
        if sl.startswith(prefix.lower()):
            if prefix.endswith(":"):
                # Prefix is a complete label; drop exactly its length.
                speech = speech[len(prefix):].lstrip()
            else:
                # Prefix starts a preamble sentence: drop through the first
                # paragraph break (or line break) if it occurs early enough.
                cut = speech.find("\n\n")
                if 0 < cut < 200:
                    speech = speech[cut + 2:].strip()
                else:
                    cut = speech.find("\n")
                    if 0 < cut < 150:
                        speech = speech[cut + 1:].strip()
            break
    # Llama reserved tokens, then any remaining <|...|> control tokens.
    speech = re.sub(r"<\|reserved_special_token_\d+\|>", "", speech)
    speech = re.sub(r"<\|[^|]*\|>", "", speech)
    # Whitespace: collapse 3+ newlines to a paragraph break, runs of spaces to one.
    speech = re.sub(r"\n{3,}", "\n\n", speech)
    speech = re.sub(r" {2,}", " ", speech)
    speech = speech.strip()
    # Leading punctuation artefacts (anything before the first word/quote/paren).
    speech = re.sub(r"^[^\w\s\"'(]+", "", speech).lstrip()
    speech = re.sub(r"^\.{2,}\s*", "", speech)
    # HTML tags / trailing dash runs.
    speech = re.sub(r"</?[a-zA-Z][^>]*>", "", speech)
    speech = re.sub(r"----+\s*\.?\s*$", "", speech)
    # Qwen: literal escape sequences emitted as text rather than whitespace.
    if "\\n" in speech or "\\t" in speech:
        speech = speech.replace("\\n", "\n").replace("\\t", " ")
    # Markdown: headings and stray code fences.
    speech = re.sub(r"^#+\s+", "", speech)
    speech = re.sub(r"\n#+\s+", "\n", speech)
    speech = re.sub(r"\n?```\.?", "", speech)
    speech = speech.strip()
    # Final punctuation: ensure the speech ends on a sentence terminator/quote.
    if speech and not speech.endswith((".", "!", "?", '"', "'")):
        speech = speech.rstrip() + "."
    return speech
# --- Main generation function -------------------------------------------------
def generate_speech(
    model_display_name: str,
    party: str,
    topic: str,
    section: str,
    house: str,
    instruction_input: str,
    temperature: float,
    top_p: float,
    repetition_penalty: float,
    max_new_tokens: int,
    min_words: int,
    max_words: int,
):
    """Generate a parliamentary speech and return (speech, prompt, stats, params).

    Builds the family-specific prompt, runs nucleus sampling, extracts and
    cleans the speech, then validates it against the word-count window.

    Returns:
        tuple[str, str, str, str]: (speech text, or an error message if the
        model failed to load; the full prompt string; a markdown stats line;
        a summary of the sampling parameters used).
    """
    family = MODEL_FAMILY[model_display_name]
    cfg = MODEL_CONFIG[family]
    # Fall back to a generic instruction when the user left the box empty.
    instruction = (instruction_input.strip()
                   if instruction_input and instruction_input.strip()
                   else f"Address the debate on {section} on {topic}.")
    full_prompt = build_full_prompt(
        model_family=family,
        party=party,
        topic=topic,
        section=section,
        house=house,
        instruction=instruction,
        min_words=int(min_words),
        max_words=int(max_words),
    )
    prompt_tokens = count_tokens_approx(full_prompt)
    try:
        model, tokenizer = _load_model_and_tokenizer(model_display_name)
    except Exception as exc:
        # Surface the failure in the speech box rather than crashing the UI.
        return (
            f"β οΈ Model loading failed:\n{exc}\n\n"
            "Make sure the model repository exists on Hugging Face "
            "and you have sufficient GPU memory (β₯16 GB recommended).",
            full_prompt,
            "*Model loading error β see output above.*",
            "",
        )
    inputs = tokenizer([full_prompt], return_tensors="pt").to(model.device)
    # Fix: `pad_token_id or eos_token_id` silently discarded a pad id of 0,
    # which is a perfectly valid token id. Test for None explicitly.
    pad_id = (tokenizer.pad_token_id
              if tokenizer.pad_token_id is not None
              else tokenizer.eos_token_id)
    t0 = time.time()
    with torch.no_grad():
        out_ids = model.generate(
            **inputs,
            max_new_tokens=int(max_new_tokens),
            do_sample=True,
            temperature=float(temperature),
            top_p=float(top_p),
            repetition_penalty=float(repetition_penalty),
            pad_token_id=pad_id,
            eos_token_id=tokenizer.eos_token_id,
            stop_strings=cfg["stop_strings"],
            tokenizer=tokenizer,  # required by generate() when stop_strings is set
            use_cache=True,
        )
    elapsed = time.time() - t0
    # Decode the full sequence (prompt + continuation); _extract_speech splits
    # on the family's assistant-turn marker to isolate the generated speech.
    raw = tokenizer.decode(out_ids[0], skip_special_tokens=False)
    speech = _extract_speech(raw, family)
    is_valid, reason = validate_speech(speech, int(min_words), int(max_words))
    wc = len(speech.split())
    stats = (
        f"**Tokens in prompt:** ~{prompt_tokens} | "
        f"**Words generated:** {wc} | "
        f"**Time:** {elapsed:.1f}s | "
        f"**Validation:** {'β ' + reason if is_valid else 'β οΈ ' + reason}"
    )
    params_used = (
        f"temperature={temperature}, top_p={top_p}, "
        f"repetition_penalty={repetition_penalty}, max_new_tokens={max_new_tokens}"
    )
    return speech, full_prompt, stats, params_used
# --- Sample gallery -----------------------------------------------------------
# Pre-generated example speeches shipped with the Space (list of dicts).
# JSON is UTF-8 by specification; pass the encoding explicitly rather than
# relying on the platform default (which may be cp1252 on Windows).
with open("sample_data.json", encoding="utf-8") as _f:
    SAMPLES = json.load(_f)
| def _render_sample(s: dict) -> str: | |
| if s.get("is_finetuned"): | |
| border_color = "#2d9e6b" | |
| badge_bg, badge_color = "#d4f0e4", "#1a6b45" | |
| badge_text = "β¦ Fine-tuned" | |
| else: | |
| border_color = "#8a8a8a" | |
| badge_bg, badge_color = "#e8e8e8", "#444444" | |
| badge_text = "β Baseline" | |
| meta = ( | |
| f'<div style="background:#f8f9fa;border-left:4px solid {border_color};' | |
| f'border-radius:0 8px 8px 0;padding:14px 18px;margin-bottom:14px;' | |
| f'color:#222222;font-size:.92em;line-height:1.9;">' | |
| f'<span style="display:inline-block;padding:3px 12px;border-radius:12px;' | |
| f'background:{badge_bg};color:{badge_color};font-weight:700;' | |
| f'font-size:.82em;margin-bottom:8px;">{badge_text}</span><br>' | |
| f'<strong style="color:#222;">Model</strong> {s["model"]}<br>' | |
| f'<strong style="color:#222;">Party</strong> {s["party"]} Β· ' | |
| f'<strong style="color:#222;">Orientation</strong> {s["orientation"]}<br>' | |
| f'<strong style="color:#222;">Topic</strong> {s["topic"]} Β· ' | |
| f'<strong style="color:#222;">Section</strong> {s["section"]}<br>' | |
| f'<strong style="color:#222;">House</strong> {s["house"]} Β· ' | |
| f'<strong style="color:#222;">Words</strong> {s["word_count"]}' | |
| f'</div>' | |
| ) | |
| speech = ( | |
| f'<div style="background:#ffffff;border:1px solid #e0e0e0;border-radius:8px;' | |
| f'padding:20px 24px;font-size:.95em;line-height:1.8;color:#1a1a1a;">' | |
| f'{s["speech"]}</div>' | |
| ) | |
| return meta + speech | |
# --- Dynamic UI helpers -------------------------------------------------------
def _update_house(party):
    """Restrict the House dropdown to the houses valid for *party*,
    preselecting the first valid option."""
    choices = get_valid_houses(party)
    first = choices[0]
    return gr.update(choices=choices, value=first)
def _update_orientation(party):
    """Refresh the read-only orientation field from the selected party."""
    orientation = get_orientation(party)
    return gr.update(value=orientation)
# --- Gradio app ---------------------------------------------------------------
# Global stylesheet for the Blocks app. The ids (#title, #sub, #speech,
# #prompt) match the elem_id values given to the corresponding components.
CSS = """
#title { text-align: center; margin-bottom: .4em; }
#sub { text-align: center; color: #666; margin-bottom: 1.4em; font-size: .9em; }
#speech textarea { font-size: .95em; line-height: 1.65; }
#prompt textarea { font-family: monospace; font-size: .78em; }
.tab-nav { justify-content: center !important; }
.tab-nav button { font-size: .95em !important; }
"""
| with gr.Blocks(css=CSS, title="ParliaBench Demo") as demo: | |
| gr.HTML(""" | |
| <style> | |
| table, table th, table td { | |
| color: black !important; | |
| } | |
| </style> | |
| """) | |
| gr.Markdown("# ParliaBench β UK Parliamentary Speech Generation", | |
| elem_id="title") | |
| gr.Markdown( | |
| "Inference demo for five LLMs fine-tuned on **ParlaMint-GB** with QLoRA \n" | |
| "Koniaris, Tsipi & Tsanakas Β· [arXiv:2511.08247](https://arxiv.org/abs/2511.08247) Β· NTUA 2025 \n" | |
| "[π Project Page](https://argyrotsipi.github.io/ParliaBench/)", | |
| elem_id="sub", | |
| ) | |
| with gr.Tabs(): | |
| # ββ Tab 1: About ββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| with gr.Tab("About"): | |
| gr.HTML(""" | |
| <style> | |
| /* Main section titles */ | |
| .pb-section h2 { | |
| color: #00C389 !important; | |
| } | |
| /* Standalone h2 titles (e.g., About ParliaBench, Models, Dataset, Speech Validation) */ | |
| h2 { | |
| color: #00C389 !important; | |
| } | |
| /* Uppercase mini section labels (e.g., QLORA CONFIGURATION, POLITICAL PARTIES IN DATASET, GENERATION PARAMETERS) */ | |
| div[style*="text-transform:uppercase"] { | |
| color: #00C389 !important; | |
| } | |
| /* Model-specific small headers (Mistral, Llama 3.1, Gemma, etc.) */ | |
| .pb-template + p, | |
| div p[style*="font-weight:600"] { | |
| color: #00C389 !important; | |
| } | |
| .pb-card { | |
| background:#f8f9fc; | |
| border:1px solid #dde3ee; | |
| border-radius:12px; | |
| overflow:hidden; | |
| margin:12px 0 20px; | |
| } | |
| .pb-card table { | |
| border-collapse:collapse; | |
| width:100%; | |
| font-size:.88em; | |
| } | |
| .pb-card thead tr { | |
| background:#e8eef6; | |
| } | |
| .pb-card thead th { | |
| padding:9px 14px; | |
| color:#1a3a5c; | |
| font-weight:600; | |
| text-align:left; | |
| letter-spacing:.01em; | |
| } | |
| .pb-card thead th.r { text-align:right; } | |
| .pb-card thead th.c { text-align:center; } | |
| .pb-card tbody tr:nth-child(odd) { background:#f8f9fc; } | |
| .pb-card tbody tr:nth-child(even) { background:#ffffff; } | |
| .pb-card tbody td { | |
| padding:7px 14px; | |
| color:#222; | |
| vertical-align:middle; | |
| } | |
| .pb-card tbody td.r { text-align:right; font-weight:600; } | |
| .pb-card tbody td.c { text-align:center; color:#555; } | |
| .pb-card tbody td.mono { font-family:monospace; font-size:.9em; color:#555; } | |
| .pb-section { | |
| border-top:1px solid #dde3ee; | |
| margin:24px 0 6px; | |
| padding-top:18px; | |
| } | |
| .pb-pre { | |
| background:#f4f6f9; | |
| border:1px solid #dde3ee; | |
| border-radius:8px; | |
| padding:12px 16px; | |
| font-size:.83em; | |
| color:#222 !important; | |
| line-height:1.6; | |
| overflow-x:auto; | |
| font-family:monospace; | |
| } | |
| .pb-template { | |
| background:#f4f6f9; | |
| border:1px solid #c5d2e8; | |
| border-left:3px solid #2d5282; | |
| border-radius:0 8px 8px 0; | |
| padding:10px 14px; | |
| font-size:.79em; | |
| color:#222 !important;; | |
| line-height:1.6; | |
| font-family:monospace; | |
| overflow-x:auto; | |
| margin:0; | |
| } | |
| .pb-template.purple { | |
| border-left-color:#7b5ea7; | |
| border-color:#ccc5e0; | |
| background:#f6f4fc; | |
| } | |
| .pb-val-step { | |
| padding:7px 14px; | |
| color:#222; | |
| vertical-align:middle; | |
| line-height:1.55; | |
| } | |
| .pb-val-num { | |
| font-weight:700; | |
| color:#2d5282; | |
| text-align:center; | |
| padding:7px 10px; | |
| } | |
| .pb-val-label { | |
| font-weight:600; | |
| padding:7px 14px; | |
| color:#222; | |
| min-width:140px; | |
| } | |
| </style> | |
| <div style="color:#222;font-size:.92em;line-height:1.8;padding:4px 0 8px 0;"> | |
| <h2 style="color:#1e2a3a;margin-bottom:6px;">About ParliaBench</h2> | |
| <p><strong>ParliaBench</strong> is a benchmark and evaluation framework for LLM-generated UK parliamentary speeches, | |
| combining a curated dataset, multi-dimensional evaluation metrics, and five domain-specific fine-tuned models.<br> | |
| Paper: <a href="https://arxiv.org/abs/2511.08247" style="color:#4a7fa5;">arXiv:2511.08247</a></p> | |
| <!-- ββ Dataset βββββββββββββββββββββββββββββββββββββββββββββββββββββββ --> | |
| <div class="pb-section"><h2 style="color:#1e2a3a;margin:0 0 6px;">Dataset</h2></div> | |
| <p>Constructed from the UK subset of the <strong>ParlaMint corpus</strong>, 2015β2022. | |
| Four-step pipeline: XML parsing β metadata alignment β content filtering β EuroVoc thematic classification.</p> | |
| <div style="display:flex;gap:20px;flex-wrap:wrap;margin:14px 0 4px;"> | |
| <div style="flex:1;min-width:240px;"> | |
| <p style="font-weight:600;color:#2c3a4a;margin-bottom:6px;font-size:.85em;text-transform:uppercase;letter-spacing:.05em;">Corpus Statistics</p> | |
| <div class="pb-card"> | |
| <table> | |
| <thead><tr><th>Statistic</th><th class="r">Value</th></tr></thead> | |
| <tbody> | |
| <tr><td>Total speeches</td><td class="r">447,778</td></tr> | |
| <tr><td>Unique speakers</td><td class="r">1,901</td></tr> | |
| <tr><td>Political affiliations</td><td class="r">11</td></tr> | |
| <tr><td>Total words</td><td class="r">~99.94 million</td></tr> | |
| <tr><td>Mean speech length</td><td class="r">223 words</td></tr> | |
| <tr><td>Median speech length</td><td class="r">99 words</td></tr> | |
| <tr><td>P10 β min threshold</td><td class="r">43 words</td></tr> | |
| <tr><td>P90 β max threshold</td><td class="r">635 words</td></tr> | |
| <tr><td>EuroVoc topic domains</td><td class="r">21</td></tr> | |
| <tr><td>Temporal coverage</td><td class="r">2015β2022</td></tr> | |
| </tbody> | |
| </table> | |
| </div> | |
| </div> | |
| <div style="flex:2;min-width:340px;"> | |
| <p style="font-weight:600;color:#2c3a4a;margin-bottom:6px;font-size:.85em;text-transform:uppercase;letter-spacing:.05em;">Political Parties in Dataset</p> | |
| <div class="pb-card"> | |
| <table> | |
| <thead><tr><th>Party</th><th class="c">Orientation</th><th class="r">Speeches</th><th class="r">Speakers</th><th class="r">Share</th></tr></thead> | |
| <tbody> | |
| <tr><td style="font-weight:600;">Conservative</td><td class="c">Centre-right</td><td class="r">263,513</td><td class="r">792</td><td class="r">58.9%</td></tr> | |
| <tr><td style="font-weight:600;">Labour</td><td class="c">Centre-left</td><td class="r">108,831</td><td class="r">592</td><td class="r">24.3%</td></tr> | |
| <tr><td style="font-weight:600;">Scottish National Party</td><td class="c">Centre-left</td><td class="r">23,562</td><td class="r">67</td><td class="r">5.3%</td></tr> | |
| <tr><td style="font-weight:600;">Liberal Democrats</td><td class="c">Centre / centre-left</td><td class="r">23,517</td><td class="r">168</td><td class="r">5.3%</td></tr> | |
| <tr><td style="font-weight:600;">Crossbench</td><td class="c">Non-partisan</td><td class="r">11,878</td><td class="r">215</td><td class="r">2.7%</td></tr> | |
| <tr><td style="font-weight:600;">Democratic Unionist Party</td><td class="c">Right</td><td class="r">6,610</td><td class="r">15</td><td class="r">1.5%</td></tr> | |
| <tr><td style="font-weight:600;">Independent</td><td class="c">Non-partisan</td><td class="r">2,783</td><td class="r">45</td><td class="r">0.6%</td></tr> | |
| <tr><td style="font-weight:600;">Plaid Cymru</td><td class="c">Centre-left to left</td><td class="r">2,229</td><td class="r">7</td><td class="r">0.5%</td></tr> | |
| <tr><td style="font-weight:600;">Green Party</td><td class="c">Left</td><td class="r">1,992</td><td class="r">3</td><td class="r">0.4%</td></tr> | |
| <tr><td style="font-weight:600;">Non-Affiliated</td><td class="c">Non-partisan</td><td class="r">1,713</td><td class="r">60</td><td class="r">0.4%</td></tr> | |
| <tr><td style="font-weight:600;">Bishops</td><td class="c">Non-partisan</td><td class="r">1,150</td><td class="r">41</td><td class="r">0.3%</td></tr> | |
| </tbody> | |
| </table> | |
| </div> | |
| <p style="color:#888;font-size:.78em;margin-top:4px;">Bishops, Crossbench, and Non-Affiliated are formal parliamentary affiliations. Minimum threshold: 1,000 speeches.</p> | |
| </div> | |
| </div> | |
| <!-- ββ Models βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ --> | |
| <div class="pb-section"><h2 style="color:#1e2a3a;margin:0 0 6px;">Models</h2></div> | |
| <p>Five LLMs fine-tuned with <strong>QLoRA</strong> via the Unsloth framework:</p> | |
| <div class="pb-card"> | |
| <table> | |
| <thead><tr><th>Model</th><th>Base (Unsloth 4-bit)</th><th>HF Repository</th></tr></thead> | |
| <tbody> | |
| <tr><td style="font-weight:600;border-left:3px solid #4a7fa5;">Llama-3.1-8B</td><td class="mono">unsloth/Meta-Llama-3.1-8B-bnb-4bit</td><td><a href="https://huggingface.co/argyrotsipi/parliabench-unsloth-llama-3.1-8b" style="color:#4a7fa5;">argyrotsipi/parliabench-unsloth-llama-3.1-8b</a></td></tr> | |
| <tr><td style="font-weight:600;border-left:3px solid #7b5ea7;">Gemma-2-9B</td><td class="mono">unsloth/gemma-2-9b-bnb-4bit</td><td><a href="https://huggingface.co/argyrotsipi/parliabench-unsloth-gemma-2-9b" style="color:#4a7fa5;">argyrotsipi/parliabench-unsloth-gemma-2-9b</a></td></tr> | |
| <tr><td style="font-weight:600;border-left:3px solid #2d5282;">Mistral-7B</td><td class="mono">unsloth/mistral-7b-v0.3-bnb-4bit</td><td><a href="https://huggingface.co/argyrotsipi/parliabench-unsloth-mistral-7b-v0.3" style="color:#4a7fa5;">argyrotsipi/parliabench-unsloth-mistral-7b-v0.3</a></td></tr> | |
| <tr><td style="font-weight:600;border-left:3px solid #4a6080;">Qwen2-7B</td><td class="mono">unsloth/Qwen2-7B-bnb-4bit</td><td><a href="https://huggingface.co/argyrotsipi/parliabench-unsloth-qwen-2-7b" style="color:#4a7fa5;">argyrotsipi/parliabench-unsloth-qwen-2-7b</a></td></tr> | |
| <tr><td style="font-weight:600;border-left:3px solid #4a4a6a;">Yi-1.5-6B</td><td class="mono">unsloth/Yi-1.5-6B-bnb-4bit</td><td><a href="https://huggingface.co/argyrotsipi/parliabench-unsloth-yi-1.5-6b" style="color:#4a7fa5;">argyrotsipi/parliabench-unsloth-yi-1.5-6b</a></td></tr> | |
| </tbody> | |
| </table> | |
| </div> | |
| <div style="display:flex;gap:20px;flex-wrap:wrap;"> | |
| <div style="flex:1;min-width:240px;"> | |
| <p style="font-weight:600;color:#2c3a4a;margin-bottom:6px;font-size:.85em;text-transform:uppercase;letter-spacing:.05em;">QLoRA Configuration</p> | |
| <div class="pb-card"> | |
| <table> | |
| <thead><tr><th>Parameter</th><th class="r">Value</th></tr></thead> | |
| <tbody> | |
| <tr><td>LoRA rank (r)</td><td class="r">16</td></tr> | |
| <tr><td>LoRA alpha</td><td class="r">16</td></tr> | |
| <tr><td>Target modules</td><td class="r">q, k, v, o, gate, up, down</td></tr> | |
| <tr><td>Dropout</td><td class="r">0</td></tr> | |
| <tr><td>Batch size</td><td class="r">64</td></tr> | |
| <tr><td>Learning rate</td><td class="r">2e-4</td></tr> | |
| <tr><td>Optimizer</td><td class="r">AdamW (fused)</td></tr> | |
| <tr><td>Max steps</td><td class="r">11,194 (~2 epochs)</td></tr> | |
| <tr><td>Warmup steps</td><td class="r">336</td></tr> | |
| <tr><td>Max sequence length</td><td class="r">1,024</td></tr> | |
| </tbody> | |
| </table> | |
| </div> | |
| </div> | |
| <div style="flex:1;min-width:240px;"> | |
| <p style="font-weight:600;color:#2c3a4a;margin-bottom:6px;font-size:.85em;text-transform:uppercase;letter-spacing:.05em;">Generation Parameters</p> | |
| <div class="pb-card"> | |
| <table> | |
| <thead><tr><th>Parameter</th><th class="r">Value</th></tr></thead> | |
| <tbody> | |
| <tr><td>Temperature</td><td class="r">0.7</td></tr> | |
| <tr><td>Top-p</td><td class="r">0.85</td></tr> | |
| <tr><td>Repetition penalty</td><td class="r">1.2</td></tr> | |
| <tr><td>Max new tokens</td><td class="r">850</td></tr> | |
| <tr><td>Min words (P10)</td><td class="r">43</td></tr> | |
| <tr><td>Max words (P90)</td><td class="r">635</td></tr> | |
| <tr><td>Sampling</td><td class="r">Nucleus (top-p)</td></tr> | |
| <tr><td>Max regen attempts</td><td class="r">3</td></tr> | |
| </tbody> | |
| </table> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- ββ Prompt Architecture βββββββββββββββββββββββββββββββββββββββββββββββ --> | |
| <div class="pb-section"><h2 style="color:#1e2a3a;margin:0 0 6px;">Prompt Architecture</h2></div> | |
| <p style="margin-bottom:6px;"><strong>System prompt β training</strong> (no word count):</p> | |
| <pre class="pb-pre">You are a seasoned UK parliamentary member. Use proper British parliamentary language | |
| appropriate for the specified House. The speech should reflect the political orientation | |
| and typical positions of the specified party on the given topic.</pre> | |
| <p style="margin-bottom:6px;margin-top:12px;"><strong>System prompt β generation</strong> (includes word count target):</p> | |
| <pre class="pb-pre">You are a seasoned UK parliamentary member. Generate a coherent speech of | |
| {min_words}-{max_words} words in standard English (no Unicode artifacts, no special | |
| characters). Use proper British parliamentary language appropriate for the specified | |
| House. The speech should reflect the political orientation and typical positions of the | |
| specified party on the given topic.</pre> | |
| <p style="margin-bottom:6px;margin-top:12px;"><strong>Context string</strong> (pipe-separated, injected as user turn):</p> | |
| <pre class="pb-pre">EUROVOC TOPIC: {topic} | SECTION: {section} | PARTY: {party} | POLITICAL ORIENTATION: {orientation} | HOUSE: {house}</pre> | |
| <p style="font-weight:600;margin-top:18px;margin-bottom:10px;color:#1a3a5c;">Model-specific chat templates</p> | |
| <div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(300px,1fr));gap:14px;margin-bottom:20px;"> | |
| <div> | |
| <p style="font-weight:600;color:#2d5282;margin:0 0 5px;font-size:.85em;">Mistral</p> | |
| <pre class="pb-template"><s>[INST] {SYSTEM_PROMPT} | |
| Context: {context} | |
| Instruction: {instruction} [/INST] {response}</s></pre> | |
| </div> | |
| <div> | |
| <p style="font-weight:600;color:#2d5282;margin:0 0 5px;font-size:.85em;">Llama 3.1</p> | |
| <pre class="pb-template"><|begin_of_text|><|start_header_id|>system<|end_header_id|> | |
| {SYSTEM_PROMPT}<|eot_id|><|start_header_id|>user<|end_header_id|> | |
| Context: {context} | |
| Instruction: {instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|> | |
| {response}<|eot_id|></pre> | |
| </div> | |
| <div> | |
| <p style="font-weight:600;color:#7b5ea7;margin:0 0 5px;font-size:.85em;">Gemma 2</p> | |
| <pre class="pb-template purple"><bos><start_of_turn>user | |
| {SYSTEM_PROMPT} | |
| Context: {context} | |
| Instruction: {instruction}<end_of_turn> | |
| <start_of_turn>model | |
| {response}<end_of_turn></pre> | |
| </div> | |
| <div> | |
| <p style="font-weight:600;color:#7b5ea7;margin:0 0 5px;font-size:.85em;">Qwen2 & Yi-1.5 (ChatML)</p> | |
| <pre class="pb-template purple"><|im_start|>system | |
| {SYSTEM_PROMPT}<|im_end|> | |
| <|im_start|>user | |
| Context: {context} | |
| Instruction: {instruction}<|im_end|> | |
| <|im_start|>assistant | |
| {response}<|im_end|></pre> | |
| </div> | |
| </div> | |
| <!-- ββ Speech Validation βββββββββββββββββββββββββββββββββββββββββββββββββ --> | |
| <div class="pb-section"><h2 style="color:#1e2a3a;margin:0 0 6px;">Speech Validation</h2></div> | |
| <p>Every generated speech passes a <strong>9-step validation pipeline</strong>. Invalid speeches are automatically | |
| regenerated up to 3 times. Baseline models exhibited higher failure rates, suggesting fine-tuning | |
| improved output quality directly.</p> | |
| <div class="pb-card" style="margin-top:12px;"> | |
| <table> | |
| <thead><tr> | |
| <th style="text-align:center;width:36px;">#</th> | |
| <th style="min-width:140px;">Check</th> | |
| <th>Detail</th> | |
| </tr></thead> | |
| <tbody> | |
| <tr> | |
| <td style="text-align:center;font-weight:700;color:#2d5282;">1</td> | |
| <td style="font-weight:600;">Template leakage</td> | |
| <td>27 markers: role tokens (<code style="background:#eef2ff;padding:1px 5px;border-radius:3px;">\nuser</code>, <code style="background:#eef2ff;padding:1px 5px;border-radius:3px;">\nassistant</code>), context labels (<code style="background:#eef2ff;padding:1px 5px;border-radius:3px;">Context:</code>, <code style="background:#eef2ff;padding:1px 5px;border-radius:3px;">Instruction:</code>), special tokens (<code style="background:#eef2ff;padding:1px 5px;border-radius:3px;">[INST]</code>, im_start, etc.)</td> | |
| </tr> | |
| <tr> | |
| <td style="text-align:center;font-weight:700;color:#2d5282;">2</td> | |
| <td style="font-weight:600;">Unicode corruption</td> | |
| <td>14 corruption patterns + 11 forbidden script ranges (CJK, Cyrillic, Arabic, Thai, technical symbols)</td> | |
| </tr> | |
| <tr> | |
| <td style="text-align:center;font-weight:700;color:#2d5282;">3</td> | |
| <td style="font-weight:600;">Language detection</td> | |
| <td>spaCy <code style="background:#eef2ff;padding:1px 5px;border-radius:3px;">en_core_web_sm</code> with 85% confidence threshold on texts ≥ 30 characters</td> | |
| </tr> | |
| <tr> | |
| <td style="text-align:center;font-weight:700;color:#2d5282;">4</td> | |
| <td style="font-weight:600;">Repetition</td> | |
| <td>Same word > 3× consecutive; sequences of 3β7 words repeated > 3×; > 5 ordinal counting words</td> | |
| </tr> | |
| <tr> | |
| <td style="text-align:center;font-weight:700;color:#2d5282;">5</td> | |
| <td style="font-weight:600;">Semantic relevance</td> | |
| <td>Cosine similarity < 0.08 via <code style="background:#eef2ff;padding:1px 5px;border-radius:3px;">all-MiniLM-L6-v2</code> against “UK parliamentary debate about {section} on {topic}”</td> | |
| </tr> | |
| <tr> | |
| <td style="text-align:center;font-weight:700;color:#2d5282;">6</td> | |
| <td style="font-weight:600;">Length</td> | |
| <td>Valid word count: 43β635 words (P10βP90 of training corpus)</td> | |
| </tr> | |
| <tr> | |
| <td style="text-align:center;font-weight:700;color:#2d5282;">7</td> | |
| <td style="font-weight:600;">Concatenation</td> | |
| <td>Rejects if ≥ 4 parliamentary opening phrases (<code style="background:#eef2ff;padding:1px 5px;border-radius:3px;">My Lords</code>, <code style="background:#eef2ff;padding:1px 5px;border-radius:3px;">Mr Speaker</code> …) suggesting multiple speeches joined</td> | |
| </tr> | |
| <tr> | |
| <td style="text-align:center;font-weight:700;color:#2d5282;">8</td> | |
| <td style="font-weight:600;">Corrupted endings</td> | |
| <td>Nonsensical suffixes (e.g. <code style="background:#eef2ff;padding:1px 5px;border-radius:3px;">▍▍▍</code>, <code style="background:#eef2ff;padding:1px 5px;border-radius:3px;">});</code>)</td> | |
| </tr> | |
| <tr> | |
| <td style="text-align:center;font-weight:700;color:#2d5282;">9</td> | |
| <td style="font-weight:600;">Refusal patterns</td> | |
| <td>AI refusal phrases (<code style="background:#eef2ff;padding:1px 5px;border-radius:3px;">I cannot generate</code>, <code style="background:#eef2ff;padding:1px 5px;border-radius:3px;">I'm sorry but I cannot</code> …)</td> | |
| </tr> | |
| </tbody> | |
| </table> | |
| </div> | |
| </div> | |
| """) | |
| # ββ Results Table βββββββββββββββββββββββββββββββββββββββββββββββββ | |
| gr.Markdown("## Results\n\n27,560 fully evaluated speeches β baseline (B) vs fine-tuned (F) across all 14 metrics.") | |
| gr.HTML(""" | |
| <div style="font-size:.87em;overflow-x:auto;padding:4px 0 20px 0;"> | |
| <p style="color:#555;margin-bottom:10px;"> | |
| <span style="background:#d4edda;color:#155724;padding:2px 9px;border-radius:5px;font-weight:600;">green = significant improvement</span> | |
| | |
| <span style="background:#f8d7da;color:#721c24;padding:2px 9px;border-radius:5px;font-weight:600;">red = significant regression</span> | |
| | |
| <span style="color:#888;">PPL ↓ · Self-BLEU ↓ · all others ↑</span> | |
| </p> | |
| <div style="background:#f8f9fc;border:1px solid #dde3ee;border-radius:12px;overflow:hidden;min-width:960px;"> | |
| <table style="border-collapse:collapse;width:100%;font-size:.82em;"> | |
| <thead> | |
| <tr style="background:#e8eef6;"> | |
| <th rowspan="2" style="padding:9px 11px;text-align:left;color:#1a3a5c;border-right:2px solid #c5d2e8;min-width:140px;">Model</th> | |
| <th colspan="5" style="padding:7px 8px;text-align:center;border-right:2px solid #c5d2e8;color:#2d5282;background:#dce8f5;">Linguistic Quality</th> | |
| <th colspan="4" style="padding:7px 8px;text-align:center;border-right:2px solid #c5d2e8;color:#4a3a7a;background:#e8e0f5;">Semantic Coherence</th> | |
| <th colspan="5" style="padding:7px 8px;text-align:center;color:#1a3a5c;background:#dce8f5;">Political Authenticity</th> | |
| </tr> | |
| <tr style="background:#eef2f8;font-size:.9em;"> | |
| <th style="padding:5px 6px;text-align:center;color:#2d5282;">PPL↓</th> | |
| <th style="padding:5px 6px;text-align:center;color:#2d5282;">Dist-N↑</th> | |
| <th style="padding:5px 6px;text-align:center;color:#2d5282;">Self-BLEU↓</th> | |
| <th style="padding:5px 6px;text-align:center;color:#2d5282;">J_Coh↑</th> | |
| <th style="padding:5px 6px;text-align:center;border-right:2px solid #c5d2e8;color:#2d5282;">J_Conc↑</th> | |
| <th style="padding:5px 6px;text-align:center;color:#4a3a7a;">GRUEN↑</th> | |
| <th style="padding:5px 6px;text-align:center;color:#4a3a7a;">BERTScore↑</th> | |
| <th style="padding:5px 6px;text-align:center;color:#4a3a7a;">MoverScore↑</th> | |
| <th style="padding:5px 6px;text-align:center;border-right:2px solid #c5d2e8;color:#4a3a7a;">J_Rel↑</th> | |
| <th style="padding:5px 6px;text-align:center;color:#1a3a5c;">PSA↑</th> | |
| <th style="padding:5px 6px;text-align:center;color:#1a3a5c;">Party Align↑</th> | |
| <th style="padding:5px 6px;text-align:center;color:#1a3a5c;">J_Auth↑</th> | |
| <th style="padding:5px 6px;text-align:center;color:#1a3a5c;">J_PolApp↑</th> | |
| <th style="padding:5px 6px;text-align:center;color:#1a3a5c;">J_Qual↑</th> | |
| </tr> | |
| </thead> | |
| <tbody> | |
| <!-- Llama --> | |
| <tr style="background:#f4f8fc;"> | |
| <td style="padding:7px 11px;font-weight:600;color:#1e2a3a;border-left:4px solid #4a7fa5;border-right:2px solid #c5d2e8;">Llama 3.1 8B (B)</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">60.854</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.988</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.006</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">7.041</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;border-right:2px solid #c5d2e8;">5.935</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.539</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.803</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.505</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;border-right:2px solid #c5d2e8;">5.465</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.399</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.504</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">4.403</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">6.177</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">4.791</td> | |
| </tr> | |
| <tr style="background:#e6eef8;"> | |
| <td style="padding:7px 11px;font-weight:600;color:#1e2a3a;border-left:4px solid #4a7fa5;border-right:2px solid #c5d2e8;">Llama 3.1 8B (F)</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">31.724 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#f8d7da;color:#721c24;font-weight:600;">0.974 β</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.018</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">7.915 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;border-right:2px solid #c5d2e8;">7.129 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#f8d7da;color:#721c24;font-weight:600;">0.508 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">0.820 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">0.511 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;border-right:2px solid #c5d2e8;">6.139 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">0.487 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">0.576 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">6.106 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">7.277 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">5.399 β</td> | |
| </tr> | |
| <!-- Gemma --> | |
| <tr style="background:#f5f3fc;border-top:2px solid #dde3ee;"> | |
| <td style="padding:7px 11px;font-weight:600;color:#1e2a3a;border-left:4px solid #7b5ea7;border-right:2px solid #c5d2e8;">Gemma 2 9B (B)</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">89.784</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.992</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.008</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">7.788</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;border-right:2px solid #c5d2e8;">4.784</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.526</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.804</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.508</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;border-right:2px solid #c5d2e8;">5.782</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.444</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.543</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">3.837</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">6.498</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">4.442</td> | |
| </tr> | |
| <tr style="background:#ece8f8;"> | |
| <td style="padding:7px 11px;font-weight:600;color:#1e2a3a;border-left:4px solid #7b5ea7;border-right:2px solid #c5d2e8;">Gemma 2 9B (F)</td> | |
| <td style="padding:5px 6px;text-align:center;background:#f8d7da;color:#721c24;font-weight:600;">101.578 β</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.990</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.010</td> | |
| <td style="padding:5px 6px;text-align:center;background:#f8d7da;color:#721c24;font-weight:600;">7.507 β</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;border-right:2px solid #c5d2e8;">5.006</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.501</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.804</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">0.510 β</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;border-right:2px solid #c5d2e8;">5.529</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">0.498 β</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.590</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">4.209 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">7.293 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">4.950 β</td> | |
| </tr> | |
| <!-- Mistral --> | |
| <tr style="background:#f2f6fb;border-top:2px solid #dde3ee;"> | |
| <td style="padding:7px 11px;font-weight:600;color:#1e2a3a;border-left:4px solid #2d5282;border-right:2px solid #c5d2e8;">Mistral 7B v0.3 (B)</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">31.280</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.966</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.008</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">6.598</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;border-right:2px solid #c5d2e8;">6.899</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.555</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.810</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.505</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;border-right:2px solid #c5d2e8;">5.418</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.418</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.521</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">4.237</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">5.617</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">4.179</td> | |
| </tr> | |
| <tr style="background:#dde8f5;"> | |
| <td style="padding:7px 11px;font-weight:600;color:#1e2a3a;border-left:4px solid #2d5282;border-right:2px solid #c5d2e8;">Mistral 7B v0.3 (F)</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">29.562 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">0.972 β</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.016</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">7.961 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;border-right:2px solid #c5d2e8;">8.962 β</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.552</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">0.825 β</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.508</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;border-right:2px solid #c5d2e8;">5.681 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">0.437 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#f8d7da;color:#721c24;font-weight:600;">0.507 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#f8d7da;color:#721c24;font-weight:600;">3.983 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">6.382 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#f8d7da;color:#721c24;font-weight:600;">3.727 β</td> | |
| </tr> | |
| <!-- Qwen --> | |
| <tr style="background:#f3f5f9;border-top:2px solid #dde3ee;"> | |
| <td style="padding:7px 11px;font-weight:600;color:#1e2a3a;border-left:4px solid #4a6080;border-right:2px solid #c5d2e8;">Qwen2 7B (B)</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">44.927</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.981</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.020</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">7.911</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;border-right:2px solid #c5d2e8;">5.928</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.488</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.803</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.508</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;border-right:2px solid #c5d2e8;">6.904</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.444</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.560</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">6.565</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">7.291</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">6.348</td> | |
| </tr> | |
| <tr style="background:#e4e8f0;"> | |
| <td style="padding:7px 11px;font-weight:600;color:#1e2a3a;border-left:4px solid #4a6080;border-right:2px solid #c5d2e8;">Qwen2 7B (F)</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">36.090 β</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.982</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">0.017 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">8.060 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;border-right:2px solid #c5d2e8;">7.625 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">0.539 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">0.821 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">0.512 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#f8d7da;color:#721c24;font-weight:600;border-right:2px solid #c5d2e8;">6.009 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">0.488 β</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.572</td> | |
| <td style="padding:5px 6px;text-align:center;background:#f8d7da;color:#721c24;font-weight:600;">5.731 β</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">7.138</td> | |
| <td style="padding:5px 6px;text-align:center;background:#f8d7da;color:#721c24;font-weight:600;">5.014 β</td> | |
| </tr> | |
| <!-- Yi --> | |
| <tr style="background:#f4f4f8;border-top:2px solid #dde3ee;"> | |
| <td style="padding:7px 11px;font-weight:600;color:#1e2a3a;border-left:4px solid #4a4a6a;border-right:2px solid #c5d2e8;">Yi 6B (B)</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">82.100</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.990</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.006</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">6.741</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;border-right:2px solid #c5d2e8;">4.303</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.563</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.799</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.505</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;border-right:2px solid #c5d2e8;">4.490</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.343</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.423</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">2.981</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">5.385</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">3.083</td> | |
| </tr> | |
| <tr style="background:#e8e8f0;"> | |
| <td style="padding:7px 11px;font-weight:600;color:#1e2a3a;border-left:4px solid #4a4a6a;border-right:2px solid #c5d2e8;">Yi 6B (F)</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">42.893 β</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.987</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.016</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">8.043 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;border-right:2px solid #c5d2e8;">6.856 β</td> | |
| <td style="padding:5px 6px;text-align:center;color:#222;">0.537</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">0.817 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">0.511 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;border-right:2px solid #c5d2e8;">5.984 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">0.493 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">0.582 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">6.102 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">7.326 β</td> | |
| <td style="padding:5px 6px;text-align:center;background:#d4edda;color:#155724;font-weight:600;">5.392 β</td> | |
| </tr> | |
| </tbody> | |
| </table> | |
| </div> | |
| <p style="color:#888;font-size:.79em;margin-top:8px;">↑/↓ p < 0.05 after Bonferroni correction. PSA and Party Align on 0–1 scale; J scores on 0–10 scale; GRUEN/BERT/Mover on 0–1; PPL raw.</p> | |
| </div> | |
| """) | |
| gr.Markdown("---\n\n## Political Spectrum & Party Alignment") | |
| # ββ PSA Diagram β PRESERVED EXACTLY ββββββββββββββββββββββββββββββ | |
| gr.HTML(""" | |
| <div style="color:#222;padding:4px 0 24px 0;"> | |
| <p style="color:#444;margin-bottom:20px;line-height:1.7;"> | |
| These are the two <strong>novel embedding-based metrics</strong> introduced by ParliaBench to measure | |
| political authenticity β dimensions entirely unavailable to conventional NLP metrics. | |
| </p> | |
| <!-- Two-column layout --> | |
| <div style="display:flex;gap:24px;flex-wrap:wrap;"> | |
| <!-- PSA Card --> | |
| <div style="flex:1;min-width:300px;background:#f8f9fc;border:1px solid #dde3ee;border-radius:10px;padding:20px 22px;"> | |
| <h3 style="color:#1a3a5c;margin:0 0 12px 0;font-size:1em;">Political Spectrum Alignment (PSA)</h3> | |
| <p style="color:#444;font-size:.88em;line-height:1.7;margin-bottom:16px;"> | |
| Measures how well a generated speech's <em>ideological positioning</em> matches the intended | |
| party orientation on the leftβright spectrum. | |
| </p> | |
| <!-- Spectrum diagram --> | |
| <div style="background:#fff;border:1px solid #e0e0e0;border-radius:8px;padding:16px 12px 10px;margin-bottom:14px;"> | |
| <div style="position:relative;height:28px;margin:0 8px 6px;"> | |
| <div style="position:absolute;top:12px;left:0;right:0;height:4px;background:linear-gradient(to right,#c0392b,#e67e22,#f1c40f,#2ecc71,#3498db);border-radius:2px;"></div> | |
| <!-- Party dots --> | |
| <div style="position:absolute;top:2px;left:2%;transform:translateX(-50%);" title="Green Party"> | |
| <div style="width:12px;height:12px;background:#27ae60;border-radius:50%;border:2px solid #fff;box-shadow:0 1px 3px rgba(0,0,0,.3);"></div> | |
| </div> | |
| <div style="position:absolute;top:2px;left:18%;transform:translateX(-50%);" title="SNP / Labour / Plaid"> | |
| <div style="width:12px;height:12px;background:#e74c3c;border-radius:50%;border:2px solid #fff;box-shadow:0 1px 3px rgba(0,0,0,.3);"></div> | |
| </div> | |
| <div style="position:absolute;top:2px;left:36%;transform:translateX(-50%);" title="LibDems"> | |
| <div style="width:12px;height:12px;background:#f39c12;border-radius:50%;border:2px solid #fff;box-shadow:0 1px 3px rgba(0,0,0,.3);"></div> | |
| </div> | |
| <div style="position:absolute;top:2px;left:72%;transform:translateX(-50%);" title="Conservative"> | |
| <div style="width:12px;height:12px;background:#3498db;border-radius:50%;border:2px solid #fff;box-shadow:0 1px 3px rgba(0,0,0,.3);"></div> | |
| </div> | |
| <div style="position:absolute;top:2px;left:88%;transform:translateX(-50%);" title="DUP"> | |
| <div style="width:12px;height:12px;background:#8e44ad;border-radius:50%;border:2px solid #fff;box-shadow:0 1px 3px rgba(0,0,0,.3);"></div> | |
| </div> | |
| <!-- Generated speech marker --> | |
| <div style="position:absolute;top:-2px;left:65%;transform:translateX(-50%);"> | |
| <div style="width:0;height:0;border-left:7px solid transparent;border-right:7px solid transparent;border-top:12px solid #e74c3c;"></div> | |
| </div> | |
| </div> | |
| <div style="display:flex;justify-content:space-between;font-size:.72em;color:#888;margin:0 8px;"> | |
| <span>Far-left (β6)</span><span>Centre (0)</span><span>Far-right (+6)</span> | |
| </div> | |
| <div style="font-size:.74em;color:#555;margin-top:10px;line-height:1.6;"> | |
| <span style="display:inline-block;width:10px;height:10px;background:#27ae60;border-radius:50%;vertical-align:middle;margin-right:4px;"></span>Green | |
| <span style="display:inline-block;width:10px;height:10px;background:#e74c3c;border-radius:50%;vertical-align:middle;margin-right:4px;"></span>Labour/SNP | |
| <span style="display:inline-block;width:10px;height:10px;background:#f39c12;border-radius:50%;vertical-align:middle;margin-right:4px;"></span>LibDems | |
| <span style="display:inline-block;width:10px;height:10px;background:#3498db;border-radius:50%;vertical-align:middle;margin-right:4px;"></span>Conservative | |
| <span style="display:inline-block;width:10px;height:10px;background:#8e44ad;border-radius:50%;vertical-align:middle;margin-right:4px;"></span>DUP | |
| <span style="color:#e74c3c;font-weight:700;">▼ generated speech</span> | |
| </div> | |
| </div> | |
| <div style="background:#eef4ff;border-left:3px solid #4a90d9;padding:10px 14px;border-radius:0 6px 6px 0;font-size:.83em;color:#222;line-height:1.7;"> | |
| <strong>How it's computed:</strong><br> | |
| 1. Build <em>centroid embeddings</em> for each orientation (Far-left → Far-right) from real ParlaMint-GB speeches<br> | |
| 2. Embed the generated speech with <code>all-mpnet-base-v2</code><br> | |
| 3. Find the closest orientation centroid via cosine similarity<br> | |
| 4. Score = <code>sim(speech, closest_centroid) × max(0, 1 − Δφ/12)</code><br> | |
| where Δφ = |expected_orientation − closest_orientation|<br> | |
| 5. Range 0→1; perfect alignment approaches 1 | |
| </div> | |
| </div> | |
| <!-- Party Alignment Card --> | |
| <div style="flex:1;min-width:300px;background:#f8fcf8;border:1px solid #ddeedd;border-radius:10px;padding:20px 22px;"> | |
| <h3 style="color:#1a4a2a;margin:0 0 12px 0;font-size:1em;">Party Alignment</h3> | |
| <p style="color:#444;font-size:.88em;line-height:1.7;margin-bottom:16px;"> | |
| Measures how closely a generated speech matches the <em>linguistic style and rhetoric</em> | |
| of the specified party, independent of spectrum position. | |
| </p> | |
| <!-- Party centroid diagram --> | |
| <div style="background:#fff;border:1px solid #e0e0e0;border-radius:8px;padding:16px 12px;margin-bottom:14px;"> | |
| <div style="position:relative;height:130px;"> | |
| <!-- Axes hint --> | |
| <div style="position:absolute;top:50%;left:50%;width:90%;height:1px;background:#eee;transform:translate(-50%,-50%);"></div> | |
| <div style="position:absolute;top:10%;left:50%;width:1px;height:80%;background:#eee;transform:translateX(-50%);"></div> | |
| <!-- Party centroids --> | |
| <div style="position:absolute;top:20%;left:68%;"> | |
| <div style="width:36px;height:36px;background:#3498db22;border:2px solid #3498db;border-radius:50%;display:flex;align-items:center;justify-content:center;font-size:.68em;color:#1a5c8a;font-weight:700;">Con</div> | |
| </div> | |
| <div style="position:absolute;top:55%;left:20%;"> | |
| <div style="width:36px;height:36px;background:#e74c3c22;border:2px solid #e74c3c;border-radius:50%;display:flex;align-items:center;justify-content:center;font-size:.68em;color:#8a1a1a;font-weight:700;">Lab</div> | |
| </div> | |
| <div style="position:absolute;top:15%;left:28%;"> | |
| <div style="width:34px;height:34px;background:#f1c40f22;border:2px solid #f1c40f;border-radius:50%;display:flex;align-items:center;justify-content:center;font-size:.65em;color:#7a6000;font-weight:700;">SNP</div> | |
| </div> | |
| <div style="position:absolute;top:65%;left:62%;"> | |
| <div style="width:32px;height:32px;background:#27ae6022;border:2px solid #27ae60;border-radius:50%;display:flex;align-items:center;justify-content:center;font-size:.62em;color:#1a5c30;font-weight:700;">LibD</div> | |
| </div> | |
| <!-- Generated speech --> | |
| <div style="position:absolute;top:38%;left:50%;"> | |
| <div style="width:14px;height:14px;background:#e74c3c;border-radius:50%;border:3px solid #fff;box-shadow:0 2px 6px rgba(0,0,0,.3);transform:translate(-50%,-50%);"></div> | |
| </div> | |
| <!-- Dotted line to Lab --> | |
| <svg style="position:absolute;top:0;left:0;width:100%;height:100%;pointer-events:none;"> | |
| <line x1="50%" y1="38%" x2="23%" y2="73%" stroke="#e74c3c" stroke-width="1.5" stroke-dasharray="4,3" opacity="0.7"/> | |
| <text x="32%" y="52%" font-size="9" fill="#e74c3c" opacity="0.9">sim=0.61</text> | |
| </svg> | |
| </div> | |
| <div style="font-size:.74em;color:#555;line-height:1.6;margin-top:4px;"> | |
| <span style="display:inline-block;width:10px;height:10px;background:#e74c3c;border-radius:50%;vertical-align:middle;margin-right:4px;"></span>generated speech (expected: Labour) | |
| · circles = party centroid embeddings | |
| </div> | |
| </div> | |
| <div style="background:#eefaf2;border-left:3px solid #27ae60;padding:10px 14px;border-radius:0 6px 6px 0;font-size:.83em;color:#222;line-height:1.7;"> | |
| <strong>How it's computed:</strong><br> | |
| 1. Build a <em>centroid embedding</em> per party from all real speeches in that party's training data<br> | |
| 2. Embed the generated speech with <code>all-mpnet-base-v2</code><br> | |
| 3. Score = <code>cosine_similarity(speech, expected_party_centroid)</code><br> | |
| 4. Range 0→1; captures party-specific vocabulary, rhetorical style, and framing beyond ideological position alone | |
| </div> | |
| </div> | |
| </div> | |
| <div style="background:#f0f4ff;border:1px solid #c5d2e8;border-radius:8px;padding:14px 18px;margin-top:20px;font-size:.85em;color:#222;line-height:1.7;"> | |
| <strong>Key finding:</strong> Both metrics successfully discriminate their target dimensions (both p < 0.001). | |
| All five fine-tuned models showed statistically significant improvements in PSA (effect sizes d = 0.14–1.05), | |
| validating that fine-tuning genuinely improves ideological alignment — not just surface fluency. | |
| Mistral achieved the highest PSA after fine-tuning (8.94), while Llama led on Party Alignment (6.19). | |
| </div> | |
| </div> | |
| """) | |
| gr.Markdown(""" | |
| --- | |
| ## Evaluation Framework | |
| 27,560 fully evaluated speeches across three assessment dimensions: | |
| ### Linguistic quality | |
| - **Perplexity (PPL)** β text naturalness via GPT-2 (β better) | |
| - **Distinct-N** β lexical diversity via unique n-gram ratios (β better) | |
| - **Self-BLEU** β intra-model diversity; lower = more varied outputs (β better) | |
| - **J_Coh / J_Conc** β LLM-as-a-Judge coherence and conciseness (1β10 scale) | |
| ### Semantic coherence | |
| - **GRUEN** β grammaticality and semantic coherence (β better) | |
| - **BERTScore** β semantic similarity via RoBERTa-large F1 (β better) | |
| - **MoverScore** β Earth Mover's Distance over contextual embeddings (β better) | |
| - **J_Rel** β LLM-as-a-Judge relevance to prompt (1β10 scale) | |
| ### Political authenticity *(novel metrics)* | |
| - **Political Spectrum Alignment (PSA)** β cosine similarity to orientation centroids weighted by ideological distance on a 13-point leftβright scale | |
| - **Party Alignment** β cosine similarity to party-specific embedding centroids | |
| - **J_Auth / J_PolApp / J_Qual** β LLM-as-a-Judge authenticity, political appropriateness, overall quality (1β10 scale) | |
| LLM judge: **FlowJudge-v0.1** (3.8B, Phi-3.5-mini architecture) β architecturally independent from all evaluated models. | |
| --- | |
| ## Citation | |
| ```bibtex | |
| @article{koniaris2025parliabench, | |
| title = {ParliaBench: An Evaluation and Benchmarking Framework for | |
| LLM-Generated Parliamentary Speech}, | |
| author = {Koniaris, Marios and Tsipi, Argyro and Tsanakas, Panayiotis}, | |
| journal = {arXiv preprint arXiv:2511.08247}, | |
| year = {2025}, | |
| url = {https://arxiv.org/abs/2511.08247} | |
| } | |
| ``` | |
| *National Technical University of Athens Β· School of Electrical and Computer Engineering* | |
| """) | |
# ── Tab 2: Sample Gallery ─────────────────────────────────────────────
with gr.Tab("Sample Gallery"):
    gr.Markdown(
        "### Generated Speech Examples\n"
        "Representative outputs from the ParliaBench evaluation set — "
        "one per model, comparing **baseline** and **fine-tuned** performance."
    )
    with gr.Row():
        filter_radio = gr.Radio(
            choices=["All", "Fine-tuned", "Baseline"],
            value="All",
            label="Filter by type",
            interactive=True,
        )

    def _sample_label(s: dict) -> str:
        """Dropdown label for one sample: '<FT|Base> · model · party · topic'."""
        prefix = "FT" if s.get("is_finetuned") else "Base"
        return f"{prefix} · {s['model']} · {s['party']} · {s['topic']}"

    def _build_choices(filter_val: str):
        """Return (labels, samples) matching the radio filter.

        Labels come from the single `_sample_label` helper so the dropdown
        labels and the lookup in `_show_sample` can never drift apart
        (previously the f-string was duplicated in both places).
        """
        if filter_val == "Fine-tuned":
            filtered = [s for s in SAMPLES if s.get("is_finetuned")]
        elif filter_val == "Baseline":
            filtered = [s for s in SAMPLES if not s.get("is_finetuned")]
        else:
            filtered = SAMPLES
        return [_sample_label(s) for s in filtered], filtered

    initial_choices, initial_filtered = _build_choices("All")
    sample_sel = gr.Dropdown(
        # Guard against an empty sample set instead of raising IndexError.
        choices=initial_choices,
        value=initial_choices[0] if initial_choices else None,
        label="Select a speech",
        interactive=True,
    )
    sample_html = gr.HTML(_render_sample(SAMPLES[0]))

    def _show_sample(choice: str, filter_val: str) -> str:
        """Render the sample whose label matches the dropdown selection."""
        labels, filtered = _build_choices(filter_val)
        for label, s in zip(labels, filtered):
            if label == choice:
                return _render_sample(s)
        return ""  # stale choice after a filter change: show nothing

    def _update_filter(filter_val: str):
        """Refresh the dropdown choices and preview when the filter changes."""
        choices, filtered = _build_choices(filter_val)
        if not filtered:  # defensive: no sample matches the filter
            return gr.update(choices=[], value=None), ""
        return gr.update(choices=choices, value=choices[0]), _render_sample(filtered[0])

    filter_radio.change(fn=_update_filter, inputs=filter_radio, outputs=[sample_sel, sample_html])
    sample_sel.change(fn=_show_sample, inputs=[sample_sel, filter_radio], outputs=sample_html)
# ── Tab 3: Generate Speech ────────────────────────────────────────────
with gr.Tab("Generate Speech"):
    with gr.Row():
        # Left column: model + context configuration and sampling knobs.
        with gr.Column(scale=1):
            gr.Markdown("### Configuration")
            model_select = gr.Dropdown(
                choices=list(MODELS.keys()),
                value="Llama-3.1-8B (fine-tuned)",
                label="Model",
                info="Fine-tuned = QLoRA adapter on Unsloth base; Baseline = raw 4-bit base model",
            )
            with gr.Group():
                party_select = gr.Dropdown(
                    choices=PARTIES, value="Conservative", label="Party",
                )
                # Read-only: kept in sync with the party by _update_orientation.
                orientation_box = gr.Textbox(
                    value=get_orientation("Conservative"),
                    label="Political Orientation (auto-filled)",
                    interactive=False,
                )
                house_select = gr.Dropdown(
                    choices=HOUSES, value="House of Commons", label="House",
                    info="Some parties are restricted to the Lords",
                )
                topic_select = gr.Dropdown(
                    choices=EUROVOC_TOPICS, value="POLITICS", label="EuroVoc Topic",
                    info="21 domains from the EUROVOC thesaurus",
                )
                section_input = gr.Textbox(
                    value="National Health Service Funding",
                    label="Debate Section / Bill Title",
                    placeholder="e.g. Climate Change Act, Defence Procurement...",
                )
                instruction_input = gr.Textbox(
                    label="Custom Instruction (optional)",
                    placeholder="Leave blank for generic instruction, or enter a specific question/prompt from the debate...",
                    lines=2,
                )
            gr.Markdown("### Generation Parameters")
            temperature = gr.Slider(0.1, 1.5, value=DEFAULT_GEN_PARAMS["temperature"], step=0.05, label="Temperature")
            top_p = gr.Slider(0.5, 1.0, value=DEFAULT_GEN_PARAMS["top_p"], step=0.05, label="Top-p (nucleus sampling)")
            rep_penalty = gr.Slider(1.0, 2.0, value=DEFAULT_GEN_PARAMS["repetition_penalty"], step=0.05, label="Repetition Penalty")
            max_new_toks = gr.Slider(100, 850, value=500, step=50, label="Max New Tokens")
            with gr.Row():
                min_words = gr.Number(value=DEFAULT_GEN_PARAMS["min_words"], label="Min Words", precision=0)
                max_words = gr.Number(value=300, label="Max Words (demo cap)", precision=0)
            gen_btn = gr.Button("Generate Speech", variant="primary", size="lg")

        # Right column: generated output, stats, and the exact prompt used.
        with gr.Column(scale=2):
            gr.Markdown("### Generated Speech")
            speech_out = gr.Textbox(label="Output", lines=18, show_copy_button=True, elem_id="speech")
            stats_out = gr.Markdown("*Stats will appear here after generation.*")
            params_out = gr.Textbox(label="Parameters Used", interactive=False)
            with gr.Accordion("Full Prompt Sent to Model", open=False):
                prompt_out = gr.Textbox(
                    label="Prompt (read-only)", lines=14, interactive=False, elem_id="prompt",
                )
            gr.Markdown(
                "---\n"
                "The prompt panel shows the **exact input** fed to the model "
                "(including chat template tokens) — useful for reproducibility."
            )

    # Keep the house and orientation widgets consistent with the party.
    party_select.change(_update_house, party_select, house_select)
    party_select.change(_update_orientation, party_select, orientation_box)
    gen_btn.click(
        fn=generate_speech,
        inputs=[model_select, party_select, topic_select, section_input,
                house_select, instruction_input,
                temperature, top_p, rep_penalty, max_new_toks, min_words, max_words],
        outputs=[speech_out, prompt_out, stats_out, params_out],
    )
# Footer links (mis-encoded "Β·" separators restored to "·").
gr.Markdown(
    "---\n"
    "<small>ParliaBench Demo · NTUA 2025 · "
    "[argyrotsipi on HF](https://huggingface.co/argyrotsipi) · "
    "[Train dataset](https://huggingface.co/datasets/argyrotsipi/train-dataset) · "
    "[Generated dataset](https://huggingface.co/datasets/argyrotsipi/generated-dataset)</small>"
)
| if __name__ == "__main__": | |
| demo.launch(share=False) | |