Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import torch | |
| from transformers import pipeline | |
| import time | |
| from typing import Dict, Optional, List, Tuple | |
| import threading | |
| import random | |
# Model configurations — display order is fixed throughout the UI.
# Each entry is (display name, Hugging Face model repo id).
MODEL_ORDER = [
    ("Leesplank Noot EuroLLM-1.7b", "UWV/leesplank-noot-eurollm-1.7b"),
    ("Leesplank Noot Granite-3.3-2b", "UWV/leesplank-noot-granite-3.3-2b"),
    ("Leesplank Noot Llama-3.2-3b", "UWV/leesplank-noot-llama-3.2-3b")
]
# System prompt for the Llama and Granite models (the EuroLLM model is
# prompted without a system message — see simplify_single_model).
# Dutch, as the models are instruction-tuned in Dutch: "simplify texts to a
# clear, accessible level comparable to the Jeugdjournaal news programme".
SYSTEM_PROMPT = """Je bent een AI-assistent die Nederlandse teksten vereenvoudigt naar een helder, toegankelijk niveau voor iedereen, vergelijkbaar met de heldere taal die het Jeugdjournaal gebruikt. Behoud de betekenis en belangrijke informatie, maar gebruik eenvoudigere woorden en kortere zinnen. Schrijf niet kinderlijk, maar wel toegankelijk."""
# Example texts (Dutch) shown as clickable cards in the UI: pitch-drop
# experiment, quantum mechanics, photosynthesis, algorithms.
EXAMPLES = [
    "Een pekdruppelexperiment is een langetermijnexperiment dat het vloeien van een stuk pek meet over vele jaren. Pek is een verzamelnaam voor een aantal vloeistoffen met een zeer hoge viscositeit, zoals teer en bitumen, die er bij kamertemperatuur uitzien als een vaste stof, maar in feite zeer dik vloeibaar zijn en uiteindelijk druppels vormen.",
    "De kwantummechanica is een natuurkundige theorie die het gedrag beschrijft van materie en energie op de schaal van atomen en subatomaire deeltjes. In tegenstelling tot de klassieke mechanica, waar objecten een bepaalde positie en snelheid hebben, beschrijft de kwantummechanica deeltjes met waarschijnlijkheidsgolven.",
    "Fotosynthese is het biologische proces waarbij planten, algen en sommige bacteriën lichtenergie omzetten in chemische energie. Dit gebeurt in de chloroplasten, waar chlorofyl zonlicht absorbeert en gebruikt om koolstofdioxide en water om te zetten in glucose en zuurstof.",
    "Een algoritme is een eindige reeks goed gedefinieerde instructies om een bepaald probleem op te lossen of een berekening uit te voeren. In de informatica worden algoritmes gebruikt als specificaties voor het uitvoeren van berekeningen, gegevensverwerking, geautomatiseerd redeneren en andere taken."
]
# Global model cache: repo id -> loaded transformers pipeline (or None).
# Guarded by cache_lock so concurrent requests do not load the same model twice.
MODEL_CACHE: Dict[str, Optional[pipeline]] = {}
cache_lock = threading.Lock()
def clear_gpu_memory():
    """Release cached GPU memory between model runs.

    Runs Python garbage collection first so that any dropped pipeline
    references are actually freed before CUDA is asked to return its
    cached allocator blocks. Safe to call on CPU-only machines (the
    CUDA step is simply skipped). Returns None.
    """
    import gc
    # Collect unreachable Python objects first; otherwise their CUDA
    # tensors are still held when empty_cache() runs and nothing is freed.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
def load_model(model_path: str):
    """Load a text-generation pipeline for ``model_path``, with caching.

    The cache lookup and the (slow) load happen under ``cache_lock`` so
    that concurrent requests never load the same model twice.

    Args:
        model_path: Hugging Face model repo id.

    Returns:
        The cached or freshly created transformers pipeline.

    Raises:
        RuntimeError: if the pipeline cannot be created; the original
        exception is chained as the cause.
    """
    with cache_lock:
        # Fast path: already loaded (and not explicitly invalidated to None).
        if model_path in MODEL_CACHE and MODEL_CACHE[model_path] is not None:
            return MODEL_CACHE[model_path]
        try:
            model = pipeline(
                "text-generation",
                model=model_path,
                torch_dtype="auto",   # let transformers pick the checkpoint dtype
                device_map="auto"     # place on GPU when available
            )
        except Exception as e:
            # RuntimeError (not bare Exception) with the cause chained keeps
            # the original traceback while remaining catchable by callers'
            # `except Exception` handlers.
            raise RuntimeError(f"Failed to load model {model_path}: {str(e)}") from e
        MODEL_CACHE[model_path] = model
        return model
def simplify_single_model(text: str, model_name: str, model_path: str) -> Dict:
    """Simplify ``text`` with a single model and collect generation metrics.

    Args:
        text: The Dutch source text to simplify.
        model_name: Display name, echoed back in the result.
        model_path: Hugging Face model repo id, passed to load_model.

    Returns:
        A dict with keys ``model_name``, ``text`` (simplified output or a
        "Fout: ..." message), ``time`` (seconds), ``tokens_per_sec``,
        ``input_tokens``, ``output_tokens`` and ``success``. This function
        never raises; failures are reported via ``success=False``.
    """
    try:
        model = load_model(model_path)

        # Build the chat messages. The EuroLLM variant is prompted without
        # a system message; Granite and Llama receive the shared prompt.
        user_turn = {"role": "user", "content": f"Vereenvoudig: {text}"}
        if "eurollm" in model_path.lower():
            messages = [user_turn]
        else:
            messages = [{"role": "system", "content": SYSTEM_PROMPT}, user_turn]

        eos_id = model.tokenizer.eos_token_id
        started = time.time()
        output = model(
            messages,
            max_new_tokens=256,
            return_full_text=False,
            do_sample=False,      # greedy decoding keeps the comparison deterministic
            pad_token_id=eos_id,
            eos_token_id=eos_id
        )
        elapsed = time.time() - started
        simplified = output[0]["generated_text"].strip()

        # Token counts for the throughput metric shown in the UI.
        input_tokens = len(model.tokenizer.encode(text))
        output_tokens = len(model.tokenizer.encode(simplified))
        speed = output_tokens / elapsed if elapsed > 0 else 0

        return {
            "model_name": model_name,
            "text": simplified,
            "time": elapsed,
            "tokens_per_sec": speed,
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "success": True
        }
    except Exception as e:
        # Report the failure in the result instead of crashing the whole run.
        return {
            "model_name": model_name,
            "text": f"Fout: {str(e)}",
            "time": 0,
            "tokens_per_sec": 0,
            "input_tokens": 0,
            "output_tokens": 0,
            "success": False
        }
def _render_outputs(results: Dict[str, Dict[str, str]]) -> Tuple:
    """Flatten per-model results into one output tuple, in fixed MODEL_ORDER.

    Produces (text, metrics) pairs for every model — six values total —
    matching the Gradio output components wired up in create_interface.
    """
    flat: List[str] = []
    for name, _ in MODEL_ORDER:
        entry = results[name]
        flat.extend([entry["text"], entry["metrics"]])
    return tuple(flat)


def simplify_text_sequential(text: str, selected_models: List[str]):
    """Generator: simplify ``text`` with each selected model, streaming updates.

    Yields a 6-tuple (text, metrics per model, fixed display order) after
    every state change so the Gradio UI shows progress as each model runs.
    Models execute in random order for timing fairness, but the display
    order never changes.

    Args:
        text: Source text; blank input yields a single prompt message.
        selected_models: Display names chosen in the CheckboxGroup.
    """
    # Guard: nothing to do without input text.
    if not text.strip():
        empty_result = "Voer tekst in om te vereenvoudigen"
        yield (empty_result, "") + ("", "") * 2
        return

    # Keep only the models the user selected, preserving display order.
    models_to_run = [(name, path) for name, path in MODEL_ORDER if name in selected_models]
    if not models_to_run:
        empty_result = "Selecteer minstens één model"
        yield (empty_result, "") + ("", "") * 2
        return

    # Seed every column with a waiting / not-selected placeholder.
    results = {}
    for name, _ in MODEL_ORDER:
        if name in selected_models:
            results[name] = {"text": "⏳ Wachten op verwerking...", "metrics": ""}
        else:
            results[name] = {"text": "Model niet geselecteerd", "metrics": ""}
    yield _render_outputs(results)

    # Randomize execution order for fairness in timing, but keep display order fixed.
    models_to_execute = models_to_run.copy()
    random.shuffle(models_to_execute)

    for name, path in models_to_execute:
        # Mark this model as in progress and push the update to the UI.
        results[name]["text"] = "🔄 Verwerken..."
        yield _render_outputs(results)
        try:
            # Clear GPU memory before each model to prevent OOM.
            clear_gpu_memory()
            result = simplify_single_model(text, name, path)
            results[name]["text"] = result["text"]
            if result["success"]:
                results[name]["metrics"] = f"""<div style="color: #666; font-size: 0.9em; margin-top: 10px;">
⏱️ **Tijd:** {result['time']:.2f}s |
⚡ **Snelheid:** {result['tokens_per_sec']:.1f} tokens/s |
📊 **Tokens:** {result['output_tokens']}
</div>"""
            else:
                results[name]["metrics"] = ""
        except Exception as e:
            results[name]["text"] = f"❌ Fout: {str(e)}"
            results[name]["metrics"] = ""
        # Push the completed (or failed) result for this model.
        yield _render_outputs(results)
def create_interface():
    """Build the Gradio Blocks UI for the model-comparison demo.

    Layout: model-selection checkboxes + author block, input textbox with
    submit button, four clickable example cards, and one (output, metrics)
    column per model in MODEL_ORDER. Both the button click and textbox
    submit stream updates from the simplify_text_sequential generator.

    Returns:
        The assembled (unlaunched) gr.Blocks app.
    """
    # Use Default theme which is light by default; every *_dark token is
    # pinned to the same light value so dark-mode browsers still get the
    # light UWV palette (UWV blue #01689B as primary).
    uwv_theme = gr.themes.Default(
        primary_hue=gr.themes.colors.blue,
        font=[gr.themes.GoogleFont("Open Sans"), "Arial", "sans-serif"]
    ).set(
        body_background_fill="#FFFFFF",
        body_background_fill_dark="#FFFFFF",
        background_fill_primary="#F7F9FB",
        background_fill_primary_dark="#F7F9FB",
        background_fill_secondary="#FFFFFF",
        background_fill_secondary_dark="#FFFFFF",
        button_primary_background_fill="#01689B",
        button_primary_background_fill_dark="#01689B",
        button_primary_background_fill_hover="#01547D",
        button_primary_background_fill_hover_dark="#01547D",
        button_primary_text_color="#FFFFFF",
        button_primary_text_color_dark="#FFFFFF",
        block_background_fill="#FFFFFF",
        block_background_fill_dark="#FFFFFF",
        block_title_text_color="#01689B",
        block_title_text_color_dark="#01689B",
        block_label_text_color="#333333",
        block_label_text_color_dark="#333333",
        body_text_color="#333333",
        body_text_color_dark="#333333",
        body_text_color_subdued="#666666",
        body_text_color_subdued_dark="#666666",
        panel_background_fill="#FFFFFF",
        panel_background_fill_dark="#FFFFFF",
        panel_border_color="#E0E0E0",
        panel_border_color_dark="#E0E0E0",
        input_background_fill="#FFFFFF",
        input_background_fill_dark="#FFFFFF",
        input_background_fill_focus="#FFFFFF",
        input_background_fill_focus_dark="#FFFFFF",
        input_background_fill_hover="#FFFFFF",
        input_background_fill_hover_dark="#FFFFFF",
        input_border_color="#E0E0E0",
        input_border_color_dark="#E0E0E0",
        input_border_color_focus="#01689B",
        input_border_color_focus_dark="#01689B",
        input_border_color_hover="#01689B",
        input_border_color_hover_dark="#01689B",
        input_placeholder_color="#999999",
        input_placeholder_color_dark="#999999",
        checkbox_background_color="#FFFFFF",
        checkbox_background_color_dark="#FFFFFF",
        checkbox_background_color_focus="#FFFFFF",
        checkbox_background_color_focus_dark="#FFFFFF",
        checkbox_background_color_hover="#FFFFFF",
        checkbox_background_color_hover_dark="#FFFFFF",
        checkbox_background_color_selected="#01689B",
        checkbox_background_color_selected_dark="#01689B",
        checkbox_border_color="#01689B",
        checkbox_border_color_dark="#01689B",
        checkbox_border_color_focus="#01689B",
        checkbox_border_color_focus_dark="#01689B",
        checkbox_border_color_hover="#01547D",
        checkbox_border_color_hover_dark="#01547D",
        checkbox_border_color_selected="#01689B",
        checkbox_border_color_selected_dark="#01689B",
        checkbox_label_background_fill="#FFFFFF",
        checkbox_label_background_fill_dark="#FFFFFF",
        checkbox_label_background_fill_hover="#F7F9FB",
        checkbox_label_background_fill_hover_dark="#F7F9FB",
        checkbox_label_background_fill_selected="#E8F4F8",
        checkbox_label_background_fill_selected_dark="#E8F4F8",
        checkbox_label_text_color="#333333",
        checkbox_label_text_color_dark="#333333",
        checkbox_label_text_color_selected="#01689B",
        checkbox_label_text_color_selected_dark="#01689B"
    )
    with gr.Blocks(
        title="Leesplank Noot - UWV Innovatie Hub",
        theme=uwv_theme,
        css="""
/* Minimal UWV styling - let Gradio handle most things */
h1 {
color: #01689B;
border-top: 4px solid #01689B;
padding-top: 1.5rem;
}
.author-block {
background: linear-gradient(135deg, #FCE1BF 0%, #FFE9CC 100%);
padding: 1.5rem;
border-left: 4px solid #01689B;
margin: 1.5rem 0;
border-radius: 0 8px 8px 0;
}
/* Simple styling for example text cards */
.example-text-card {
background: linear-gradient(135deg, #FCE1BF 0%, #FFE9CC 100%);
border: 2px solid #E8F4F8;
color: #333;
padding: 1.5rem;
min-height: 140px;
line-height: 1.6;
white-space: pre-wrap;
cursor: pointer;
border-radius: 8px;
}
.example-text-card:hover {
border-color: #01689B;
transform: translateY(-2px);
}
"""
    ) as demo:
        # Page header.
        gr.Markdown("""
# 📝 Leesplank Noot
## Nederlandse Tekstvereenvoudiging
""")
        with gr.Row():
            with gr.Column(scale=1):
                # Model selection checkboxes; all three are on by default.
                model_checkboxes = gr.CheckboxGroup(
                    choices=[name for name, _ in MODEL_ORDER],
                    value=[name for name, _ in MODEL_ORDER],  # All selected by default
                    label="Selecteer modellen",
                    info="Kies welke modellen je wilt vergelijken"
                )
            with gr.Column(scale=1):
                # Attribution block, styled via the .author-block CSS rule above.
                gr.HTML("""
<div class="author-block">
<strong>Ontwikkeld door:</strong> UWV Innovatie Hub<br>
<strong>Contact:</strong> <a href="mailto:innovatie@uwv.nl">innovatie@uwv.nl</a><br>
<strong>Mede mogelijk gemaakt door:</strong> IDO 2024 (Innovatie en Digitalisering Ontwikkelingen)
</div>
""")
        gr.Markdown("""
Vergelijk tekstvereenvoudiging van drie AI-modellen naast elkaar.
""")
        with gr.Row():
            with gr.Column(scale=1):
                # Free-text input; also submits on Enter (see input_text.submit below).
                input_text = gr.Textbox(
                    label="Voer je tekst in",
                    placeholder="Typ of plak hier de tekst die je wilt vereenvoudigen...",
                    lines=12,
                    max_lines=20
                )
                simplify_btn = gr.Button(
                    "🔄 Vereenvoudig tekst",
                    variant="primary",
                    size="lg"
                )
            with gr.Column(scale=2):
                # Clickable example cards; each button copies the full example
                # text into the input (labels show only the first 120 chars).
                gr.Markdown("### 📚 Voorbeeldteksten")
                with gr.Row():
                    with gr.Column(scale=1):
                        btn1 = gr.Button(
                            f"🔬 FYSICA\n\n{EXAMPLES[0][:120]}...",
                            elem_classes="example-text-card",
                            size="lg"
                        )
                    with gr.Column(scale=1):
                        btn2 = gr.Button(
                            f"⚛️ NATUURKUNDE\n\n{EXAMPLES[1][:120]}...",
                            elem_classes="example-text-card",
                            size="lg"
                        )
                with gr.Row():
                    with gr.Column(scale=1):
                        btn3 = gr.Button(
                            f"🧬 BIOLOGIE\n\n{EXAMPLES[2][:120]}...",
                            elem_classes="example-text-card",
                            size="lg"
                        )
                    with gr.Column(scale=1):
                        btn4 = gr.Button(
                            f"💻 INFORMATICA\n\n{EXAMPLES[3][:120]}...",
                            elem_classes="example-text-card",
                            size="lg"
                        )
        # Three-column layout for results; order matches MODEL_ORDER and the
        # output tuple produced by simplify_text_sequential.
        # NOTE(review): the .model-title class used below is not defined in the
        # css string above, so these headings render unstyled — confirm intent.
        with gr.Row(equal_height=True):
            with gr.Column(scale=1, visible=True) as col_eurollm:
                gr.Markdown('<div class="model-title">🚀 Leesplank Noot EuroLLM-1.7b</div>')
                output_eurollm = gr.Textbox(
                    label="",
                    lines=10,
                    interactive=False
                )
                metrics_eurollm = gr.Markdown("")
            with gr.Column(scale=1, visible=True) as col_granite:
                gr.Markdown('<div class="model-title">💎 Leesplank Noot Granite-3.3-2b</div>')
                output_granite = gr.Textbox(
                    label="",
                    lines=10,
                    interactive=False
                )
                metrics_granite = gr.Markdown("")
            with gr.Column(scale=1, visible=True) as col_llama:
                gr.Markdown('<div class="model-title">🦙 Leesplank Noot Llama-3.2-3b</div>')
                output_llama = gr.Textbox(
                    label="",
                    lines=10,
                    interactive=False
                )
                metrics_llama = gr.Markdown("")
        # Collapsible project information.
        with gr.Accordion("ℹ️ Over deze demo", open=False):
            gr.Markdown("""
## Projectinformatie
Deze demo toont drie Nederlandse tekstvereenvoudigingsmodellen ontwikkeld door **UWV Innovatie Hub** als onderdeel van het Leesplank Noot project.
### Modellen:
- **Leesplank Noot EuroLLM-1.7b**: 1.7B parameters, SARI score: 66.44
- **Leesplank Noot Granite-3.3-2b**: 2B parameters, SARI score: 67.80
- **Leesplank Noot Llama-3.2-3b**: 3B parameters, SARI score: 67.50
Alle modellen zijn getraind op 1.89M Nederlandse Wikipedia-vereenvoudigingen en produceren tekst op B1-niveau voor betere toegankelijkheid van overheidscommunicatie.
### Ontwikkeling:
- **Ontwikkeld door:** UWV Innovatie Hub
- **Projectleiding:** innovatie@uwv.nl
- **Financiering:** Dit project is mede mogelijk gemaakt door **IDO 2024** (Innovatie en Digitalisering Ontwikkelingen)
- **Licentie:** Apache 2.0
### Doel:
Het verbeteren van de toegankelijkheid van overheidsteksten voor burgers met leesmoeilijkheden, in lijn met de Wet digitale overheid en EU toegankelijkheidsrichtlijnen.
""")
        # Event handlers: simplify_text_sequential is a generator, so each
        # yield streams an update to the six output components below.
        simplify_btn.click(
            fn=simplify_text_sequential,
            inputs=[input_text, model_checkboxes],
            outputs=[
                output_eurollm, metrics_eurollm,
                output_granite, metrics_granite,
                output_llama, metrics_llama
            ]
        )
        # Same pipeline triggered by pressing Enter in the textbox.
        input_text.submit(
            fn=simplify_text_sequential,
            inputs=[input_text, model_checkboxes],
            outputs=[
                output_eurollm, metrics_eurollm,
                output_granite, metrics_granite,
                output_llama, metrics_llama
            ]
        )
        # Example button handlers: copy the full example text into the input.
        btn1.click(fn=lambda: EXAMPLES[0], outputs=input_text)
        btn2.click(fn=lambda: EXAMPLES[1], outputs=input_text)
        btn3.click(fn=lambda: EXAMPLES[2], outputs=input_text)
        btn4.click(fn=lambda: EXAMPLES[3], outputs=input_text)
        # Footer
        gr.Markdown("""
<div class="footer-section">
<div style="text-align: center; color: #333; padding-bottom: 2rem;">
<strong>Leesplank Noot</strong> - Nederlandse Tekstvereenvoudiging<br>
Ontwikkeld door <strong>UWV Innovatie Hub</strong> | Contact: <a href="mailto:innovatie@uwv.nl" style="color: #01689B;">innovatie@uwv.nl</a><br>
Mede mogelijk gemaakt door <strong>IDO 2024</strong> (Innovatie en Digitalisering Ontwikkelingen)<br>
© 2025 UWV | Apache 2.0 License | <a href="https://huggingface.co/UWV/Leesplank-Noot-overview" target="_blank" style="color: #01689B;">HuggingFace</a> | <a href="https://uwv.nl" target="_blank" style="color: #01689B;">uwv.nl</a>
</div>
</div>
""")
    return demo
# Initialize and launch the demo when run as a script.
if __name__ == "__main__":
    demo = create_interface()
    # Queue requests so streaming generator updates work and at most 10
    # requests wait at once.
    demo.queue(max_size=10)
    demo.launch(
        server_name="0.0.0.0",  # bind all interfaces (needed inside Spaces/containers)
        server_port=7860,       # default Hugging Face Spaces port
        share=False
    )