Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from punctuators.models import PunctCapSegModelONNX | |
# Load the punctuation model once at import time; punctuate_text() below
# reads it through the module-level name `model`.
_MODEL_ID = "1-800-BAD-CODE/xlm-roberta_punctuation_fullstop_truecase"

print("Loading XLM-RoBERTa punctuation model...")
model = PunctCapSegModelONNX.from_pretrained(_MODEL_ID)
print("Model loaded successfully!")
# Inputs longer than this many characters are processed chunk by chunk in
# the "Balanced" strategy.
_BALANCED_CHUNK_CHARS = 500


def _split_into_chunks(text, max_chars=_BALANCED_CHUNK_CHARS):
    """Split *text* into consecutive, non-overlapping chunks.

    Chunks are at most *max_chars* characters long and break at whitespace so
    that no word is cut in half. A single whitespace-free run longer than
    *max_chars* (for example text in a language written without spaces) is
    hard-split, because there is no better boundary available.
    """
    chunks = []
    current = ""
    for word in text.split():
        # Oversized token: flush what we have and slice the token itself.
        while len(word) > max_chars:
            if current:
                chunks.append(current)
                current = ""
            chunks.append(word[:max_chars])
            word = word[max_chars:]
        if not word:
            continue
        candidate = f"{current} {word}" if current else word
        if len(candidate) > max_chars:
            chunks.append(current)
            current = word
        else:
            current = candidate
    if current:
        chunks.append(current)
    return chunks


def _infer_text(text, apply_sbd):
    """Run the model on one text and return its output as a single string."""
    output = model.infer(texts=[text], apply_sbd=apply_sbd)[0]
    # A list result (one entry per sentence) is flattened to one per line.
    return "\n".join(output) if isinstance(output, list) else output


def punctuate_text(input_text, progress=gr.Progress()):
    """Generate three punctuation/capitalization corrections of *input_text*.

    The strategies are, in output order:

    1. Conservative - one inference pass without sentence boundary detection.
    2. With Sentence Boundaries - one pass with sentence boundary detection;
       sentences are returned one per line.
    3. Balanced - same as (2) for inputs up to 500 characters; longer inputs
       are split into non-overlapping chunks at word boundaries, each chunk
       is processed without sentence boundary detection, and the chunk
       outputs are joined with single spaces.

    Args:
        input_text: Raw text to correct. ``None``, empty, or whitespace-only
            input yields three empty strings.
        progress: Gradio progress tracker. The ``gr.Progress()`` default is
            how Gradio injects the tracker, so it is intentionally kept as a
            default argument.

    Returns:
        A list of three strings, one per strategy, in the order above.
    """
    if not input_text or not input_text.strip():
        return ["", "", ""]

    def run_balanced(text):
        if len(text) > _BALANCED_CHUNK_CHARS:
            # Chunks must not overlap: every chunk's output is kept, so any
            # shared text would show up twice in the joined result.
            return " ".join(
                _infer_text(chunk, apply_sbd=False)
                for chunk in _split_into_chunks(text)
            )
        return _infer_text(text, apply_sbd=True)

    strategies = [
        ("Conservative", lambda text: _infer_text(text, apply_sbd=False)),
        ("With Sentence Boundaries", lambda text: _infer_text(text, apply_sbd=True)),
        ("Balanced", run_balanced),
    ]

    corrections = []
    total = len(strategies)
    for i, (name, run) in enumerate(strategies):
        progress((i + 0.5) / total, desc=f"Generating {name} version...")
        corrections.append(run(input_text))
        progress((i + 1) / total, desc=f"{name} version complete")

    progress(1.0, desc="All corrections generated!")
    return corrections
# Create Gradio interface
#
# NOTE(review): the nesting of the layout containers below is reconstructed
# from statement order; confirm it against the intended layout.
# NOTE(review): the leading glyphs in several labels ("π", "βοΈ", "β") look
# like mis-decoded emoji; restore them from the original file if available.


def _clipboard_js(elem_id, label):
    """Return the browser-side handler that copies one output box.

    The handler reads the textarea inside the element with id *elem_id* and
    writes its value to the clipboard. ``navigator.clipboard.writeText``
    returns a Promise, so the success alert is shown only once the write has
    actually completed, and a failure is reported instead of being ignored.
    """
    return f"""
    () => {{
        const box = document.querySelector('#{elem_id} textarea');
        if (!box) {{
            alert('Nothing to copy.');
            return;
        }}
        navigator.clipboard.writeText(box.value).then(
            () => alert('{label} version copied to clipboard!'),
            (err) => alert('Could not copy to clipboard: ' + err)
        );
    }}
    """


with gr.Blocks(title="Multilingual Punctuation & Capitalization Correction", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # π Multilingual Punctuation & Capitalization Correction
    This tool uses **XLM-RoBERTa** to restore punctuation, fix capitalization, and detect sentence boundaries in **47 languages**.
    Enter text without proper punctuation or capitalization, and get 3 different correction styles:
    - **π Conservative**: Minimal changes, preserves original flow
    - **π With Sentence Boundaries**: Splits text into clear sentences
    - **βοΈ Balanced**: Smart chunking for longer texts
    """)

    with gr.Row():
        with gr.Column(scale=2):
            input_text = gr.Textbox(
                label="Input Text (any of 47 supported languages)",
                placeholder="enter text without punctuation or capitalization like this example here it will be fixed",
                lines=12,
                max_lines=20,
            )
            correct_btn = gr.Button("π Add Punctuation & Capitalization", variant="primary", size="lg")

    # Output section with 3 versions
    gr.Markdown("### π Correction Options")
    with gr.Row():
        with gr.Column():
            gr.Markdown("#### π Conservative")
            output_conservative = gr.Textbox(
                label="Conservative Correction",
                lines=10,
                max_lines=15,
                interactive=True,
                elem_id="conservative_output",
            )
            copy_btn_1 = gr.Button("π Copy", variant="secondary", size="sm")
        with gr.Column():
            gr.Markdown("#### π With Sentence Boundaries")
            output_boundaries = gr.Textbox(
                label="Sentence Boundary Detection",
                lines=10,
                max_lines=15,
                interactive=True,
                elem_id="boundaries_output",
            )
            copy_btn_2 = gr.Button("π Copy", variant="secondary", size="sm")
        with gr.Column():
            gr.Markdown("#### βοΈ Balanced")
            output_balanced = gr.Textbox(
                label="Balanced Correction",
                lines=10,
                max_lines=15,
                interactive=True,
                elem_id="balanced_output",
            )
            copy_btn_3 = gr.Button("π Copy", variant="secondary", size="sm")

    # Selected version display
    with gr.Row():
        gr.Markdown("### β Selected Correction")
    selected_text = gr.Textbox(
        label="Your Selected Correction",
        lines=5,
        interactive=True,
        placeholder="Click 'Use This' under any correction to select it",
    )

    # Add selection buttons
    with gr.Row():
        with gr.Column():
            select_btn_1 = gr.Button("β Use This", variant="primary", size="sm")
        with gr.Column():
            select_btn_2 = gr.Button("β Use This", variant="primary", size="sm")
        with gr.Column():
            select_btn_3 = gr.Button("β Use This", variant="primary", size="sm")

    # Add examples
    gr.Examples(
        examples=[
            ["hello there how are you doing today i hope everything is going well"],
            ["the quick brown fox jumps over the lazy dog this is a test sentence"],
            ["machine learning is revolutionizing many industries from healthcare to finance"],
            ["bonjour comment allez vous aujourdhui jespere que tout va bien"],
            ["hola como estas espero que todo este bien contigo y tu familia"],
        ],
        inputs=input_text,
        label="Example sentences (click to try)",
    )

    # Set up event handlers: the button and pressing Enter both run the model.
    outputs = [output_conservative, output_boundaries, output_balanced]
    correct_btn.click(fn=punctuate_text, inputs=input_text, outputs=outputs)
    input_text.submit(fn=punctuate_text, inputs=input_text, outputs=outputs)

    # Selection handlers: copy the chosen variant into the "selected" box.
    for _select_btn, _source_box in zip((select_btn_1, select_btn_2, select_btn_3), outputs):
        _select_btn.click(fn=lambda x: x, inputs=_source_box, outputs=selected_text)

    # JavaScript for copy functionality (runs in the browser only; no Python fn).
    for _copy_btn, _elem_id, _label in (
        (copy_btn_1, "conservative_output", "Conservative"),
        (copy_btn_2, "boundaries_output", "Sentence boundaries"),
        (copy_btn_3, "balanced_output", "Balanced"),
    ):
        _copy_btn.click(None, None, None, js=_clipboard_js(_elem_id, _label))

    gr.Markdown("""
    ---
    **Model:** [1-800-BAD-CODE/xlm-roberta_punctuation_fullstop_truecase](https://huggingface.co/1-800-BAD-CODE/xlm-roberta_punctuation_fullstop_truecase)
    **Supports 47 languages** including: English, French, Spanish, German, Italian, Portuguese, Russian, Turkish, Chinese, Japanese, Arabic, and many more!
    """)
# Launch the app
if __name__ == "__main__":
    # Listen on all interfaces at port 7860; share=False requests no share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo.launch(**launch_options)