Spaces:
Running
Running
| import gradio as gr | |
| from transformers import pipeline | |
| import re | |
# Shared text-generation pipeline, built once when this module is loaded.
# Model source: aakashMeghwar01/SindhiLM
generator = pipeline(task="text-generation", model="aakashMeghwar01/SindhiLM")
def clean_sindhi(text):
    """Keep only Arabic-block characters and collapse the whitespace.

    Every character that is neither in the Unicode range U+0600-U+06FF nor
    whitespace is removed. What remains is split on whitespace and re-joined
    with single spaces, which also trims both ends of the string.
    """
    unwanted = re.compile(r'[^\u0600-\u06FF\s]')
    arabic_only = unwanted.sub('', text)
    words = arabic_only.split()
    return " ".join(words)
def generate_text(prompt):
    """Generate a continuation of ``prompt`` and return it cleaned.

    Args:
        prompt: Starting phrase to continue.

    Returns:
        The ``generated_text`` of the first sequence returned by the
        module-level ``generator``, passed through ``clean_sindhi``.
        An empty string is returned, without calling the model, when
        ``prompt`` is ``None``, empty, or whitespace only.
    """
    # Without a starting phrase there is nothing to continue, so skip the
    # model call instead of sampling from an empty input.
    if not prompt or not prompt.strip():
        return ""

    # Sampling settings chosen for the 50M-parameter model.
    sampling_options = {
        "max_new_tokens": 40,
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.9,
        "repetition_penalty": 1.2,
    }
    results = generator(prompt, **sampling_options)
    first_result = results[0]
    return clean_sindhi(first_result['generated_text'])
# Build the web interface: one multi-line prompt box in, one text box out.
demo = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(
        lines=3,
        placeholder="Enter a Sindhi starting phrase...",
        label="Sindhi Prompt",
    ),
    outputs=gr.Textbox(label="SindhiLM Generation"),
    title="SindhiLM: Specialized Sindhi GPT-2",
    description="This model outperforms mBERT and standard GPT-2 in Sindhi text generation.",
    examples=["سنڌ جي ثقافت", "شاهه عبداللطيف", "علم حاصل ڪرڻ"],
)

# Launch only when executed as a script, so importing this module does not
# start a server as a side effect.
if __name__ == "__main__":
    demo.launch()