# SindhiGPT / app.py
# aakashMeghwar01's picture
# Create app.py
# e2eb53d verified
import gradio as gr
from transformers import pipeline
import re
# Text-generation pipeline shared by every request handled by this app.
# Model source: aakashMeghwar01/SindhiLM
generator = pipeline(
    task="text-generation",
    model="aakashMeghwar01/SindhiLM",
)
# Matches every character that is neither in the Arabic Unicode block
# (U+0600-U+06FF) nor whitespace. Compiled once at import time.
_NON_SINDHI_RE = re.compile(r'[^\u0600-\u06FF\s]')


def clean_sindhi(text: "str | None") -> str:
    """Strip everything except Arabic-block characters and tidy whitespace.

    Characters outside U+0600-U+06FF that are not whitespace are removed
    (this drops the UTF-8 noise seen in training), then runs of whitespace
    are collapsed to single spaces and leading/trailing whitespace is
    trimmed.

    Args:
        text: Raw text to clean. ``None`` or an empty string is accepted.

    Returns:
        The cleaned text; an empty string when nothing survives or when
        ``text`` is ``None``/empty.
    """
    # Guard against a missing value so callers never hit a TypeError in re.
    if not text:
        return ""
    kept = _NON_SINDHI_RE.sub('', text)
    # split() with no argument both collapses whitespace runs and trims ends.
    return " ".join(kept.split())
def generate_text(prompt):
    """Generate a continuation of ``prompt`` and return it cleaned.

    The module-level ``generator`` pipeline is called with sampling enabled
    and a limit of 40 new tokens. The ``generated_text`` field of the first
    result is passed through ``clean_sindhi`` before being returned.
    """
    # Sampling settings the author describes as tuned for the 50M-parameter model.
    sampling_options = {
        "max_new_tokens": 40,
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.9,
        "repetition_penalty": 1.2,
    }
    candidates = generator(prompt, **sampling_options)
    first_candidate = candidates[0]
    return clean_sindhi(first_candidate['generated_text'])
# Build the Gradio UI: one prompt textbox in, one generated-text textbox out.
prompt_box = gr.Textbox(
    lines=3,
    placeholder="Enter a Sindhi starting phrase...",
    label="Sindhi Prompt",
)
generation_box = gr.Textbox(label="SindhiLM Generation")

# Clickable starter prompts shown with the interface.
sample_prompts = ["سنڌ جي ثقافت", "شاهه عبداللطيف", "علم حاصل ڪرڻ"]

demo = gr.Interface(
    fn=generate_text,
    inputs=prompt_box,
    outputs=generation_box,
    title="SindhiLM: Specialized Sindhi GPT-2",
    description="This model outperforms mBERT and standard GPT-2 in Sindhi text generation.",
    examples=sample_prompts,
)

# Launch immediately when the module is executed.
demo.launch()