import html

import gradio as gr
from transformers import pipeline, AutoTokenizer
|
|
| |
| |
# Hugging Face Hub identifier of the fine-tuned Kiswahili masked-language model.
MODEL_NAME = "Bur3hani/karani-afro-xlmr-base-finetuned-swahili"

# Fallback used when the tokenizer cannot report its own mask token. The model
# is described in this file as fine-tuned from Afro-XLMR-Base, and
# XLM-RoBERTa-style tokenizers use "<mask>" rather than BERT's "[MASK]".
DEFAULT_MASK_TOKEN = "<mask>"

# Resolve the mask token that users must type into their sentence. Loading the
# tokenizer is best-effort: on any failure a warning is printed and the
# fallback token is used so that the rest of the module can still be imported.
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    mask_token = tokenizer.mask_token or DEFAULT_MASK_TOKEN
except Exception as e:
    print(
        "Warning: Could not load tokenizer separately. "
        f"Using default mask '{DEFAULT_MASK_TOKEN}'. Error: {e}"
    )
    mask_token = DEFAULT_MASK_TOKEN
|
|
|
|
| |
# Build the fill-mask inference pipeline once, at import time. If loading
# fails for any reason the error is printed and ``mask_filler`` is set to
# None; the prediction function checks for that before using it.
try:
    mask_filler = pipeline(
        task="fill-mask",
        model=MODEL_NAME,
        tokenizer=MODEL_NAME,
    )
    print(f"Pipeline loaded successfully for model: {MODEL_NAME}")
except Exception as load_error:
    print(f"Error loading pipeline: {load_error}")
    mask_filler = None
|
|
|
|
| |
def fill_mask_kiswahili(sentence_with_mask):
    """Predict the top five fillers for the masked word in a sentence.

    Uses the module-level ``mask_filler`` pipeline and ``mask_token``.

    Args:
        sentence_with_mask: Sentence text that must contain exactly one
            occurrence of ``mask_token``. ``None`` is treated as empty text.

    Returns:
        A multi-line report listing, for each prediction, the predicted
        token, its score, and the sentence with the token substituted for
        the mask. If the pipeline is unavailable, the input is invalid, or
        prediction fails, a message describing the problem is returned
        instead; no exception is raised to the caller.
    """
    if mask_filler is None:
        return "ERROR: Model pipeline could not be loaded."

    # Normalise None to "" so the validation below cannot raise TypeError.
    sentence = sentence_with_mask or ""
    mask_count = sentence.count(mask_token)
    if mask_count == 0:
        return f"ERROR: Input sentence must contain the mask token: {mask_token}"
    if mask_count > 1:
        # With several masks the pipeline returns one prediction list per
        # mask, which the single-mask report below cannot represent.
        return (
            "ERROR: Input sentence must contain exactly one mask token "
            f"({mask_token}); found {mask_count}."
        )

    try:
        predictions = mask_filler(sentence, top_k=5)

        report_lines = [
            f"Input: {sentence}\n\nPredictions:",
            "-------------------",
        ]
        for rank, pred in enumerate(predictions, start=1):
            token_str = pred['token_str'].replace(' ', '').strip()
            sequence_reconstructed = sentence.replace(mask_token, token_str, 1)
            score = pred['score']
            report_lines.append(
                f"{rank}. {token_str} (Score: {score:.4f})\n -> '{sequence_reconstructed}'"
            )
        return "\n".join(report_lines) + "\n"
    except Exception as e:
        # Keep the UI responsive: surface the failure as text rather than
        # letting the exception propagate out of the handler.
        return f"An error occurred during prediction: {e}"
|
|
|
|
| |
# Gradio UI for the fill-mask demo. All user-facing instructions quote the
# tokenizer's actual ``mask_token`` (not a hard-coded "[MASK]"), because the
# prediction function rejects sentences that do not contain that exact token.
iface = gr.Interface(
    fn=fill_mask_kiswahili,
    inputs=gr.Textbox(
        lines=2,
        label=f"Sentence in Kiswahili (use '{mask_token}' for the blank)",
        placeholder=f"Example: Leo hali ya hewa ni {mask_token} sana.",
    ),
    outputs=gr.Textbox(label="Top 5 Predictions"),
    title="Karani Kiswahili Assistant (Fill-Mask Demo)",
    # The description is rendered as Markdown/HTML, so the token is
    # HTML-escaped; an unescaped "<mask>" would be parsed as a tag.
    description=(
        "Demo of a Kiswahili language model fine-tuned using Afro-XLMR-Base. "
        "Type a sentence in Kiswahili and use "
        f"'{html.escape(mask_token)}' where you want the model to predict a word."
    ),
    examples=[
        [f"Mradi huu wa {mask_token} utasaidia sana."],
        [f"Alikwenda sokoni kununua {mask_token}."],
        [f"Ninasoma {mask_token} kwa makini."],
    ],
    allow_flagging="never",
)


# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()