# karani_v0 / app.py
# Uploaded by Bur3hani; commit 1fb6397 ("Create app.py"), verified.
import gradio as gr
from transformers import pipeline, AutoTokenizer
# --- Configuration ---
# IMPORTANT: Replace with the actual HF Hub repo name where you pushed your model
MODEL_NAME = "Bur3hani/karani-afro-xlmr-base-finetuned-swahili"
# --- End Configuration ---

# Resolve the mask token. Start from the generic fallback and overwrite it
# with the tokenizer's own mask token when the tokenizer loads and defines one.
mask_token = "[MASK]"
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    mask_token = tokenizer.mask_token or mask_token
except Exception as tokenizer_error:
    print(f"Warning: Could not load tokenizer separately. Using default mask '[MASK]'. Error: {tokenizer_error}")
    mask_token = "[MASK]"

# Build the fill-mask pipeline from the fine-tuned model. `mask_filler` stays
# None when loading fails so the request handler can report the problem
# instead of the whole app crashing at import time.
mask_filler = None
try:
    mask_filler = pipeline(task="fill-mask", model=MODEL_NAME, tokenizer=MODEL_NAME)
    print(f"Pipeline loaded successfully for model: {MODEL_NAME}")
except Exception as pipeline_error:
    print(f"Error loading pipeline: {pipeline_error}")
    mask_filler = None
# Define the function that Gradio will call
def fill_mask_kiswahili(sentence_with_mask):
    """Return a text report of the top-5 fill-ins for one masked word.

    Args:
        sentence_with_mask: Sentence containing exactly one occurrence of the
            module-level ``mask_token``. ``None`` is treated as an empty
            string.

    Returns:
        A multi-line string listing each predicted token, its score and the
        sentence with the blank filled in. When the pipeline is unavailable,
        the input is unusable, or prediction fails, a human-readable error
        message is returned instead; the function does not raise.
    """
    if mask_filler is None:
        return "ERROR: Model pipeline could not be loaded."

    # A cleared textbox may arrive as None; normalise so the checks below
    # cannot raise outside the try block.
    sentence = sentence_with_mask or ""

    mask_count = sentence.count(mask_token)
    if mask_count == 0:
        return f"ERROR: Input sentence must contain the mask token: {mask_token}"
    if mask_count > 1:
        # With several masks the pipeline returns one prediction list per
        # mask, and str.replace would fill every blank with the same token,
        # so reject the input with a clear message instead.
        return (
            f"ERROR: Input sentence must contain exactly one mask token "
            f"({mask_token}), but {mask_count} were found."
        )

    try:
        predictions = mask_filler(sentence, top_k=5)
        # Collect the report lines and join once at the end.
        report = [f"Input: {sentence}\n\nPredictions:", "-------------------"]
        for rank, pred in enumerate(predictions, start=1):
            token_str = pred['token_str'].replace(' ', '').strip()  # Clean token
            sequence_reconstructed = sentence.replace(mask_token, token_str)
            score = pred['score']
            report.append(
                f"{rank}. {token_str} (Score: {score:.4f})\n -> '{sequence_reconstructed}'"
            )
        return "\n".join(report) + "\n"
    except Exception as e:
        # Top-level UI boundary: surface the failure as text in the output box.
        return f"An error occurred during prediction: {e}"
# Create the Gradio interface.
# The on-screen instructions interpolate `mask_token` rather than hard-coding
# '[MASK]': the handler validates input against the tokenizer's actual mask
# token, so the label, description, placeholder and examples must all show
# the same token or users will be told to type something that is rejected.
iface = gr.Interface(
    fn=fill_mask_kiswahili,
    inputs=gr.Textbox(
        lines=2,
        label=f"Sentence in Kiswahili (use '{mask_token}' for the blank)",
        placeholder=f"Example: Leo hali ya hewa ni {mask_token} sana.",
    ),
    outputs=gr.Textbox(label="Top 5 Predictions"),
    title="Karani Kiswahili Assistant (Fill-Mask Demo)",
    description=(
        "Demo of a Kiswahili language model fine-tuned using Afro-XLMR-Base. "
        f"Type a sentence in Kiswahili and use '{mask_token}' where you want "
        "the model to predict a word."
    ),
    examples=[
        [f"Mradi huu wa {mask_token} utasaidia sana."],
        [f"Alikwenda sokoni kununua {mask_token}."],
        [f"Ninasoma {mask_token} kwa makini."],
    ],
    allow_flagging="never",  # Disable flagging for simplicity
)

# Launch the interface (when running app.py in the Space)
if __name__ == "__main__":
    iface.launch()