"""Gradio app that serves IndoBERT sentence embeddings for single or batch text."""

import gradio as gr
from sentence_transformers import SentenceTransformer

# The model is loaded once at import time so every request reuses it.
print("Loading IndoBERT model...")
MODEL_NAME = "indobenchmark/indobert-base-p1"
model = SentenceTransformer(MODEL_NAME)
print("Model loaded!")


def embed_single(text: str) -> dict:
    """Embed one piece of text for the "Single" tab.

    Args:
        text: Raw contents of the input textbox.

    Returns:
        ``{"error": "Text required"}`` when the input is empty or contains
        only whitespace; otherwise a dict with ``success``, the ``embedding``
        as a list of floats, and its ``dimension``.
    """
    # Strip first so whitespace-only input is rejected instead of embedded,
    # mirroring the per-line stripping done in embed_batch.
    cleaned = text.strip() if text else ""
    if not cleaned:
        return {"error": "Text required"}

    # normalize_embeddings=True asks sentence-transformers to normalize the
    # returned vector; .tolist() makes it JSON-serializable for gr.JSON.
    embedding = model.encode(cleaned, normalize_embeddings=True).tolist()
    return {
        "success": True,
        "embedding": embedding,
        "dimension": len(embedding),
    }


def embed_batch(texts: str) -> dict:
    """Embed several texts (one per line) for the "Batch" tab.

    Args:
        texts: Raw contents of the batch textbox; each non-blank line is
            treated as one text, with surrounding whitespace removed.

    Returns:
        ``{"error": "Texts required"}`` when the input has no non-blank
        lines; otherwise a dict with ``success``, the ``embeddings`` (one
        list of floats per line), their ``count``, and the ``dimension``
        of each embedding.
    """
    if not texts:
        return {"error": "Texts required"}

    text_list = [line.strip() for line in texts.split("\n") if line.strip()]
    # Input made only of blank lines leaves nothing to encode; report it as
    # missing input rather than returning a "success" with zero embeddings.
    if not text_list:
        return {"error": "Texts required"}

    embeddings = model.encode(text_list, normalize_embeddings=True).tolist()
    return {
        "success": True,
        "embeddings": embeddings,
        "count": len(embeddings),
        "dimension": len(embeddings[0]) if embeddings else 0,
    }


# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# 🇮🇩 IndoBERT Embedding API")

    with gr.Tab("Single"):
        input_single = gr.Textbox(
            label="Text",
            lines=3,
            placeholder="Enter Indonesian text...",
        )
        btn_single = gr.Button("Generate Embedding")
        output_single = gr.JSON(label="Result")
        btn_single.click(embed_single, inputs=input_single, outputs=output_single)

    with gr.Tab("Batch"):
        input_batch = gr.Textbox(
            label="Texts (one per line)",
            lines=10,
            placeholder="Enter multiple Indonesian texts, one per line...",
        )
        btn_batch = gr.Button("Generate Batch Embeddings")
        output_batch = gr.JSON(label="Result")
        btn_batch.click(embed_batch, inputs=input_batch, outputs=output_batch)

demo.launch()