khansagiffany committed on
Commit
e74dd14
·
verified ·
1 Parent(s): 91acb24

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -27
app.py CHANGED
@@ -1,36 +1,18 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModel
3
- import torch
4
 
5
  print("Loading IndoBERT model...")
6
  MODEL_NAME = "indobenchmark/indobert-base-p1"
7
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
8
- model = AutoModel.from_pretrained(MODEL_NAME)
9
- model.eval()
10
  print("Model loaded!")
11
 
12
- def mean_pooling(model_output, attention_mask):
13
- token_embeddings = model_output[0]
14
- input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
15
- return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
16
-
17
- def generate_embedding(text):
18
- encoded_input = tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors='pt')
19
-
20
- with torch.no_grad():
21
- model_output = model(**encoded_input)
22
-
23
- embedding = mean_pooling(model_output, encoded_input['attention_mask'])
24
- embedding = torch.nn.functional.normalize(embedding, p=2, dim=1)
25
-
26
- return embedding[0].numpy().tolist()
27
-
28
  def embed_single(text):
29
  """For Gradio interface - single text"""
30
  if not text:
31
  return {"error": "Text required"}
32
 
33
- embedding = generate_embedding(text)
 
34
  return {
35
  "success": True,
36
  "embedding": embedding,
@@ -43,7 +25,7 @@ def embed_batch(texts):
43
  return {"error": "Texts required"}
44
 
45
  text_list = [t.strip() for t in texts.split('\n') if t.strip()]
46
- embeddings = [generate_embedding(text) for text in text_list]
47
 
48
  return {
49
  "success": True,
@@ -57,14 +39,18 @@ with gr.Blocks() as demo:
57
  gr.Markdown("# 🇮🇩 IndoBERT Embedding API")
58
 
59
  with gr.Tab("Single"):
60
- input_single = gr.Textbox(label="Text", lines=3)
61
- btn_single = gr.Button("Generate")
62
  output_single = gr.JSON(label="Result")
63
  btn_single.click(embed_single, inputs=input_single, outputs=output_single)
64
 
65
  with gr.Tab("Batch"):
66
- input_batch = gr.Textbox(label="Texts (one per line)", lines=10)
67
- btn_batch = gr.Button("Generate Batch")
 
 
 
 
68
  output_batch = gr.JSON(label="Result")
69
  btn_batch.click(embed_batch, inputs=input_batch, outputs=output_batch)
70
 
 
1
  import gradio as gr
2
+ from sentence_transformers import SentenceTransformer
 
3
 
4
  print("Loading IndoBERT model...")
5
  MODEL_NAME = "indobenchmark/indobert-base-p1"
6
+ model = SentenceTransformer(MODEL_NAME)
 
 
7
  print("Model loaded!")
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  def embed_single(text):
10
  """For Gradio interface - single text"""
11
  if not text:
12
  return {"error": "Text required"}
13
 
14
+ embedding = model.encode(text, normalize_embeddings=True).tolist()
15
+
16
  return {
17
  "success": True,
18
  "embedding": embedding,
 
25
  return {"error": "Texts required"}
26
 
27
  text_list = [t.strip() for t in texts.split('\n') if t.strip()]
28
+ embeddings = model.encode(text_list, normalize_embeddings=True).tolist()
29
 
30
  return {
31
  "success": True,
 
39
  gr.Markdown("# 🇮🇩 IndoBERT Embedding API")
40
 
41
  with gr.Tab("Single"):
42
+ input_single = gr.Textbox(label="Text", lines=3, placeholder="Enter Indonesian text...")
43
+ btn_single = gr.Button("Generate Embedding")
44
  output_single = gr.JSON(label="Result")
45
  btn_single.click(embed_single, inputs=input_single, outputs=output_single)
46
 
47
  with gr.Tab("Batch"):
48
+ input_batch = gr.Textbox(
49
+ label="Texts (one per line)",
50
+ lines=10,
51
+ placeholder="Enter multiple Indonesian texts, one per line..."
52
+ )
53
+ btn_batch = gr.Button("Generate Batch Embeddings")
54
  output_batch = gr.JSON(label="Result")
55
  btn_batch.click(embed_batch, inputs=input_batch, outputs=output_batch)
56