Shri committed on
Commit
d562e10
·
1 Parent(s): 0cca1ec

fix: json,onnx model id

Browse files
Files changed (1) hide show
  1. src/chatbot/embedding.py +11 -4
src/chatbot/embedding.py CHANGED
@@ -1,4 +1,6 @@
1
  # to run this file you need model.onnx_data on the assets/onnx folder or you can obtain it from here: https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/tree/main/onnx
 
 
2
  import asyncio
3
  import os
4
  from typing import List
@@ -10,7 +12,8 @@ from transformers import AutoTokenizer
10
 
11
  BASE_DIR = os.path.dirname(__file__)
12
 
13
- TOKENIZER_DIR = os.path.abspath(os.path.join(BASE_DIR, "..", "assets", "tokenizer"))
 
14
 
15
  # MODEL_DIR = os.path.abspath(
16
  # os.path.join(BASE_DIR, "..", "assets", "onnx", "model.onnx")
@@ -20,9 +23,7 @@ TOKENIZER_DIR = os.path.abspath(os.path.join(BASE_DIR, "..", "assets", "tokenize
20
  class EmbeddingModel:
21
  def __init__(self):
22
  # print(TOKENIZER_DIR)
23
- self.tokenizer = AutoTokenizer.from_pretrained(
24
- TOKENIZER_DIR, local_files_only=True
25
- )
26
 
27
  # sess_options = ort.SessionOptions()
28
  # providers = ["CPUExecutionProvider"]
@@ -84,6 +85,12 @@ class EmbeddingModel:
84
  return input_ids.flatten().tolist()
85
 
86
 
 
 
 
 
 
 
87
  embedding_model = EmbeddingModel()
88
 
89
 
 
1
  # to run this file you need model.onnx_data on the assets/onnx folder or you can obtain it from here: https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/tree/main/onnx
2
+ # the model can also be loaded directly via AutoModel.from_pretrained using the same id "onnx-community/embeddinggemma-300m-ONNX"
3
+
4
  import asyncio
5
  import os
6
  from typing import List
 
12
 
13
  BASE_DIR = os.path.dirname(__file__)
14
 
15
+ # TOKENIZER_DIR = os.path.abspath(os.path.join(BASE_DIR, "..", "assets", "tokenizer"))
16
+ TOKENIZER_DIR = "onnx-community/embeddinggemma-300m-ONNX"
17
 
18
  # MODEL_DIR = os.path.abspath(
19
  # os.path.join(BASE_DIR, "..", "assets", "onnx", "model.onnx")
 
23
  class EmbeddingModel:
24
  def __init__(self):
25
  # print(TOKENIZER_DIR)
26
+ self.tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_DIR)
 
 
27
 
28
  # sess_options = ort.SessionOptions()
29
  # providers = ["CPUExecutionProvider"]
 
85
  return input_ids.flatten().tolist()
86
 
87
 
88
+ def cleanup(self):
89
+ if self.session:
90
+ self.session = None
91
+ print("ONNX runtime session closed.")
92
+
93
+
94
  embedding_model = EmbeddingModel()
95
 
96