InventorsHub committed on
Commit
dc3ea55
·
verified ·
1 Parent(s): f12c6da

Update text_processing.py

Browse files
Files changed (1) hide show
  1. text_processing.py +9 -7
text_processing.py CHANGED
@@ -1,13 +1,8 @@
1
  from llama_cpp import Llama
2
  from huggingface_hub import hf_hub_download
3
  import spaces
4
-
5
  # Download the single GGUF shard by its repo path:
6
- model_path = hf_hub_download(
7
- repo_id="Inventors-Hub/SwarmChat-models",
8
- repo_type="model",
9
- filename="EuroLLM-9B-Instruct-Q4_K_M.gguf",
10
- )
11
 
12
  # llm = Llama(model_path=model_path, n_ctx=1024)#, verbose=True)
13
  # llm = Llama(
@@ -19,9 +14,15 @@ model_path = hf_hub_download(
19
  # use_mlock=False,
20
  # )
21
  # print("Llama backend initialized successfully!")
22
- llm = None
23
  @spaces.GPU
24
  def llm_gpu():
 
 
 
 
 
 
25
 
26
  llm = Llama(
27
  model_path=model_path,
@@ -46,6 +47,7 @@ def translate_text(text):
46
  <|im_end|>
47
  <|im_start|>assistant
48
  """
 
49
  output = llm(input_prompt, max_tokens=1024, temperature=0)
50
 
51
  translated_text = output.get("choices", [{}])[0].get("text", "").strip()
 
1
  from llama_cpp import Llama
2
  from huggingface_hub import hf_hub_download
3
  import spaces
4
+ import functools
5
  # Download the single GGUF shard by its repo path:
 
 
 
 
 
6
 
7
  # llm = Llama(model_path=model_path, n_ctx=1024)#, verbose=True)
8
  # llm = Llama(
 
14
  # use_mlock=False,
15
  # )
16
  # print("Llama backend initialized successfully!")
17
+ @functools.lru_cache(maxsize=1)
18
  @spaces.GPU
19
  def llm_gpu():
20
+
21
+ model_path = hf_hub_download(
22
+ repo_id="Inventors-Hub/SwarmChat-models",
23
+ repo_type="model",
24
+ filename="EuroLLM-9B-Instruct-Q4_K_M.gguf",
25
+ )
26
 
27
  llm = Llama(
28
  model_path=model_path,
 
47
  <|im_end|>
48
  <|im_start|>assistant
49
  """
50
+ llm = llm_gpu()
51
  output = llm(input_prompt, max_tokens=1024, temperature=0)
52
 
53
  translated_text = output.get("choices", [{}])[0].get("text", "").strip()