Spaces:

InventorsHub
/

SwarmChat

Sleeping

InventorsHub committed on Jun 15, 2025

Commit

bf90c83

verified ·

1 Parent(s): c5b3be4

Update text_processing.py

Files changed (1) hide show

text_processing.py CHANGED Viewed

@@ -1,40 +1,52 @@
-from llama_cpp import Llama
-from huggingface_hub import hf_hub_download
-# Download the single GGUF shard by its repo path:
-model_path = hf_hub_download(
-    repo_id="Inventors-Hub/SwarmChat-models",
-    repo_type="model",
-    filename="EuroLLM-9B-Instruct-Q4_K_M.gguf",
-)
-# llm = Llama(model_path=model_path, n_ctx=1024)#, verbose=True)
-llm = Llama(
-    model_path=model_path,
-    n_ctx=512,            # down from 4096
-    low_vram=True,         # llama.cpp low-vram mode
-    f16_kv=True,           # half-precision kv cache
-    use_mmap=True,         # mmap file
-    use_mlock=False,
-)
-# print("Llama backend initialized successfully!")
-# Function to process text using EuroLLM
-def translate_text(text):
-    input_prompt = f"""
-    <|im_start|>system
-    <|im_end|>
-    <|im_start|>user
-    Translate the following text to English:
-    Text: {text}
-    English:
-    <|im_end|>
-    <|im_start|>assistant
-    """
-    output = llm(input_prompt, max_tokens=1024, temperature=0)
-    translated_text = output.get("choices", [{}])[0].get("text", "").strip()
-    return translated_text

+from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
+# Download the single GGUF shard by its repo path:
+model_path = hf_hub_download(
+    repo_id="Inventors-Hub/SwarmChat-models",
+    repo_type="model",
+    filename="EuroLLM-9B-Instruct-Q4_K_M.gguf",
+)
+# llm = Llama(model_path=model_path, n_ctx=1024)#, verbose=True)
+# llm = Llama(
+#     model_path=model_path,
+#     n_ctx=512,            # down from 4096
+#     low_vram=True,         # llama.cpp low-vram mode
+#     f16_kv=True,           # half-precision kv cache
+#     use_mmap=True,         # mmap file
+#     use_mlock=False,
+# )
+# print("Llama backend initialized successfully!")
+llm = None
+@spaces.GPU
+def llm_gpu():
+    llm = Llama(
+        model_path=model_path,
+        n_ctx=512,            # down from 4096
+        low_vram=True,         # llama.cpp low-vram mode
+        f16_kv=True,           # half-precision kv cache
+        use_mmap=True,         # mmap file
+        use_mlock=False,
+    )
+    return llm
+# Function to process text using EuroLLM
+def translate_text(text):
+    input_prompt = f"""
+    <|im_start|>system
+    <|im_end|>
+    <|im_start|>user
+    Translate the following text to English:
+    Text: {text}
+    English:
+    <|im_end|>
+    <|im_start|>assistant
+    """
+    output = llm(input_prompt, max_tokens=1024, temperature=0)
+    translated_text = output.get("choices", [{}])[0].get("text", "").strip()
+    return translated_text