InventorsHub committed on
Commit
dc3ea55
·
verified ·
1 Parent(s): f12c6da

Update text_processing.py

Browse files
Files changed (1) hide show
  1. text_processing.py +9 -7
text_processing.py CHANGED
@@ -1,13 +1,8 @@
1
  from llama_cpp import Llama
2
  from huggingface_hub import hf_hub_download
3
  import spaces
4
-
5
  # Download the single GGUF shard by its repo path:
6
- model_path = hf_hub_download(
7
- repo_id="Inventors-Hub/SwarmChat-models",
8
- repo_type="model",
9
- filename="EuroLLM-9B-Instruct-Q4_K_M.gguf",
10
- )
11
 
12
  # llm = Llama(model_path=model_path, n_ctx=1024)#, verbose=True)
13
  # llm = Llama(
@@ -19,9 +14,15 @@ model_path = hf_hub_download(
19
  # use_mlock=False,
20
  # )
21
  # print("Llama backend initialized successfully!")
22
- llm = None
23
  @spaces.GPU
24
  def llm_gpu():
 
 
 
 
 
 
25
 
26
  llm = Llama(
27
  model_path=model_path,
@@ -46,6 +47,7 @@ def translate_text(text):
46
  <|im_end|>
47
  <|im_start|>assistant
48
  """
 
49
  output = llm(input_prompt, max_tokens=1024, temperature=0)
50
 
51
  translated_text = output.get("choices", [{}])[0].get("text", "").strip()
 
1
  from llama_cpp import Llama
2
  from huggingface_hub import hf_hub_download
3
  import spaces
4
+ import functools
5
  # Download the single GGUF shard by its repo path:
 
 
 
 
 
6
 
7
  # llm = Llama(model_path=model_path, n_ctx=1024)#, verbose=True)
8
  # llm = Llama(
 
14
  # use_mlock=False,
15
  # )
16
  # print("Llama backend initialized successfully!")
17
+ @functools.lru_cache(maxsize=1)
18
  @spaces.GPU
19
  def llm_gpu():
20
+
21
+ model_path = hf_hub_download(
22
+ repo_id="Inventors-Hub/SwarmChat-models",
23
+ repo_type="model",
24
+ filename="EuroLLM-9B-Instruct-Q4_K_M.gguf",
25
+ )
26
 
27
  llm = Llama(
28
  model_path=model_path,
 
47
  <|im_end|>
48
  <|im_start|>assistant
49
  """
50
+ llm = llm_gpu()
51
  output = llm(input_prompt, max_tokens=1024, temperature=0)
52
 
53
  translated_text = output.get("choices", [{}])[0].get("text", "").strip()