InventorsHub committed on
Commit
bf90c83
·
verified ·
1 Parent(s): c5b3be4

Update text_processing.py

Browse files
Files changed (1) hide show
  1. text_processing.py +52 -40
text_processing.py CHANGED
@@ -1,40 +1,52 @@
1
- from llama_cpp import Llama
2
- from huggingface_hub import hf_hub_download
3
-
4
- # Download the single GGUF shard by its repo path:
5
- model_path = hf_hub_download(
6
- repo_id="Inventors-Hub/SwarmChat-models",
7
- repo_type="model",
8
- filename="EuroLLM-9B-Instruct-Q4_K_M.gguf",
9
- )
10
-
11
- # llm = Llama(model_path=model_path, n_ctx=1024)#, verbose=True)
12
- llm = Llama(
13
- model_path=model_path,
14
- n_ctx=512, # down from 4096
15
- low_vram=True, # llama.cpp low-vram mode
16
- f16_kv=True, # half-precision kv cache
17
- use_mmap=True, # mmap file
18
- use_mlock=False,
19
- )
20
- # print("Llama backend initialized successfully!")
21
-
22
-
23
-
24
- # Function to process text using EuroLLM
25
- def translate_text(text):
26
- input_prompt = f"""
27
- <|im_start|>system
28
- <|im_end|>
29
- <|im_start|>user
30
- Translate the following text to English:
31
- Text: {text}
32
- English:
33
- <|im_end|>
34
- <|im_start|>assistant
35
- """
36
- output = llm(input_prompt, max_tokens=1024, temperature=0)
37
-
38
- translated_text = output.get("choices", [{}])[0].get("text", "").strip()
39
-
40
- return translated_text
 
 
 
 
 
 
 
 
 
 
 
 
 
import spaces  # Hugging Face Spaces SDK — provides the @spaces.GPU decorator used below

from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Download the single GGUF shard of EuroLLM-9B-Instruct (Q4_K_M quantization).
# hf_hub_download caches the file locally and returns its filesystem path.
model_path = hf_hub_download(
    repo_id="Inventors-Hub/SwarmChat-models",
    repo_type="model",
    filename="EuroLLM-9B-Instruct-Q4_K_M.gguf",
)
21
llm = None  # lazily-initialized Llama instance; populated by llm_gpu()


@spaces.GPU
def llm_gpu():
    """Create (once) and return the module-level Llama model inside a GPU context.

    Fix: the original assigned to a *local* ``llm``, leaving the module-level
    ``llm`` as ``None`` — so ``translate_text``, which reads the global, would
    call ``None``. Declare ``global llm`` and cache the instance so repeated
    calls reuse the already-loaded model instead of reloading the 9B GGUF.

    Returns:
        Llama: the initialized llama.cpp model handle.
    """
    global llm
    if llm is None:
        llm = Llama(
            model_path=model_path,
            n_ctx=512,        # down from 4096 to reduce memory footprint
            low_vram=True,    # llama.cpp low-VRAM mode
            f16_kv=True,      # half-precision KV cache
            use_mmap=True,    # mmap the model file instead of reading it in
            use_mlock=False,
        )
    return llm
34
+
35
+
36
+ # Function to process text using EuroLLM
37
+ def translate_text(text):
38
+ input_prompt = f"""
39
+ <|im_start|>system
40
+ <|im_end|>
41
+ <|im_start|>user
42
+ Translate the following text to English:
43
+ Text: {text}
44
+ English:
45
+ <|im_end|>
46
+ <|im_start|>assistant
47
+ """
48
+ output = llm(input_prompt, max_tokens=1024, temperature=0)
49
+
50
+ translated_text = output.get("choices", [{}])[0].get("text", "").strip()
51
+
52
+ return translated_text