Luigi committed on
Commit
f8cb070
·
1 Parent(s): d59740a

switch from tiny falcon 100m to ernie 4.5 21b a3b

Browse files
Files changed (1) hide show
  1. summarize_transcript.py +8 -12
summarize_transcript.py CHANGED
@@ -1,6 +1,6 @@
1
  #!/usr/bin/env python3
2
  """
3
- Script to summarize transcript using Falcon-H1-Tiny-Multilingual model with SYCL acceleration.
4
  """
5
 
6
  import os
@@ -12,8 +12,8 @@ def load_model():
12
 
13
  # Initialize the model with SYCL support
14
  llm = Llama.from_pretrained(
15
- repo_id="Luigi/Falcon-H1-Tiny-Multilingual-100M-Instruct-GGUF",
16
- filename="*IQ4_NL.gguf",
17
  n_gpu_layers=-1, # Use all layers on GPU
18
  seed=1337,
19
  n_ctx=32768, # Context size
@@ -41,12 +41,6 @@ def stream_summarize_transcript(llm, transcript, language='zh-TW'):
41
  transcript: The full transcript to summarize
42
  language: Language for the summary ('en' or 'zh-TW')
43
  """
44
- # Truncate the transcript to fit within the context window
45
- max_transcript_length = 1000 # Leave room for prompt and response
46
-
47
- if len(transcript) > max_transcript_length:
48
- transcript = transcript[:max_transcript_length]
49
- print(f"Transcript truncated to {max_transcript_length} characters to fit context window.")
50
 
51
  # Use the model's chat format based on its template
52
  if language == 'en':
@@ -69,9 +63,11 @@ def stream_summarize_transcript(llm, transcript, language='zh-TW'):
69
  stream = llm.create_chat_completion(
70
  messages=messages,
71
  max_tokens=512,
72
- temperature=0.3,
73
  top_p=0.9,
74
- repeat_penalty=1.1,
 
 
75
  stop=["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"],
76
  stream=True
77
  )
@@ -93,7 +89,7 @@ def stream_summarize_transcript(llm, transcript, language='zh-TW'):
93
 
94
 
95
  def main():
96
- print("Loading Falcon-H1-Tiny-Multilingual model with SYCL acceleration...")
97
 
98
  # Load the model
99
  llm = load_model()
 
1
  #!/usr/bin/env python3
2
  """
3
+ Script to summarize transcript using ERNIE-4.5-21B-A3B-PT-GGUF model with SYCL acceleration.
4
  """
5
 
6
  import os
 
12
 
13
  # Initialize the model with SYCL support
14
  llm = Llama.from_pretrained(
15
+ repo_id="unsloth/ERNIE-4.5-21B-A3B-PT-GGUF",
16
+ filename="*TQ1_0.gguf",
17
  n_gpu_layers=-1, # Use all layers on GPU
18
  seed=1337,
19
  n_ctx=32768, # Context size
 
41
  transcript: The full transcript to summarize
42
  language: Language for the summary ('en' or 'zh-TW')
43
  """
 
 
 
 
 
 
44
 
45
  # Use the model's chat format based on its template
46
  if language == 'en':
 
63
  stream = llm.create_chat_completion(
64
  messages=messages,
65
  max_tokens=512,
66
+ temperature=0.2,
67
  top_p=0.9,
68
+ repeat_penalty=1.3,
69
+ frequency_penalty=1.5,
70
+ presence_penalty=1.0,
71
  stop=["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"],
72
  stream=True
73
  )
 
89
 
90
 
91
  def main():
92
+ print("Loading ERNIE-4.5-21B-A3B-PT-GGUF model with SYCL acceleration...")
93
 
94
  # Load the model
95
  llm = load_model()