Luigi committed on
Commit
01dc9b6
·
1 Parent(s): 16c9738

Use ERNIE 4.5 0.3B as the default model

Browse files
Files changed (1) hide show
  1. summarize_transcript.py +4 -6
summarize_transcript.py CHANGED
@@ -18,7 +18,7 @@ def load_model(repo_id, filename):
18
  n_gpu_layers=-1, # Use all layers on GPU
19
  seed=1337,
20
  n_ctx=32768, # Context size
21
- verbose=False, # Reduced verbosity for cleaner output
22
  n_batch=512,
23
  n_ubatch=256,
24
  v_type=2,
@@ -57,11 +57,9 @@ def stream_summarize_transcript(llm, transcript):
57
  stream = llm.create_chat_completion(
58
  messages=messages,
59
  max_tokens=512,
60
- temperature=0.2,
61
  top_p=0.9,
62
- repeat_penalty=1.3,
63
- frequency_penalty=1.5,
64
- presence_penalty=1.0,
65
  stop=["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"],
66
  stream=True
67
  )
@@ -88,7 +86,7 @@ def main():
88
  default="./transcripts/short.txt",
89
  help="Path to the input transcript file (default: ./transcripts/short.txt)")
90
  parser.add_argument("-m", "--model", type=str,
91
- default="Luigi/Falcon-H1-Tiny-Multilingual-100M-Instruct-GGUF:IQ4_NL",
92
  help="HuggingFace model in format repo_id:quant (e.g., Luigi/Falcon-H1-Tiny-Multilingual-100M-Instruct-GGUF:IQ4_NL)")
93
  args = parser.parse_args()
94
 
 
18
  n_gpu_layers=-1, # Use all layers on GPU
19
  seed=1337,
20
  n_ctx=32768, # Context size
21
+ verbose=True, # Verbose output enabled
22
  n_batch=512,
23
  n_ubatch=256,
24
  v_type=2,
 
57
  stream = llm.create_chat_completion(
58
  messages=messages,
59
  max_tokens=512,
60
+ temperature=0.3,
61
  top_p=0.9,
62
+ repeat_penalty=1.5,
 
 
63
  stop=["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"],
64
  stream=True
65
  )
 
86
  default="./transcripts/short.txt",
87
  help="Path to the input transcript file (default: ./transcripts/short.txt)")
88
  parser.add_argument("-m", "--model", type=str,
89
+ default="bartowski/baidu_ERNIE-4.5-0.3B-PT-GGUF:Q6_K",
90
  help="HuggingFace model in format repo_id:quant (e.g., Luigi/Falcon-H1-Tiny-Multilingual-100M-Instruct-GGUF:IQ4_NL)")
91
  args = parser.parse_args()
92