Spaces:
Running
Running
Switch from tiny Falcon 100M to ERNIE-4.5-21B-A3B
Browse files — summarize_transcript.py +8 −12
summarize_transcript.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
-
Script to summarize transcript using
|
| 4 |
"""
|
| 5 |
|
| 6 |
import os
|
|
@@ -12,8 +12,8 @@ def load_model():
|
|
| 12 |
|
| 13 |
# Initialize the model with SYCL support
|
| 14 |
llm = Llama.from_pretrained(
|
| 15 |
-
repo_id="
|
| 16 |
-
filename="*
|
| 17 |
n_gpu_layers=-1, # Use all layers on GPU
|
| 18 |
seed=1337,
|
| 19 |
n_ctx=32768, # Context size
|
|
@@ -41,12 +41,6 @@ def stream_summarize_transcript(llm, transcript, language='zh-TW'):
|
|
| 41 |
transcript: The full transcript to summarize
|
| 42 |
language: Language for the summary ('en' or 'zh-TW')
|
| 43 |
"""
|
| 44 |
-
# Truncate the transcript to fit within the context window
|
| 45 |
-
max_transcript_length = 1000 # Leave room for prompt and response
|
| 46 |
-
|
| 47 |
-
if len(transcript) > max_transcript_length:
|
| 48 |
-
transcript = transcript[:max_transcript_length]
|
| 49 |
-
print(f"Transcript truncated to {max_transcript_length} characters to fit context window.")
|
| 50 |
|
| 51 |
# Use the model's chat format based on its template
|
| 52 |
if language == 'en':
|
|
@@ -69,9 +63,11 @@ def stream_summarize_transcript(llm, transcript, language='zh-TW'):
|
|
| 69 |
stream = llm.create_chat_completion(
|
| 70 |
messages=messages,
|
| 71 |
max_tokens=512,
|
| 72 |
-
temperature=0.
|
| 73 |
top_p=0.9,
|
| 74 |
-
repeat_penalty=1.
|
|
|
|
|
|
|
| 75 |
stop=["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"],
|
| 76 |
stream=True
|
| 77 |
)
|
|
@@ -93,7 +89,7 @@ def stream_summarize_transcript(llm, transcript, language='zh-TW'):
|
|
| 93 |
|
| 94 |
|
| 95 |
def main():
|
| 96 |
-
print("Loading
|
| 97 |
|
| 98 |
# Load the model
|
| 99 |
llm = load_model()
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
+
Script to summarize transcript using ERNIE-4.5-21B-A3B-PT-GGUF model with SYCL acceleration.
|
| 4 |
"""
|
| 5 |
|
| 6 |
import os
|
|
|
|
| 12 |
|
| 13 |
# Initialize the model with SYCL support
|
| 14 |
llm = Llama.from_pretrained(
|
| 15 |
+
repo_id="unsloth/ERNIE-4.5-21B-A3B-PT-GGUF",
|
| 16 |
+
filename="*TQ1_0.gguf",
|
| 17 |
n_gpu_layers=-1, # Use all layers on GPU
|
| 18 |
seed=1337,
|
| 19 |
n_ctx=32768, # Context size
|
|
|
|
| 41 |
transcript: The full transcript to summarize
|
| 42 |
language: Language for the summary ('en' or 'zh-TW')
|
| 43 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
# Use the model's chat format based on its template
|
| 46 |
if language == 'en':
|
|
|
|
| 63 |
stream = llm.create_chat_completion(
|
| 64 |
messages=messages,
|
| 65 |
max_tokens=512,
|
| 66 |
+
temperature=0.2,
|
| 67 |
top_p=0.9,
|
| 68 |
+
repeat_penalty=1.3,
|
| 69 |
+
frequency_penalty=1.5,
|
| 70 |
+
presence_penalty=1.0,
|
| 71 |
stop=["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"],
|
| 72 |
stream=True
|
| 73 |
)
|
|
|
|
| 89 |
|
| 90 |
|
| 91 |
def main():
|
| 92 |
+
print("Loading ERNIE-4.5-21B-A3B-PT-GGUF model with SYCL acceleration...")
|
| 93 |
|
| 94 |
# Load the model
|
| 95 |
llm = load_model()
|