Spaces:
Running
Running
Switch from tiny Falcon 100M to ERNIE-4.5-21B-A3B
Browse files — summarize_transcript.py +8 −12
summarize_transcript.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
-
Script to summarize transcript using
|
| 4 |
"""
|
| 5 |
|
| 6 |
import os
|
|
@@ -12,8 +12,8 @@ def load_model():
|
|
| 12 |
|
| 13 |
# Initialize the model with SYCL support
|
| 14 |
llm = Llama.from_pretrained(
|
| 15 |
-
repo_id="
|
| 16 |
-
filename="*
|
| 17 |
n_gpu_layers=-1, # Use all layers on GPU
|
| 18 |
seed=1337,
|
| 19 |
n_ctx=32768, # Context size
|
|
@@ -41,12 +41,6 @@ def stream_summarize_transcript(llm, transcript, language='zh-TW'):
|
|
| 41 |
transcript: The full transcript to summarize
|
| 42 |
language: Language for the summary ('en' or 'zh-TW')
|
| 43 |
"""
|
| 44 |
-
# Truncate the transcript to fit within the context window
|
| 45 |
-
max_transcript_length = 1000 # Leave room for prompt and response
|
| 46 |
-
|
| 47 |
-
if len(transcript) > max_transcript_length:
|
| 48 |
-
transcript = transcript[:max_transcript_length]
|
| 49 |
-
print(f"Transcript truncated to {max_transcript_length} characters to fit context window.")
|
| 50 |
|
| 51 |
# Use the model's chat format based on its template
|
| 52 |
if language == 'en':
|
|
@@ -69,9 +63,11 @@ def stream_summarize_transcript(llm, transcript, language='zh-TW'):
|
|
| 69 |
stream = llm.create_chat_completion(
|
| 70 |
messages=messages,
|
| 71 |
max_tokens=512,
|
| 72 |
-
temperature=0.
|
| 73 |
top_p=0.9,
|
| 74 |
-
repeat_penalty=1.
|
|
|
|
|
|
|
| 75 |
stop=["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"],
|
| 76 |
stream=True
|
| 77 |
)
|
|
@@ -93,7 +89,7 @@ def stream_summarize_transcript(llm, transcript, language='zh-TW'):
|
|
| 93 |
|
| 94 |
|
| 95 |
def main():
|
| 96 |
-
print("Loading
|
| 97 |
|
| 98 |
# Load the model
|
| 99 |
llm = load_model()
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
+
Script to summarize transcript using ERNIE-4.5-21B-A3B-PT-GGUF model with SYCL acceleration.
|
| 4 |
"""
|
| 5 |
|
| 6 |
import os
|
|
|
|
| 12 |
|
| 13 |
# Initialize the model with SYCL support
|
| 14 |
llm = Llama.from_pretrained(
|
| 15 |
+
repo_id="unsloth/ERNIE-4.5-21B-A3B-PT-GGUF",
|
| 16 |
+
filename="*TQ1_0.gguf",
|
| 17 |
n_gpu_layers=-1, # Use all layers on GPU
|
| 18 |
seed=1337,
|
| 19 |
n_ctx=32768, # Context size
|
|
|
|
| 41 |
transcript: The full transcript to summarize
|
| 42 |
language: Language for the summary ('en' or 'zh-TW')
|
| 43 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
# Use the model's chat format based on its template
|
| 46 |
if language == 'en':
|
|
|
|
| 63 |
stream = llm.create_chat_completion(
|
| 64 |
messages=messages,
|
| 65 |
max_tokens=512,
|
| 66 |
+
temperature=0.2,
|
| 67 |
top_p=0.9,
|
| 68 |
+
repeat_penalty=1.3,
|
| 69 |
+
frequency_penalty=1.5,
|
| 70 |
+
presence_penalty=1.0,
|
| 71 |
stop=["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"],
|
| 72 |
stream=True
|
| 73 |
)
|
|
|
|
| 89 |
|
| 90 |
|
| 91 |
def main():
|
| 92 |
+
print("Loading ERNIE-4.5-21B-A3B-PT-GGUF model with SYCL acceleration...")
|
| 93 |
|
| 94 |
# Load the model
|
| 95 |
llm = load_model()
|