Spaces:
Running
Running
output summary uniquely in zh-TW
Browse files- summarize_transcript.py +11 -23
summarize_transcript.py
CHANGED
|
@@ -18,10 +18,11 @@ def load_model():
|
|
| 18 |
seed=1337,
|
| 19 |
n_ctx=32768, # Context size
|
| 20 |
verbose=False, # Reduced verbosity for cleaner output
|
| 21 |
-
n_batch=
|
| 22 |
-
n_ubatch=
|
| 23 |
v_type=2,
|
| 24 |
-
k_type=2
|
|
|
|
| 25 |
)
|
| 26 |
|
| 27 |
return llm
|
|
@@ -32,30 +33,23 @@ def read_transcript(file_path):
|
|
| 32 |
content = f.read()
|
| 33 |
return content
|
| 34 |
|
| 35 |
-
def stream_summarize_transcript(llm, transcript
|
| 36 |
"""
|
| 37 |
Perform live streaming summary by getting real-time token output from the model.
|
| 38 |
|
| 39 |
Args:
|
| 40 |
llm: The loaded language model
|
| 41 |
transcript: The full transcript to summarize
|
| 42 |
-
language: Language for the summary ('en' or 'zh-TW')
|
| 43 |
"""
|
| 44 |
|
| 45 |
# Use the model's chat format based on its template
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
]
|
| 51 |
-
else: # Default to zh-TW
|
| 52 |
-
messages = [
|
| 53 |
-
{"role": "system", "content": "你是一個有助的助手,負責總結轉錄內容。"},
|
| 54 |
-
{"role": "user", "content": f"請總結以下內容:\n\n{transcript}"}
|
| 55 |
-
]
|
| 56 |
|
| 57 |
# Generate the summary using streaming completion
|
| 58 |
-
print(f"\nStreaming
|
| 59 |
print("="*50)
|
| 60 |
|
| 61 |
full_response = ""
|
|
@@ -102,18 +96,12 @@ def main():
|
|
| 102 |
print(transcript[:500] + "..." if len(transcript) > 500 else transcript)
|
| 103 |
|
| 104 |
# Summarize in Chinese (zh-TW) with streaming
|
| 105 |
-
chinese_summary = stream_summarize_transcript(llm, transcript
|
| 106 |
-
|
| 107 |
-
# Summarize in English with streaming
|
| 108 |
-
english_summary = stream_summarize_transcript(llm, transcript, language='en')
|
| 109 |
|
| 110 |
# Save summaries to files
|
| 111 |
with open("/home/luigi/tiny-scribe/chinese_summary.txt", 'w', encoding='utf-8') as f:
|
| 112 |
f.write(chinese_summary)
|
| 113 |
|
| 114 |
-
with open("/home/luigi/tiny-scribe/english_summary.txt", 'w', encoding='utf-8') as f:
|
| 115 |
-
f.write(english_summary)
|
| 116 |
-
|
| 117 |
print("\nSummaries saved to chinese_summary.txt and english_summary.txt.")
|
| 118 |
|
| 119 |
# Clean up
|
|
|
|
| 18 |
seed=1337,
|
| 19 |
n_ctx=32768, # Context size
|
| 20 |
verbose=False, # Reduced verbosity for cleaner output
|
| 21 |
+
n_batch=2048,
|
| 22 |
+
n_ubatch=1024,
|
| 23 |
v_type=2,
|
| 24 |
+
k_type=2,
|
| 25 |
+
n_threads=6
|
| 26 |
)
|
| 27 |
|
| 28 |
return llm
|
|
|
|
| 33 |
content = f.read()
|
| 34 |
return content
|
| 35 |
|
| 36 |
+
def stream_summarize_transcript(llm, transcript):
|
| 37 |
"""
|
| 38 |
Perform live streaming summary by getting real-time token output from the model.
|
| 39 |
|
| 40 |
Args:
|
| 41 |
llm: The loaded language model
|
| 42 |
transcript: The full transcript to summarize
|
|
|
|
| 43 |
"""
|
| 44 |
|
| 45 |
# Use the model's chat format based on its template
|
| 46 |
+
messages = [
|
| 47 |
+
{"role": "system", "content": "你是一個有助的助手,負責總結轉錄內容。"},
|
| 48 |
+
{"role": "user", "content": f"請總結以下內容:\n\n{transcript}"}
|
| 49 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
# Generate the summary using streaming completion
|
| 52 |
+
print(f"\nStreaming zh-TW summary:")
|
| 53 |
print("="*50)
|
| 54 |
|
| 55 |
full_response = ""
|
|
|
|
| 96 |
print(transcript[:500] + "..." if len(transcript) > 500 else transcript)
|
| 97 |
|
| 98 |
# Summarize in Chinese (zh-TW) with streaming
|
| 99 |
+
chinese_summary = stream_summarize_transcript(llm, transcript)
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
# Save summaries to files
|
| 102 |
with open("/home/luigi/tiny-scribe/chinese_summary.txt", 'w', encoding='utf-8') as f:
|
| 103 |
f.write(chinese_summary)
|
| 104 |
|
|
|
|
|
|
|
|
|
|
| 105 |
print("\nSummaries saved to chinese_summary.txt and english_summary.txt.")
|
| 106 |
|
| 107 |
# Clean up
|