Luigi committed on
Commit
0745d93
·
1 Parent(s): f8cb070

output summary only in zh-TW

Browse files
Files changed (1) hide show
  1. summarize_transcript.py +11 -23
summarize_transcript.py CHANGED
@@ -18,10 +18,11 @@ def load_model():
18
  seed=1337,
19
  n_ctx=32768, # Context size
20
  verbose=False, # Reduced verbosity for cleaner output
21
- n_batch=1024,
22
- n_ubatch=512,
23
  v_type=2,
24
- k_type=2
 
25
  )
26
 
27
  return llm
@@ -32,30 +33,23 @@ def read_transcript(file_path):
32
  content = f.read()
33
  return content
34
 
35
- def stream_summarize_transcript(llm, transcript, language='zh-TW'):
36
  """
37
  Perform live streaming summary by getting real-time token output from the model.
38
 
39
  Args:
40
  llm: The loaded language model
41
  transcript: The full transcript to summarize
42
- language: Language for the summary ('en' or 'zh-TW')
43
  """
44
 
45
  # Use the model's chat format based on its template
46
- if language == 'en':
47
- messages = [
48
- {"role": "system", "content": "You are a helpful assistant that summarizes transcripts."},
49
- {"role": "user", "content": f"Please summarize the following transcript:\n\n{transcript}"}
50
- ]
51
- else: # Default to zh-TW
52
- messages = [
53
- {"role": "system", "content": "你是一個有助的助手,負責總結轉錄內容。"},
54
- {"role": "user", "content": f"請總結以下內容:\n\n{transcript}"}
55
- ]
56
 
57
  # Generate the summary using streaming completion
58
- print(f"\nStreaming {language} summary:")
59
  print("="*50)
60
 
61
  full_response = ""
@@ -102,18 +96,12 @@ def main():
102
  print(transcript[:500] + "..." if len(transcript) > 500 else transcript)
103
 
104
  # Summarize in Chinese (zh-TW) with streaming
105
- chinese_summary = stream_summarize_transcript(llm, transcript, language='zh-TW')
106
-
107
- # Summarize in English with streaming
108
- english_summary = stream_summarize_transcript(llm, transcript, language='en')
109
 
110
  # Save summaries to files
111
  with open("/home/luigi/tiny-scribe/chinese_summary.txt", 'w', encoding='utf-8') as f:
112
  f.write(chinese_summary)
113
 
114
- with open("/home/luigi/tiny-scribe/english_summary.txt", 'w', encoding='utf-8') as f:
115
- f.write(english_summary)
116
-
117
  print("\nSummaries saved to chinese_summary.txt and english_summary.txt.")
118
 
119
  # Clean up
 
18
  seed=1337,
19
  n_ctx=32768, # Context size
20
  verbose=False, # Reduced verbosity for cleaner output
21
+ n_batch=2048,
22
+ n_ubatch=1024,
23
  v_type=2,
24
+ k_type=2,
25
+ n_threads=6
26
  )
27
 
28
  return llm
 
33
  content = f.read()
34
  return content
35
 
36
+ def stream_summarize_transcript(llm, transcript):
37
  """
38
  Perform live streaming summary by getting real-time token output from the model.
39
 
40
  Args:
41
  llm: The loaded language model
42
  transcript: The full transcript to summarize
 
43
  """
44
 
45
  # Use the model's chat format based on its template
46
+ messages = [
47
+ {"role": "system", "content": "你是一個有助的助手,負責總結轉錄內容。"},
48
+ {"role": "user", "content": f"請總結以下內容:\n\n{transcript}"}
49
+ ]
 
 
 
 
 
 
50
 
51
  # Generate the summary using streaming completion
52
+ print(f"\nStreaming zh-TW summary:")
53
  print("="*50)
54
 
55
  full_response = ""
 
96
  print(transcript[:500] + "..." if len(transcript) > 500 else transcript)
97
 
98
  # Summarize in Chinese (zh-TW) with streaming
99
+ chinese_summary = stream_summarize_transcript(llm, transcript)
 
 
 
100
 
101
  # Save summaries to files
102
  with open("/home/luigi/tiny-scribe/chinese_summary.txt", 'w', encoding='utf-8') as f:
103
  f.write(chinese_summary)
104
 
 
 
 
105
  print("\nSummaries saved to chinese_summary.txt and english_summary.txt.")
106
 
107
  # Clean up