Luigi committed on
Commit
d4fd1c3
·
1 Parent(s): 7ac9e1f

refactor: fix streaming summary bug and simplify to streaming-only output

Browse files
Files changed (2) hide show
  1. README.md +37 -0
  2. summarize_transcript.py +81 -26
README.md ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Transcript Summarization Script
2
+
3
+ This script provides functionality to summarize transcripts using the Falcon-H1-Tiny-Multilingual model with SYCL acceleration. It focuses on live streaming summarization for immediate feedback.
4
+
5
+ ## Key Features
6
+
7
+ ### 1. State Isolation
8
+ Each summarization call ensures a clean state by calling `llm.reset()` after each operation. This prevents any carryover from previous summarizations, ensuring consistent and independent results.
9
+
10
+ ### 2. Live Streaming Summary
11
+ The script implements a live streaming summary feature that generates the summary in real-time, displaying tokens as they are produced by the model. This provides immediate feedback.
12
+
13
+ ### 3. Multi-language Support
14
+ The script supports both English and Traditional Chinese (zh-TW) summarization.
15
+
16
+ ## Functions
17
+
18
+ ### `stream_summarize_transcript(llm, transcript, language='zh-TW')`
19
+ Performs live streaming summary by generating the summary in real-time and displaying tokens as they are produced by the model.
20
+
21
+ ## Improvements Made
22
+
23
+ 1. **Streaming-Only Workflow**: Simplified the script to focus on real-time streaming for all summaries.
24
+ 2. **State Isolation**: Added `llm.reset()` calls after each summarization to ensure clean state between operations.
25
+ 3. **True Live Streaming**: Implemented real-time token streaming using `create_chat_completion` for immediate output display.
26
+ 4. **Reduced Verbosity**: Set `verbose=False` for cleaner output during model operations.
27
+
28
+ ## Usage
29
+
30
+ ```bash
31
+ python summarize_transcript.py
32
+ ```
33
+
34
+ The script will:
35
+ 1. Load the model.
36
+ 2. Generate Chinese and English summaries using live streaming.
37
+ 3. Save the summaries to `chinese_summary.txt` and `english_summary.txt`.
summarize_transcript.py CHANGED
@@ -9,7 +9,7 @@ from huggingface_hub import hf_hub_download
9
 
10
  def load_model():
11
  """Load the model from Hugging Face Hub."""
12
-
13
  # Initialize the model with SYCL support
14
  llm = Llama.from_pretrained(
15
  repo_id="Luigi/Falcon-H1-Tiny-Multilingual-100M-Instruct-GGUF",
@@ -17,13 +17,13 @@ def load_model():
17
  n_gpu_layers=-1, # Use all layers on GPU
18
  seed=1337,
19
  n_ctx=32768, # Context size
20
- verbose=True,
21
  n_batch=1024,
22
  n_ubatch=512,
23
  v_type=2,
24
  k_type=2
25
  )
26
-
27
  return llm
28
 
29
  def read_transcript(file_path):
@@ -64,46 +64,101 @@ def summarize_transcript(llm, transcript, language='zh-TW'):
64
  stop=["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"]
65
  )
66
 
 
67
  llm.reset()
68
-
69
  return output['choices'][0]['message']['content'].strip()
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  def main():
72
  print("Loading Falcon-H1-Tiny-Multilingual model with SYCL acceleration...")
73
-
74
  # Load the model
75
  llm = load_model()
76
-
77
  # Read the transcript
78
  transcript_path = "/home/luigi/tiny-scribe/transcripts/short.txt"
79
  transcript = read_transcript(transcript_path)
80
-
81
- print("\nOriginal Transcript:")
82
  print(transcript[:500] + "..." if len(transcript) > 500 else transcript)
83
-
84
- # Summarize in Chinese (zh-TW)
85
- print("\nGenerating Chinese (zh-TW) summary...")
86
- chinese_summary = summarize_transcript(llm, transcript, language='zh-TW')
87
- print("Chinese Summary:")
88
- print(chinese_summary)
89
-
90
- # Summarize in English
91
- print("\nGenerating English summary...")
92
- english_summary = summarize_transcript(llm, transcript, language='en')
93
- print("English Summary:")
94
- print(english_summary)
95
-
96
  # Save summaries to files
97
  with open("/home/luigi/tiny-scribe/chinese_summary.txt", 'w', encoding='utf-8') as f:
98
  f.write(chinese_summary)
99
-
100
  with open("/home/luigi/tiny-scribe/english_summary.txt", 'w', encoding='utf-8') as f:
101
  f.write(english_summary)
102
-
103
- print("\nSummaries saved to files.")
104
-
105
  # Clean up
106
  del llm
107
 
108
  if __name__ == "__main__":
109
- main()
 
9
 
10
  def load_model():
11
  """Load the model from Hugging Face Hub."""
12
+
13
  # Initialize the model with SYCL support
14
  llm = Llama.from_pretrained(
15
  repo_id="Luigi/Falcon-H1-Tiny-Multilingual-100M-Instruct-GGUF",
 
17
  n_gpu_layers=-1, # Use all layers on GPU
18
  seed=1337,
19
  n_ctx=32768, # Context size
20
+ verbose=False, # Reduced verbosity for cleaner output
21
  n_batch=1024,
22
  n_ubatch=512,
23
  v_type=2,
24
  k_type=2
25
  )
26
+
27
  return llm
28
 
29
  def read_transcript(file_path):
 
64
  stop=["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"]
65
  )
66
 
67
+ # Reset the model state to ensure clean state for next call
68
  llm.reset()
69
+
70
  return output['choices'][0]['message']['content'].strip()
71
 
72
+ def stream_summarize_transcript(llm, transcript, language='zh-TW'):
73
+ """
74
+ Perform live streaming summary by getting real-time token output from the model.
75
+
76
+ Args:
77
+ llm: The loaded language model
78
+ transcript: The full transcript to summarize
79
+ language: Language for the summary ('en' or 'zh-TW')
80
+ """
81
+ # Truncate the transcript to fit within the context window
82
+ max_transcript_length = 1000 # Leave room for prompt and response
83
+
84
+ if len(transcript) > max_transcript_length:
85
+ transcript = transcript[:max_transcript_length]
86
+ print(f"Transcript truncated to {max_transcript_length} characters to fit context window.")
87
+
88
+ # Use the model's chat format based on its template
89
+ if language == 'en':
90
+ messages = [
91
+ {"role": "system", "content": "You are a helpful assistant that summarizes transcripts."},
92
+ {"role": "user", "content": f"Please summarize the following transcript:\n\n{transcript}"}
93
+ ]
94
+ else: # Default to zh-TW
95
+ messages = [
96
+ {"role": "system", "content": "你是一個有助的助手,負責總結轉錄內容。"},
97
+ {"role": "user", "content": f"請總結以下內容:\n\n{transcript}"}
98
+ ]
99
+
100
+ # Generate the summary using streaming completion
101
+ print(f"\nStreaming {language} summary:")
102
+ print("="*50)
103
+
104
+ full_response = ""
105
+
106
+ stream = llm.create_chat_completion(
107
+ messages=messages,
108
+ max_tokens=512,
109
+ temperature=0.3,
110
+ top_p=0.9,
111
+ repeat_penalty=1.1,
112
+ stop=["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"],
113
+ stream=True
114
+ )
115
+
116
+ for chunk in stream:
117
+ if 'choices' in chunk and len(chunk['choices']) > 0:
118
+ delta = chunk['choices'][0].get('delta', {})
119
+ content = delta.get('content', '')
120
+ if content:
121
+ print(content, end='', flush=True)
122
+ full_response += content
123
+
124
+ print("\n" + "="*50)
125
+
126
+ # Reset the model state to ensure clean state for next call
127
+ llm.reset()
128
+
129
+ return full_response.strip()
130
+
131
+
132
  def main():
133
  print("Loading Falcon-H1-Tiny-Multilingual model with SYCL acceleration...")
134
+
135
  # Load the model
136
  llm = load_model()
137
+
138
  # Read the transcript
139
  transcript_path = "/home/luigi/tiny-scribe/transcripts/short.txt"
140
  transcript = read_transcript(transcript_path)
141
+
142
+ print("\nOriginal Transcript (Preview):")
143
  print(transcript[:500] + "..." if len(transcript) > 500 else transcript)
144
+
145
+ # Summarize in Chinese (zh-TW) with streaming
146
+ chinese_summary = stream_summarize_transcript(llm, transcript, language='zh-TW')
147
+
148
+ # Summarize in English with streaming
149
+ english_summary = stream_summarize_transcript(llm, transcript, language='en')
150
+
 
 
 
 
 
 
151
  # Save summaries to files
152
  with open("/home/luigi/tiny-scribe/chinese_summary.txt", 'w', encoding='utf-8') as f:
153
  f.write(chinese_summary)
154
+
155
  with open("/home/luigi/tiny-scribe/english_summary.txt", 'w', encoding='utf-8') as f:
156
  f.write(english_summary)
157
+
158
+ print("\nSummaries saved to chinese_summary.txt and english_summary.txt.")
159
+
160
  # Clean up
161
  del llm
162
 
163
  if __name__ == "__main__":
164
+ main()