Luigi committed on
Commit
7024e68
·
verified ·
1 Parent(s): 01b1240

Create summarization.py

Browse files
Files changed (1) hide show
  1. src/summarization.py +40 -0
src/summarization.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # summarization.py
2
+ from llama_cpp import Llama
3
+ from utils import available_gguf_llms, s2tw_converter
4
+ import time
5
+
6
def get_model(gguf_repo_id, gguf_filename):
    """Load a GGUF model through ``Llama.from_pretrained``.

    Args:
        gguf_repo_id: Repository id, forwarded as ``repo_id``.
        gguf_filename: Model file name, forwarded as ``filename``.

    Returns:
        The object returned by ``Llama.from_pretrained`` for these settings.
    """
    load_options = {
        "repo_id": gguf_repo_id,
        "filename": gguf_filename,
        "verbose": False,
        "n_ctx": 32768,
        "n_threads": 4,
        # NOTE(review): repeat_penalty is usually a per-request sampling
        # option; confirm the constructor actually honors it here.
        "repeat_penalty": 1.2,
    }
    return Llama.from_pretrained(**load_options)
15
+
16
def summarize_transcript(transcript, selected_gguf_model, prompt_input):
    """Stream a summary of ``transcript`` generated by the selected model.

    This is a generator: it loads the model, starts a streaming chat
    completion and yields the accumulated summary after every received
    piece of text, followed by one final yield when the stream ends.

    Args:
        transcript: Transcript text; placed after ``prompt_input`` in the
            user message.
        selected_gguf_model: Key into ``available_gguf_llms``; the entry is
            unpacked as a ``(repo_id, filename)`` pair.
        prompt_input: Instruction text placed before the transcript.

    Yields:
        ``(text, status)`` tuples, where ``text`` is the summary so far
        passed through ``s2tw_converter.convert`` and ``status`` is
        ``"Summarizing"`` while streaming, then ``"Summary complete"`` once.
    """
    repo_id, filename = available_gguf_llms[selected_gguf_model]
    llm = get_model(repo_id, filename)
    pieces = []
    awaiting_first_token = True
    # perf_counter is monotonic, so the latency below cannot be skewed by
    # wall-clock adjustments.
    started = time.perf_counter()

    stream = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": "You are an expert in transcript summarization."},
            {"role": "user", "content": f'{prompt_input} \n{transcript}'},
        ],
        stream=True,
        # The repetition penalty is a sampling option, so it has to be given
        # on the completion request to take effect.
        repeat_penalty=1.2,
    )

    for chunk in stream:
        token = chunk['choices'][0]['delta'].get('content')
        if token is None:
            # Deltas without text (e.g. role-only or final chunks) carry
            # nothing to append; str(None) would leak "None" into the text.
            continue
        if awaiting_first_token:
            print(f"Time to 1st Token: {time.perf_counter() - started:.1f} sec")
            awaiting_first_token = False
        pieces.append(str(token))
        # The whole accumulated text is re-converted on each step so the
        # caller always receives a complete, converted summary so far.
        yield s2tw_converter.convert("".join(pieces)), "Summarizing"
    yield s2tw_converter.convert("".join(pieces)), "Summary complete"