tieuyentu committed on
Commit
a40c7f4
·
verified ·
1 Parent(s): 20df720

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -0
app.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import re
4
+ from transformers import (
5
+ AutoModelForSeq2SeqLM,
6
+ AutoTokenizer,
7
+ WhisperProcessor,
8
+ WhisperForConditionalGeneration
9
+ )
10
+
11
+
12
# All inference in this app runs on the CPU; both models are moved there once,
# at import time.
device = "cpu"

# -----------------------------
# LOAD MODELS
# -----------------------------
# Hugging Face Hub checkpoint names, kept in one place so the processor /
# tokenizer and its model are always loaded from the same checkpoint.
ASR_CHECKPOINT = "openai/whisper-small"
SUMMARIZER_CHECKPOINT = "VietAI/vit5-base-vietnews-summarization"

print("Loading Whisper-small...")
asr_processor = WhisperProcessor.from_pretrained(ASR_CHECKPOINT)
asr_model = WhisperForConditionalGeneration.from_pretrained(ASR_CHECKPOINT)
asr_model = asr_model.to(device)

print("Loading ViT5 summarization model...")
sum_tokenizer = AutoTokenizer.from_pretrained(SUMMARIZER_CHECKPOINT)
sum_model = AutoModelForSeq2SeqLM.from_pretrained(SUMMARIZER_CHECKPOINT)
sum_model = sum_model.to(device)
24
+
25
+
26
+ # -----------------------------
27
+ # TRANSCRIPT CLEANER
28
+ # -----------------------------
29
# Vietnamese filler words/phrases stripped from transcripts. Multi-word phrases
# come first so that e.g. "kiểu như" is removed as a unit before "kiểu".
_FILLER_PHRASES = (
    "kiểu như", "kiểu kiểu", "nói chung", "tức là", "ý là",
    "kiểu", "thì", "ờm", "ừm", "ok", "ờ", "ơ", "à", "a",
)
# Compiled once at import time; \b keeps matches on whole words only.
_FILLER_RE = re.compile(
    r"\b(?:" + "|".join(re.escape(p) for p in _FILLER_PHRASES) + r")\b"
)


def clean_transcript(text):
    """Return a lower-noise version of a raw transcript.

    The text is lower-cased, filler words are removed, whitespace and
    punctuation left dangling by the removal are tidied up, and the first
    letter of every sentence is capitalised.

    Args:
        text: Raw transcript. ``None`` is treated as an empty string.

    Returns:
        The cleaned transcript, or ``""`` when nothing but fillers,
        punctuation and whitespace remains.
    """
    import unicodedata  # local import: this is the only user in the file

    # Compose decomposed (NFD) characters first. Otherwise a standalone-word
    # pattern such as \ba\b matches the bare base letter of "a" + combining
    # mark and destroys words like "ăn".
    cleaned = unicodedata.normalize("NFC", text or "").lower()

    cleaned = _FILLER_RE.sub("", cleaned)

    # Collapse redundant whitespace.
    cleaned = re.sub(r"\s+", " ", cleaned).strip()

    # Repair punctuation orphaned by filler removal.
    # "xong. . tôi" -> "xong. tôi"  (a sentence that consisted only of fillers)
    cleaned = re.sub(r"(?<=[.!?])(?:\s+[.!?]+)+", "", cleaned)
    # "đi , về" -> "đi, về"
    cleaned = re.sub(r"\s+(?=[,;:.!?])", "", cleaned)
    # "đi,, về" -> "đi, về"   and   "đi,." -> "đi."
    cleaned = re.sub(r"[,;:]+(?=[,;:.!?])", "", cleaned)
    # "xong., tôi" -> "xong. tôi"
    cleaned = re.sub(r"(?<=[.!?])[,;:]+", "", cleaned)

    # Capitalise the start of each sentence. Splitting only on whitespace that
    # follows a terminator keeps the terminator itself (including the final
    # one) and leaves decimals such as "3.5" intact.
    sentences = []
    for chunk in re.split(r"(?<=[.!?])\s+", cleaned):
        chunk = chunk.lstrip(" ,;:.!?")
        if re.search(r"\w", chunk):
            sentences.append(chunk[0].upper() + chunk[1:])

    return " ".join(sentences).rstrip(" ,;:")
52
+
53
+
54
+ # -----------------------------
55
+ # NOTE MAKER (ViT5)
56
+ # -----------------------------
57
def make_notes(text):
    """Generate bullet-style notes for ``text`` with the ViT5 model.

    Args:
        text: Cleaned transcript. ``None``, empty and whitespace-only input
            short-circuit so the model is never run on an empty prompt.

    Returns:
        The decoded model output with every "•" bullet placed on its own
        line, or ``""`` for empty input.
    """
    if not text or not text.strip():
        return ""

    prompt = "bullet_points: " + text

    # The tokenizer input is truncated to 512 tokens.
    inputs = sum_tokenizer(
        prompt, return_tensors="pt", truncation=True, max_length=512
    ).to(device)

    with torch.no_grad():
        ids = sum_model.generate(
            # Unpack the whole encoding so the attention mask is passed along
            # with the input ids.
            **inputs,
            max_length=200,
            num_beams=4,
            early_stopping=True,
        )

    notes = sum_tokenizer.decode(ids[0], skip_special_tokens=True)

    # Easier to read: one bullet per line. Whitespace around each bullet is
    # normalised so the result has no leading newline or doubled spaces.
    return re.sub(r"\s*•\s*", "\n• ", notes).strip()
76
+
77
+
78
+ # -----------------------------
79
+ # ASR SPEECH → TEXT
80
+ # -----------------------------
81
# Sample rate the Whisper feature extractor is configured for.
_WHISPER_SAMPLE_RATE = 16000


def _to_whisper_waveform(sr, wav):
    """Convert raw audio samples to a mono float32 array at 16 kHz."""
    samples = torch.as_tensor(wav)

    # Integer PCM -> float in [-1, 1).
    if samples.is_floating_point():
        samples = samples.to(torch.float32)
    else:
        full_scale = float(torch.iinfo(samples.dtype).max) + 1.0
        samples = samples.to(torch.float32) / full_scale

    # Down-mix multi-channel audio.
    # NOTE(review): assumes a (samples, channels) layout — confirm against the
    # Gradio version in use.
    if samples.ndim > 1:
        samples = samples.mean(dim=-1)

    if sr != _WHISPER_SAMPLE_RATE and samples.numel() > 0:
        target_len = max(1, round(samples.numel() * _WHISPER_SAMPLE_RATE / sr))
        if sr > _WHISPER_SAMPLE_RATE:
            # "area" averages neighbouring samples, which acts as a crude
            # low-pass filter when down-sampling.
            resampled = torch.nn.functional.interpolate(
                samples.view(1, 1, -1), size=target_len, mode="area"
            )
        else:
            resampled = torch.nn.functional.interpolate(
                samples.view(1, 1, -1),
                size=target_len,
                mode="linear",
                align_corners=False,
            )
        samples = resampled.view(-1)

    return samples.numpy()


def speech_to_text(audio):
    """Transcribe audio with Whisper.

    Args:
        audio: ``(sample_rate, samples)`` pair as delivered by the
            ``gr.Audio(type="numpy")`` input, or ``None`` when no audio was
            provided.

    Returns:
        The decoded transcript, or ``""`` when there is no audio to decode.
    """
    if audio is None:
        return ""

    sr, wav = audio
    samples = _to_whisper_waveform(sr, wav)
    if samples.size == 0:
        return ""

    features = asr_processor(
        samples, sampling_rate=_WHISPER_SAMPLE_RATE, return_tensors="pt"
    ).input_features.to(device)

    with torch.no_grad():
        ids = asr_model.generate(features)

    return asr_processor.batch_decode(ids, skip_special_tokens=True)[0]
90
+
91
+
92
+ # -----------------------------
93
+ # FULL PIPELINE
94
+ # -----------------------------
95
def pipeline(audio):
    """Run the full chain: speech -> transcript -> cleaned -> summary -> notes.

    Args:
        audio: Value of the audio input component, or ``None`` when the user
            pressed "Run" without providing audio.

    Returns:
        A 4-tuple ``(raw_text, cleaned, summary, notes)`` matching the order
        of the output text boxes. Stages that have nothing to work on yield
        empty strings instead of raising or invoking a model on empty input.
    """
    if audio is None:
        return "", "", "", ""

    raw_text = speech_to_text(audio)
    cleaned = clean_transcript(raw_text)

    # Nothing left after cleaning: skip both generation steps.
    if not cleaned:
        return raw_text, cleaned, "", ""

    summary = summarize_text(cleaned)
    notes = make_notes(cleaned)
    return raw_text, cleaned, summary, notes
101
+
102
+
103
+ # (reuse the previous summarize_text function)
104
def summarize_text(text):
    """Summarise ``text`` with the ViT5 model.

    Args:
        text: Text to summarise. ``None``, empty and whitespace-only input
            short-circuit so the model is never run on an empty prompt.

    Returns:
        The decoded summary, or ``""`` for empty input.
    """
    if not text or not text.strip():
        return ""

    # NOTE(review): the checkpoint's model card may expect a different task
    # prefix than "summarize: " — confirm before changing it.
    inputs = sum_tokenizer(
        "summarize: " + text,
        return_tensors="pt",
        truncation=True,
        # Explicit limit, matching make_notes, so truncation does not depend
        # on the tokenizer's configured default length.
        max_length=512,
    ).to(device)

    with torch.no_grad():
        # Unpack the whole encoding so the attention mask is passed along
        # with the input ids.
        ids = sum_model.generate(**inputs, num_beams=4, max_length=150)

    return sum_tokenizer.decode(ids[0], skip_special_tokens=True)
109
+
110
+
111
+ # -----------------------------
112
+ # GRADIO UI
113
+ # -----------------------------
114
# One audio input, four read-only text outputs and a button that runs the
# whole pipeline. Components appear in the order they are created below.
with gr.Blocks() as app:
    gr.Markdown("## 🎧 Speech → Text → Cleaner → Summary → Notes")

    audio_in = gr.Audio(type="numpy", label="Upload / Record Audio")

    raw_out = gr.Textbox(label="Raw Transcript (Whisper)", lines=6)
    clean_out = gr.Textbox(label="Cleaned Transcript", lines=6)
    summary_out = gr.Textbox(label="Summary", lines=5)
    notes_out = gr.Textbox(label="AI Notes", lines=6)

    btn = gr.Button("Run")

    # The output order must match the tuple returned by pipeline().
    btn.click(
        pipeline,
        inputs=audio_in,
        outputs=[raw_out, clean_out, summary_out, notes_out],
    )

# Only start the web server when executed as a script, so importing this
# module does not launch it as a side effect.
if __name__ == "__main__":
    app.launch()