Spaces:

tieuyentu
/

aiapppp

Sleeping

App Files Files Community

tieuyentu committed on Dec 5, 2025

Commit

a40c7f4

verified ·

1 Parent(s): 20df720

Upload app.py

Browse files

Files changed (1) hide show

app.py +132 -0

app.py ADDED Viewed

	@@ -0,0 +1,132 @@

import re
import unicodedata

import gradio as gr
import torch
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    WhisperProcessor,
    WhisperForConditionalGeneration,
)
# Device that the models below are moved to.
device = "cpu"

# -----------------------------
# LOAD MODELS
# -----------------------------
# Both checkpoints are loaded once, when this module is executed, and are
# then shared by every request handler defined further down.
_asr_checkpoint = "openai/whisper-small"
_sum_checkpoint = "VietAI/vit5-base-vietnews-summarization"

# Speech recognition: processor (feature extraction + decoding) and model.
print("Loading Whisper-small...")
asr_processor = WhisperProcessor.from_pretrained(_asr_checkpoint)
asr_model = WhisperForConditionalGeneration.from_pretrained(_asr_checkpoint)
asr_model = asr_model.to(device)

# Text generation: one seq2seq model serves both summaries and notes.
print("Loading ViT5 summarization model...")
sum_tokenizer = AutoTokenizer.from_pretrained(_sum_checkpoint)
sum_model = AutoModelForSeq2SeqLM.from_pretrained(_sum_checkpoint)
sum_model = sum_model.to(device)
+# -----------------------------
+# TRANSCRIPT CLEANER
+# -----------------------------
# Spoken-Vietnamese filler words and phrases that are dropped from transcripts.
_FILLER_PHRASES = (
    "ờ", "ừm", "ơ", "ờm", "a", "à",
    "kiểu như", "kiểu",
    "nói chung",
    "ý là",
    "ok", "kiểu kiểu",
    "tức là",
    "thì",
)

# One alternation, longest phrase first, so "kiểu như" is consumed as a whole
# before the shorter "kiểu" gets a chance to match. Phrases are normalized to
# NFC to line up with the NFC-normalized input inside clean_transcript().
_FILLER_RE = re.compile(
    r"\b(?:"
    + "|".join(
        re.escape(unicodedata.normalize("NFC", phrase))
        for phrase in sorted(_FILLER_PHRASES, key=len, reverse=True)
    )
    + r")\b"
)
_WHITESPACE_RE = re.compile(r"\s+")
# Whitespace left in front of punctuation once a filler has been removed.
_SPACE_BEFORE_PUNCT_RE = re.compile(r"\s+([,;:.!?])")
# Runs of separators such as ",," or ":," produced by "x, <filler>, y".
_SEPARATOR_RUN_RE = re.compile(r"([,;:])(?:\s*[,;:])+")
# A separator sitting directly in front of sentence-ending punctuation.
_SEPARATOR_BEFORE_END_RE = re.compile(r"[,;:]+([.!?])")
# Separators, spaces or extra periods at the very start of the text or right
# after a sentence end. A period inside "3.5" is not followed by any of these
# characters, so decimals are left alone.
_SENTENCE_LEAD_RE = re.compile(r"(^|[.!?])[\s,;:.]+")
# First word character of the text, or of any sentence after ". ", "! ", "? ".
_SENTENCE_START_RE = re.compile(r"(?:^|(?<=[.!?] ))\w")


def clean_transcript(text):
    """Return *text* lower-cased, without filler words, with tidy punctuation.

    The text is normalized to NFC and lower-cased, every filler phrase in
    ``_FILLER_PHRASES`` is removed, whitespace is collapsed, punctuation that
    was orphaned by a removed filler is dropped, and the first letter of each
    sentence is upper-cased. Sentence-ending punctuation (including the final
    one) is kept, and periods inside numbers do not start a new sentence.

    Args:
        text: Raw transcript string.

    Returns:
        The cleaned transcript; an empty string when nothing is left.
    """
    # NFC keeps each accented letter as one code point. With decomposed input
    # the combining marks are not word characters, so r"\ba\b" would strip the
    # base letter of "à" and leave a stray accent behind.
    cleaned = unicodedata.normalize("NFC", text).lower()

    cleaned = _FILLER_RE.sub("", cleaned)

    # Collapse the extra whitespace left behind by removed fillers.
    cleaned = _WHITESPACE_RE.sub(" ", cleaned)

    # Repair punctuation that lost the word it belonged to.
    cleaned = _SPACE_BEFORE_PUNCT_RE.sub(r"\1", cleaned)
    cleaned = _SEPARATOR_RUN_RE.sub(r"\1", cleaned)
    cleaned = _SEPARATOR_BEFORE_END_RE.sub(r"\1", cleaned)
    cleaned = _SENTENCE_LEAD_RE.sub(r"\1 ", cleaned).strip()

    # Capitalize the first letter of every sentence.
    return _SENTENCE_START_RE.sub(lambda m: m.group().upper(), cleaned)
+# -----------------------------
+# NOTE MAKER (ViT5)
+# -----------------------------
def make_notes(text):
    """Generate bullet-style notes for *text* with the seq2seq model.

    The text is prefixed with ``"bullet_points: "``, truncated to 512 tokens,
    and decoded with 4-beam search up to 200 tokens. Every bullet character
    in the output is moved onto its own line.

    Args:
        text: Cleaned transcript. ``None``, empty and whitespace-only input
            are accepted.

    Returns:
        The generated notes, or ``""`` when there is no text to process.
    """
    # Skip the model entirely when there is nothing to summarize.
    if not text or not text.strip():
        return ""

    # NOTE(review): the checkpoint is a news summarizer; confirm that it
    # actually responds to this task prefix.
    prompt = "bullet_points: " + text
    inputs = sum_tokenizer(
        prompt, return_tensors="pt", truncation=True, max_length=512
    ).to(device)

    with torch.no_grad():
        # Unpack the whole encoding so the attention mask reaches generate()
        # together with the input ids.
        ids = sum_model.generate(
            **inputs,
            max_length=200,
            num_beams=4,
            early_stopping=True,
        )

    notes = sum_tokenizer.decode(ids[0], skip_special_tokens=True)
    # One bullet per line for readability, without the leading blank line or
    # doubled spaces that a plain str.replace would introduce.
    return re.sub(r"\s*•\s*", "\n• ", notes).strip()
+# -----------------------------
+# ASR SPEECH → TEXT
+# -----------------------------
# Sample rate handed to the Whisper processor.
_WHISPER_SAMPLE_RATE = 16000


def _prepare_waveform(sr, wav):
    """Convert raw audio to a mono float32 array at ``_WHISPER_SAMPLE_RATE``."""
    samples = torch.as_tensor(wav)

    # Integer PCM is scaled into [-1, 1]; float input is taken as-is.
    if samples.is_floating_point():
        samples = samples.to(torch.float32)
    else:
        full_scale = float(torch.iinfo(samples.dtype).max)
        samples = samples.to(torch.float32) / full_scale

    # Downmix multi-channel audio by averaging the channels.
    # presumably Gradio lays stereo out as (samples, channels) — TODO confirm
    if samples.ndim > 1:
        samples = samples.mean(dim=-1)

    if sr != _WHISPER_SAMPLE_RATE and samples.numel() > 1:
        target_len = max(1, round(samples.numel() * _WHISPER_SAMPLE_RATE / sr))
        # Linear interpolation needs no extra dependency. It applies no
        # anti-aliasing filter, so a dedicated resampler would give cleaner
        # results when downsampling.
        samples = torch.nn.functional.interpolate(
            samples[None, None, :],
            size=target_len,
            mode="linear",
            align_corners=False,
        )[0, 0]

    return samples.numpy()


def speech_to_text(audio):
    """Transcribe an audio clip with the Whisper model.

    Args:
        audio: ``(sample_rate, samples)`` tuple, or ``None`` when no audio
            was provided.

    Returns:
        The decoded transcript, or ``""`` when there is no audio to decode.
    """
    # Nothing was uploaded or recorded.
    if audio is None:
        return ""

    sr, wav = audio
    samples = _prepare_waveform(sr, wav)
    if samples.size == 0:
        return ""

    # The Whisper feature extractor expects 16 kHz audio, so the clip is
    # resampled above and the matching rate is declared here.
    features = asr_processor(
        samples, sampling_rate=_WHISPER_SAMPLE_RATE, return_tensors="pt"
    ).input_features.to(device)

    with torch.no_grad():
        ids = asr_model.generate(features)

    return asr_processor.batch_decode(ids, skip_special_tokens=True)[0]
+# -----------------------------
+# FULL PIPELINE
+# -----------------------------
def pipeline(audio):
    """Run transcription, cleaning, summarization and note-taking on *audio*.

    Args:
        audio: Value of the audio input; ``None`` when nothing was supplied.

    Returns:
        A 4-tuple ``(raw_text, cleaned, summary, notes)`` of strings. All
        four are empty when there is no audio; ``summary`` and ``notes`` are
        empty when the cleaned transcript is empty.
    """
    # Pressing "Run" without audio must not raise while unpacking the input.
    if audio is None:
        return "", "", "", ""

    raw_text = speech_to_text(audio)
    cleaned = clean_transcript(raw_text)

    # Do not ask the seq2seq model to summarize an empty transcript.
    if not cleaned:
        return raw_text, cleaned, "", ""

    summary = summarize_text(cleaned)
    notes = make_notes(cleaned)
    return raw_text, cleaned, summary, notes
+# (reuse the previous summarize_text function)
def summarize_text(text):
    """Summarize *text* with the seq2seq model.

    The text is prefixed with ``"summarize: "``, truncated to 512 tokens
    (the same limit ``make_notes`` uses), and decoded with 4-beam search up
    to 150 tokens.

    Args:
        text: Cleaned transcript. ``None``, empty and whitespace-only input
            are accepted.

    Returns:
        The generated summary, or ``""`` when there is no text to summarize.
    """
    # Same guard as make_notes: never run the model on empty input.
    if not text or not text.strip():
        return ""

    # NOTE(review): the checkpoint's model card example appears to use a
    # "vietnews: " prefix — confirm which prefix it was trained with.
    inputs = sum_tokenizer(
        "summarize: " + text,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    ).to(device)

    with torch.no_grad():
        # Unpack the whole encoding so the attention mask reaches generate()
        # together with the input ids.
        ids = sum_model.generate(**inputs, num_beams=4, max_length=150)

    return sum_tokenizer.decode(ids[0], skip_special_tokens=True)
+# -----------------------------
+# GRADIO UI
+# -----------------------------
with gr.Blocks() as app:
    # Page heading.
    gr.Markdown("## 🎧 Speech → Text → Cleaner → Summary → Notes")

    # Input: uploaded or recorded audio, requested in "numpy" format.
    audio_in = gr.Audio(label="Upload / Record Audio", type="numpy")

    # Outputs, declared in the same order as the values pipeline() returns.
    raw_out = gr.Textbox(lines=6, label="Raw Transcript (Whisper)")
    clean_out = gr.Textbox(lines=6, label="Cleaned Transcript")
    summary_out = gr.Textbox(lines=5, label="Summary")
    notes_out = gr.Textbox(lines=6, label="AI Notes")

    # The button runs the full pipeline and fills all four text boxes.
    btn = gr.Button("Run")
    _result_boxes = [raw_out, clean_out, summary_out, notes_out]
    btn.click(pipeline, inputs=audio_in, outputs=_result_boxes)

# Start the web server as soon as the interface has been built.
app.launch()