# SmartScribe / app.py
# (Hugging Face Space file; last updated in commit 33cfb41 by thomasanto7001)
import whisper
from moviepy.video.io.VideoFileClip import VideoFileClip
from transformers import pipeline
import nltk
import os
import re
import random
import subprocess
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords
# One-time NLTK resource fetch (no-op if already cached locally).
for _resource in ('punkt_tab', 'stopwords'):
    nltk.download(_resource)
# Module-level NLP objects, built once per process for speed.
stop_words = set(stopwords.words('english'))
# device=-1 forces CPU inference; use device=0 to run on the first GPU.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=-1)
# 🧠 Function to download YouTube video
def download_youtube_video(youtube_url, filename="youtube_video.mp4"):
    """Download a YouTube video with yt-dlp.

    Args:
        youtube_url: URL of the video to fetch.
        filename: Output path passed to yt-dlp's ``-o`` option.

    Returns:
        The output filename.

    Raises:
        RuntimeError: If yt-dlp exits with a non-zero status; the message
            includes yt-dlp's stderr output.
    """
    print(f"⬇️ Downloading YouTube video via yt-dlp: {youtube_url}")
    # NOTE(review): "best[ext=mp4]+bestaudio/best" merges a complete mp4
    # format with a separate audio stream — confirm this selector is intended
    # (plain "best" or "bv*[ext=mp4]+ba/b" may be what was meant).
    command = ["yt-dlp", "-f", "best[ext=mp4]+bestaudio/best", "-o", filename, youtube_url]
    result = subprocess.run(command, capture_output=True, text=True)
    if result.returncode != 0:
        # RuntimeError instead of bare Exception so callers can catch it
        # specifically; existing handlers catching Exception still work.
        raise RuntimeError("YouTube download failed: " + result.stderr)
    return filename
# 🎧 Audio extraction
def extract_audio(video_path):
    """Extract the audio track of *video_path* to a 16-bit PCM WAV file.

    Args:
        video_path: Path to the source video.

    Returns:
        The path of the written audio file ("temp_audio.wav").
    """
    audio_path = "temp_audio.wav"
    clip = VideoFileClip(video_path)
    try:
        # pcm_s16le = uncompressed 16-bit PCM WAV.
        clip.audio.write_audiofile(audio_path, codec='pcm_s16le')
    finally:
        # The original never closed the clip: VideoFileClip holds an open
        # ffmpeg reader, so leaking it leaks file handles/subprocesses.
        clip.close()
    return audio_path
# πŸ“ Audio transcription
def transcribe_audio(audio_path):
model = whisper.load_model("tiny.en") # Use "base.en" for slightly better quality
result = model.transcribe(audio_path)
return result["text"]
# 📄 Summary generator with batching
def generate_summary(text, default_max_len=130, default_min_len=30):
    """Summarize *text* with the shared module-level DistilBART pipeline.

    The text is split into sentences and summarized in chunks of 10
    sentences; the per-chunk summaries are concatenated.

    Args:
        text: Input text to summarize.
        default_max_len: Upper bound on each chunk summary's length (tokens).
        default_min_len: Lower bound on each chunk summary's length (tokens).

    Returns:
        The concatenated chunk summaries, stripped of trailing whitespace.
    """
    # Bug fix: the original rebuilt a default `pipeline("summarization")` on
    # every call, which is slow and silently ignored the DistilBART model
    # chosen at module level. Reuse the global `summarizer` instead.
    sentences = sent_tokenize(text)
    # Chunk sentences in groups of 10 so each request stays small enough for
    # the model's input limit.
    chunks = [' '.join(sentences[i:i + 10]) for i in range(0, len(sentences), 10)]
    parts = []
    for chunk in chunks:
        input_len = len(chunk.split())
        # Scale the target lengths down for short chunks so max_length never
        # exceeds the input length (transformers warns/errors otherwise).
        dynamic_max = max(20, min(default_max_len, input_len - 1))
        dynamic_min = max(10, min(default_min_len, dynamic_max - 10))
        result = summarizer(
            chunk,
            max_length=dynamic_max,
            min_length=dynamic_min,
            do_sample=False,
        )[0]["summary_text"]
        parts.append(result)
    # join instead of repeated `+=` string concatenation.
    return " ".join(parts).strip()
# ❓ Quiz generator
def generate_quiz(text, num_questions=5):
    """Build a simple multiple-choice quiz from *text*.

    Each question is a sentence picked at random from the text; the options
    are that sentence plus up to three other distinct sentences, shuffled.
    (The "answer" recorded for each question is the question sentence itself.)

    Args:
        text: Source text.
        num_questions: Desired number of questions; capped at the number of
            available sentences.

    Returns:
        A formatted string with one question block per question (empty string
        when the text has no sentences).
    """
    sentences = sent_tokenize(text)
    if not sentences:
        return ""
    # Bug fix: cap at the sentence count so random.choice never gets an empty
    # pool (the original raised IndexError when num_questions > #sentences).
    num_questions = min(num_questions, len(sentences))
    # NOTE(review): the original also built a TF-IDF matrix here that was
    # never used; that dead code has been removed.
    quiz = []
    used = set()
    for _ in range(num_questions):
        i = random.choice([x for x in range(len(sentences)) if x not in used])
        used.add(i)
        question = sentences[i]
        # Draw distractors from the remaining sentences, removing each
        # candidate from the pool so the loop always terminates (the original
        # spun forever when the text had fewer than 4 distinct sentences).
        pool = [s for s in sentences if s != question]
        options = [question]
        while len(options) < 4 and pool:
            candidate = pool.pop(random.randrange(len(pool)))
            if candidate not in options:
                options.append(candidate)
        random.shuffle(options)
        quiz.append({
            "question": question,
            "options": options,
            "answer": question
        })
    return "\n\n".join([
        f"Q{i + 1}: {q['question']}\nOptions:\n" +
        "\n".join([f"{chr(65 + j)}. {opt}" for j, opt in enumerate(q['options'])])
        for i, q in enumerate(quiz)
    ])
# 📺 Subtitle formatter
def generate_subtitles(text, max_words_per_line=10):
    """Format *text* as numbered subtitle lines.

    Each sentence is split into lines of at most *max_words_per_line* words.
    (Bug fix: the original sliced the sentence by *characters* — 10-character
    lines — despite the parameter name promising words.)

    Args:
        text: Transcript text.
        max_words_per_line: Maximum number of words per subtitle line.

    Returns:
        Newline-joined lines of the form "<n>. <words>".
    """
    sentences = sent_tokenize(text)
    subtitles = []
    count = 1
    for sentence in sentences:
        words = sentence.split()
        for i in range(0, len(words), max_words_per_line):
            chunk = ' '.join(words[i:i + max_words_per_line])
            subtitles.append(f"{count}. {chunk}")
            count += 1
    return "\n".join(subtitles)
# 🧪 Master function
def process_video(video_path, selected_services):
    """Run the selected pipeline stages on *video_path*.

    Args:
        video_path: Path to the input video file.
        selected_services: Collection of service names; any of
            "Transcription", "Summary", "Subtitles", "Quiz".

    Returns:
        Dict mapping lowercase service names to their outputs; only requested
        services that produced output are included.
    """
    results = {}
    print("🔧 Extracting audio...")
    audio_path = extract_audio(video_path)
    # Bug fix: Summary/Subtitles/Quiz all need the transcript, but the
    # original only transcribed when "Transcription" itself was selected,
    # silently skipping the other services. Transcribe whenever any
    # text-based service is requested.
    text_services = ("Transcription", "Summary", "Subtitles", "Quiz")
    needs_text = any(s in selected_services for s in text_services)
    transcription = transcribe_audio(audio_path) if needs_text else None
    if transcription:
        if "Transcription" in selected_services:
            results["transcription"] = transcription
        if "Summary" in selected_services:
            results["summary"] = generate_summary(transcription)
        if "Subtitles" in selected_services:
            results["subtitles"] = generate_subtitles(transcription)
        if "Quiz" in selected_services:
            results["quiz"] = generate_quiz(transcription)
    return results