SmartScribe / app.py
thomasanto7001's picture
Update app.py
a4e49da verified
raw
history blame
3.51 kB
import whisper
from moviepy.video.io.VideoFileClip import VideoFileClip
from transformers import pipeline
import nltk
import os
import re
import random
import subprocess
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords
# Fetch the NLTK data this module needs at import time.
# 'punkt' serves older NLTK releases; newer releases load the 'punkt_tab'
# resource for sent_tokenize instead, so both are requested.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

# English stop words, loaded once for the whole module.
stop_words = set(stopwords.words('english'))
def download_youtube_video(youtube_url, filename="youtube_video.mp4"):
    """Download a YouTube video by invoking the ``yt-dlp`` command-line tool.

    Args:
        youtube_url: URL of the video to download.
        filename: Output path passed to ``yt-dlp -o``.

    Returns:
        ``filename``, unchanged.

    Raises:
        RuntimeError: If ``yt-dlp`` exits with a non-zero status. The message
            includes the tool's stderr output.
    """
    print(f"⬇️ Downloading YouTube video via yt-dlp: {youtube_url}")
    command = [
        "yt-dlp",
        "-f", "best[ext=mp4]+bestaudio/best",
        "-o", filename,
        # "--" ends option parsing, so a caller-supplied URL that starts with
        # "-" cannot be interpreted as a yt-dlp option.
        "--",
        youtube_url,
    ]
    result = subprocess.run(command, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError("YouTube download failed: " + result.stderr)
    return filename
def extract_audio(video_path):
    """Write the audio track of a video file to ``temp_audio.wav``.

    Args:
        video_path: Path of the video file to read.

    Returns:
        The path of the WAV file that was written (``"temp_audio.wav"``).

    Raises:
        ValueError: If the video has no audio track.
    """
    audio_path = "temp_audio.wav"
    clip = VideoFileClip(video_path)
    try:
        if clip.audio is None:
            # Without this check the failure would be an opaque
            # AttributeError on None.
            raise ValueError(f"No audio track found in video: {video_path}")
        clip.audio.write_audiofile(audio_path, codec='pcm_s16le')
    finally:
        # Always release the resources the clip holds open, even on failure.
        clip.close()
    return audio_path
def transcribe_audio(audio_path):
    """Transcribe an audio file with Whisper's "base" model.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The ``"text"`` entry of the Whisper transcription result.
    """
    transcription = whisper.load_model("base").transcribe(audio_path)
    return transcription["text"]
def generate_summary(text, max_len=130):
    """Summarize ``text`` chunk by chunk and join the partial summaries.

    The text is split into sentences, the sentences are grouped ten at a
    time, and each group is summarized independently with the default
    ``transformers`` summarization pipeline.

    Args:
        text: The text to summarize.
        max_len: ``max_length`` passed to the summarizer for each chunk.

    Returns:
        The chunk summaries joined by single spaces, with surrounding
        whitespace stripped. Returns ``""`` when ``text`` has no sentences.
    """
    sentences = sent_tokenize(text)
    if not sentences:
        # Nothing to summarize; skip loading the model altogether.
        return ""

    summarizer = pipeline("summarization")
    chunk_size = 10
    chunks = [
        ' '.join(sentences[start:start + chunk_size])
        for start in range(0, len(sentences), chunk_size)
    ]
    partial_summaries = [
        summarizer(
            chunk,
            max_length=max_len,
            min_length=30,
            do_sample=False,
            # Ten long sentences can exceed the model's input limit;
            # truncate instead of failing inside the model.
            truncation=True,
        )[0]["summary_text"]
        for chunk in chunks
    ]
    return " ".join(partial_summaries).strip()
def _format_srt_timestamp(total_seconds):
    """Format a whole number of seconds as an SRT ``HH:MM:SS,000`` timestamp."""
    minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours:02}:{minutes:02}:{seconds:02},000"


def generate_subtitles(text):
    """Build SRT-style subtitles, one fixed 5-second cue per sentence.

    Timing is synthetic: sentence ``i`` is shown from ``5 * i`` to
    ``5 * i + 5`` seconds, regardless of when it was spoken.

    Args:
        text: The text to split into subtitle cues.

    Returns:
        The cues joined by newlines. Each cue is a 1-based index, a
        ``start --> end`` line, and the sentence. Returns ``""`` when
        ``text`` has no sentences.
    """
    sentences = sent_tokenize(text)
    subtitles = []
    for i, sentence in enumerate(sentences):
        start_time = i * 5
        end_time = start_time + 5
        # Carry seconds into minutes and hours so cues past the first minute
        # stay valid (previously they rendered as e.g. "00:00:60,000").
        start_stamp = _format_srt_timestamp(start_time)
        end_stamp = _format_srt_timestamp(end_time)
        subtitles.append(f"{i+1}\n{start_stamp} --> {end_stamp}\n{sentence}\n")
    return "\n".join(subtitles)
def generate_quiz(text, num_questions=5):
    """Build a multiple-choice quiz whose options are sentences of ``text``.

    Each question is a randomly chosen sentence. Its options are that
    sentence plus up to three other distinct sentences, shuffled.

    Args:
        text: The text to draw sentences from.
        num_questions: Maximum number of questions. Fewer are produced when
            the text has fewer distinct sentences.

    Returns:
        The questions formatted as ``Q<n>: ...`` followed by lettered
        options, separated by blank lines. Returns ``""`` when there are no
        sentences or ``num_questions`` is not positive.
    """
    # Drop duplicate sentences (keeping order) so options are always distinct.
    sentences = list(dict.fromkeys(sent_tokenize(text)))
    if not sentences or num_questions <= 0:
        return ""

    # Clamp both counts to what the text can supply. Previously a short text
    # either raised IndexError or looped forever looking for a fourth option.
    question_count = min(num_questions, len(sentences))
    distractor_count = min(3, len(sentences) - 1)

    quiz = []
    for question in random.sample(sentences, question_count):
        distractor_pool = [s for s in sentences if s != question]
        options = [question] + random.sample(distractor_pool, distractor_count)
        random.shuffle(options)
        quiz.append({
            "question": question,
            "options": options,
            "answer": question,
        })

    formatted = []
    for i, q in enumerate(quiz):
        option_lines = "\n".join(
            f"{chr(65+j)}. {opt}" for j, opt in enumerate(q['options'])
        )
        formatted.append(f"Q{i+1}: {q['question']}\nOptions:\n" + option_lines)
    return "\n\n".join(formatted)
def process_video(video_path, selected_services):
    """Run the selected services on a video and collect their outputs.

    Args:
        video_path: Path of the video file to process.
        selected_services: Container of service names. Recognized names are
            ``"Transcription"``, ``"Summary"``, ``"Subtitles"`` and
            ``"Quiz"``.

    Returns:
        A dict with a lower-case key for each selected service that produced
        output (``"transcription"``, ``"summary"``, ``"subtitles"``,
        ``"quiz"``). Empty when no recognized service is selected or the
        transcript is empty.
    """
    results = {}
    text_services = ("Transcription", "Summary", "Subtitles", "Quiz")
    if not any(service in selected_services for service in text_services):
        # Nothing requested needs a transcript, so skip the expensive work.
        return results

    print("🔧 Extracting audio...")
    audio_path = extract_audio(video_path)
    try:
        # Summary, subtitles and quiz are all derived from the transcript, so
        # transcribe even when "Transcription" itself was not selected.
        transcription = transcribe_audio(audio_path)
    finally:
        # The extracted audio is only an intermediate file; don't leave it.
        if os.path.exists(audio_path):
            os.remove(audio_path)

    if not transcription:
        return results

    if "Transcription" in selected_services:
        results["transcription"] = transcription
    if "Summary" in selected_services:
        results["summary"] = generate_summary(transcription)
    if "Subtitles" in selected_services:
        results["subtitles"] = generate_subtitles(transcription)
    if "Quiz" in selected_services:
        results["quiz"] = generate_quiz(transcription)
    return results