Update app.py
Browse files
app.py
CHANGED
|
@@ -15,11 +15,16 @@ import speech_recognition as sr
|
|
| 15 |
import json
|
| 16 |
from nltk.tokenize import sent_tokenize
|
| 17 |
import logging
|
|
|
|
| 18 |
|
| 19 |
# Configure logging
|
| 20 |
logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
|
| 21 |
logger = logging.getLogger(__name__)
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
def transcribe_video(video_path):
|
| 24 |
# Load the video file and extract audio
|
| 25 |
video = VideoFileClip(video_path)
|
|
@@ -35,7 +40,7 @@ def transcribe_video(video_path):
|
|
| 35 |
transcript = recognizer.recognize_google(audio_text)
|
| 36 |
|
| 37 |
# Split transcript into sentences
|
| 38 |
-
sentences =
|
| 39 |
|
| 40 |
# Create a list of timestamps for each sentence
|
| 41 |
timestamps = []
|
|
|
|
| 15 |
import json
|
| 16 |
from nltk.tokenize import sent_tokenize
|
| 17 |
import logging
|
| 18 |
+
from textblob import TextBlob
|
| 19 |
|
| 20 |
# Configure logging
|
| 21 |
logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
|
| 22 |
logger = logging.getLogger(__name__)
|
| 23 |
|
| 24 |
+
def split_into_sentences(text):
    """Split *text* into a list of plain-string sentences via TextBlob."""
    return [str(sentence) for sentence in TextBlob(text).sentences]
|
| 27 |
+
|
| 28 |
def transcribe_video(video_path):
|
| 29 |
# Load the video file and extract audio
|
| 30 |
video = VideoFileClip(video_path)
|
|
|
|
| 40 |
transcript = recognizer.recognize_google(audio_text)
|
| 41 |
|
| 42 |
# Split transcript into sentences
|
| 43 |
+
sentences = split_into_sentences(transcript)
|
| 44 |
|
| 45 |
# Create a list of timestamps for each sentence
|
| 46 |
timestamps = []
|