Update app.py
Browse files
app.py
CHANGED
|
@@ -15,11 +15,16 @@ import speech_recognition as sr
|
|
| 15 |
import json
|
| 16 |
from nltk.tokenize import sent_tokenize
|
| 17 |
import logging
|
|
|
|
| 18 |
|
| 19 |
# Configure logging
|
| 20 |
logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
|
| 21 |
logger = logging.getLogger(__name__)
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
def transcribe_video(video_path):
|
| 24 |
# Load the video file and extract audio
|
| 25 |
video = VideoFileClip(video_path)
|
|
@@ -35,7 +40,7 @@ def transcribe_video(video_path):
|
|
| 35 |
transcript = recognizer.recognize_google(audio_text)
|
| 36 |
|
| 37 |
# Split transcript into sentences
|
| 38 |
-
sentences =
|
| 39 |
|
| 40 |
# Create a list of timestamps for each sentence
|
| 41 |
timestamps = []
|
|
|
|
| 15 |
import json
|
| 16 |
from nltk.tokenize import sent_tokenize
|
| 17 |
import logging
|
| 18 |
+
from textblob import TextBlob
|
| 19 |
|
| 20 |
# Configure logging
|
| 21 |
logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
|
| 22 |
logger = logging.getLogger(__name__)
|
| 23 |
|
| 24 |
+
def split_into_sentences(text):
    """Split *text* into a list of plain-string sentences via TextBlob."""
    return [str(sentence) for sentence in TextBlob(text).sentences]
|
| 27 |
+
|
| 28 |
def transcribe_video(video_path):
|
| 29 |
# Load the video file and extract audio
|
| 30 |
video = VideoFileClip(video_path)
|
|
|
|
| 40 |
transcript = recognizer.recognize_google(audio_text)
|
| 41 |
|
| 42 |
# Split transcript into sentences
|
| 43 |
+
sentences = split_into_sentences(transcript)
|
| 44 |
|
| 45 |
# Create a list of timestamps for each sentence
|
| 46 |
timestamps = []
|