Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -22,7 +22,7 @@ import shutil
|
|
| 22 |
from PIL import Image
|
| 23 |
import google.generativeai as genai
|
| 24 |
from huggingface_hub import InferenceClient
|
| 25 |
-
|
| 26 |
|
| 27 |
class VideoClassifier:
|
| 28 |
global audio_time , setup_time , caption_time , classification_time
|
|
@@ -45,6 +45,7 @@ class VideoClassifier:
|
|
| 45 |
self.setup_gemini_model()
|
| 46 |
self.setup_paths()
|
| 47 |
self.hf_key = os.environ.get("HF_KEY", None)
|
|
|
|
| 48 |
# self.whisper_model = whisper.load_model("base")
|
| 49 |
|
| 50 |
def setup_paths(self):
|
|
@@ -164,6 +165,29 @@ class VideoClassifier:
|
|
| 164 |
task="transcribe"
|
| 165 |
result = pipe(audiotrack, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
|
| 166 |
return result["text"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
|
| 168 |
def generate_text(self, inputs, parameters=None):
|
| 169 |
if parameters is None:
|
|
@@ -178,7 +202,8 @@ class VideoClassifier:
|
|
| 178 |
|
| 179 |
def classify_video(self,video_input):
|
| 180 |
global classification_time , caption_time
|
| 181 |
-
transcript=self.audio_extraction_space(video_input)
|
|
|
|
| 182 |
start_time_caption = time.time()
|
| 183 |
video = cv2.VideoCapture(video_input)
|
| 184 |
length = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
|
|
|
|
| 22 |
from PIL import Image
|
| 23 |
import google.generativeai as genai
|
| 24 |
from huggingface_hub import InferenceClient
|
| 25 |
+
from openai import OpenAI
|
| 26 |
|
| 27 |
class VideoClassifier:
|
| 28 |
global audio_time , setup_time , caption_time , classification_time
|
|
|
|
| 45 |
self.setup_gemini_model()
|
| 46 |
self.setup_paths()
|
| 47 |
self.hf_key = os.environ.get("HF_KEY", None)
|
| 48 |
+
self.client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))  # SECURITY: a live secret key was committed in this diff — revoke it immediately and load the key from the environment instead
|
| 49 |
# self.whisper_model = whisper.load_model("base")
|
| 50 |
|
| 51 |
def setup_paths(self):
|
|
|
|
| 165 |
task="transcribe"
|
| 166 |
result = pipe(audiotrack, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
|
| 167 |
return result["text"]
|
| 168 |
+
|
| 169 |
+
def audio_extraction_chatgptapi(self, video_input):
    """Transcribe the audio track of *video_input* via the OpenAI Whisper API.

    For CPU-only deployments, calling the hosted API is faster than running
    whisper inference locally.

    Args:
        video_input: Path to the input video file (mp4).

    Returns:
        str: The transcript text returned by the ``whisper-1`` model.
    """
    global audio_time
    # NOTE(review): audio_time is declared global but never assigned in this
    # method — confirm where (or whether) it is actually recorded.
    start_time_audio = time.time()
    print(f"Processing video: {video_input} with {self.no_of_frames} frames.")
    wav_file = "results/audiotrack.wav"
    # Demux the audio track to a wav file; close the clips even if the
    # write fails, so the underlying ffmpeg readers are released.
    video_clip = VideoFileClip(video_input)
    try:
        audio_clip = video_clip.audio
        # BUG FIX: write_audiofile() returns None — the original clobbered
        # `wav_file` with its return value.
        audio_clip.write_audiofile(wav_file)
        audio_clip.close()
    finally:
        video_clip.close()
    # BUG FIX: the original referenced an undefined name `client` (the
    # instance attribute is `self.client`) and passed a path *string* as
    # `file` — the transcription endpoint expects an open binary file handle.
    with open(wav_file, "rb") as audio_handle:
        transcription = self.client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_handle,
        )
    print(transcription.text)
    return transcription.text
|
| 191 |
|
| 192 |
def generate_text(self, inputs, parameters=None):
|
| 193 |
if parameters is None:
|
|
|
|
| 202 |
|
| 203 |
def classify_video(self,video_input):
|
| 204 |
global classification_time , caption_time
|
| 205 |
+
# transcript=self.audio_extraction_space(video_input)
|
| 206 |
+
transcript=self.audio_extraction_chatgptapi(video_input)
|
| 207 |
start_time_caption = time.time()
|
| 208 |
video = cv2.VideoCapture(video_input)
|
| 209 |
length = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
|