Mpavan45 committed on
Commit
6dc4cef
·
verified ·
1 Parent(s): 88fcf45

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -8
app.py CHANGED
@@ -1,3 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import whisper
3
  import ffmpeg
@@ -8,10 +91,13 @@ from chromadb.utils import embedding_functions
8
  from chromadb import Client
9
  from chromadb.config import Settings
10
 
11
- def extract_audio(video_file):
12
- """Extracts audio using ffmpeg."""
13
  audio_path = "temp_audio.wav"
14
- ffmpeg.input(video_file).output(audio_path).run(overwrite_output=True)
 
 
 
15
  return audio_path
16
 
17
  def transcribe_audio(audio_path):
@@ -44,11 +130,11 @@ def search_subtitles(query, collection):
44
  return results['documents']
45
 
46
  def main():
47
- st.set_page_config(page_title="Video Subtitle Generator", layout="wide")
48
- st.title("🎥 Video Subtitle Generator")
49
 
50
  with st.sidebar:
51
- uploaded_file = st.file_uploader("Upload Video", type=["mp4", "mkv"])
52
  query = st.text_input("Search Subtitles")
53
  download_btn = st.button("Download Subtitles")
54
 
@@ -60,8 +146,12 @@ def main():
60
  subtitles = transcribe_audio(audio_path)
61
  st.success("Subtitles Generated!")
62
 
63
- # Display the video and subtitles
64
- st.video(uploaded_file)
 
 
 
 
65
  st.text_area("Generated Subtitles", subtitles, height=300)
66
 
67
  # Load and search embeddings
 
1
+ # import streamlit as st
2
+ # import whisper
3
+ # import ffmpeg
4
+ # import pandas as pd
5
+ # import pickle
6
+ # import os
7
+ # from chromadb.utils import embedding_functions
8
+ # from chromadb import Client
9
+ # from chromadb.config import Settings
10
+
11
+ # def extract_audio(video_file):
12
+ # """Extracts audio using ffmpeg."""
13
+ # audio_path = "temp_audio.wav"
14
+ # ffmpeg.input(video_file).output(audio_path).run(overwrite_output=True)
15
+ # return audio_path
16
+
17
+ # def transcribe_audio(audio_path):
18
+ # """Transcribes audio to text using Whisper."""
19
+ # model = whisper.load_model("base")
20
+ # result = model.transcribe(audio_path)
21
+ # return result['text']
22
+
23
+ # def load_embeddings():
24
+ # """Loads subtitle embeddings from pkl file."""
25
+ # with open('subtitle_embeddings.pkl', 'rb') as f:
26
+ # embeddings = pickle.load(f)
27
+ # return embeddings
28
+
29
+ # def save_to_chroma(embeddings):
30
+ # """Stores embeddings in Chroma DB."""
31
+ # client = Client(Settings())
32
+ # collection = client.create_collection(name="subtitles")
33
+ # for idx, row in embeddings.iterrows():
34
+ # collection.add(
35
+ # documents=[row['subtitle']],
36
+ # ids=[str(idx)],
37
+ # embeddings=[row['embedding']]
38
+ # )
39
+ # return collection
40
+
41
+ # def search_subtitles(query, collection):
42
+ # """Searches for subtitles in Chroma DB."""
43
+ # results = collection.query(query_texts=[query], n_results=5)
44
+ # return results['documents']
45
+
46
+ # def main():
47
+ # st.set_page_config(page_title="Video Subtitle Generator", layout="wide")
48
+ # st.title("🎥 Video Subtitle Generator")
49
+
50
+ # with st.sidebar:
51
+ # uploaded_file = st.file_uploader("Upload Video", type=["mp4", "mkv"])
52
+ # query = st.text_input("Search Subtitles")
53
+ # download_btn = st.button("Download Subtitles")
54
+
55
+ # if uploaded_file:
56
+ # with st.spinner("Extracting audio..."):
57
+ # audio_path = extract_audio(uploaded_file.name)
58
+
59
+ # with st.spinner("Generating subtitles..."):
60
+ # subtitles = transcribe_audio(audio_path)
61
+ # st.success("Subtitles Generated!")
62
+
63
+ # # Display the video and subtitles
64
+ # st.video(uploaded_file)
65
+ # st.text_area("Generated Subtitles", subtitles, height=300)
66
+
67
+ # # Load and search embeddings
68
+ # embeddings = load_embeddings()
69
+ # collection = save_to_chroma(embeddings)
70
+
71
+ # if query:
72
+ # results = search_subtitles(query, collection)
73
+ # st.write("### Matching Subtitles:")
74
+ # for sub in results:
75
+ # st.write(f"- {sub}")
76
+
77
+ # if download_btn:
78
+ # with open("generated_subtitles.srt", "w") as f:
79
+ # f.write(subtitles)
80
+ # st.download_button("Download SRT", "generated_subtitles.srt")
81
+
82
+ # if __name__ == '__main__':
83
+ # main()
84
  import streamlit as st
85
  import whisper
86
  import ffmpeg
 
91
  from chromadb import Client
92
  from chromadb.config import Settings
93
 
94
def extract_audio(file_path):
    """Return the path of an audio file to transcribe for ``file_path``.

    Files whose name ends in ``.mp4`` or ``.mkv`` are passed through ffmpeg
    and their audio is written to ``temp_audio.wav`` (overwriting any
    existing file of that name). Any other file is returned unchanged, on
    the assumption that it is already an audio file.

    The extension check is case-insensitive so that names such as
    ``CLIP.MP4`` are still routed through ffmpeg instead of being treated
    as audio.

    Args:
        file_path: Path (string) of the uploaded video or audio file.

    Returns:
        ``"temp_audio.wav"`` for video input, otherwise ``file_path`` itself.
    """
    if not file_path.lower().endswith(('.mp4', '.mkv')):
        # Not a recognised video container: hand the file back as-is.
        return file_path

    audio_path = "temp_audio.wav"
    ffmpeg.input(file_path).output(audio_path).run(overwrite_output=True)
    return audio_path
102
 
103
  def transcribe_audio(audio_path):
 
130
  return results['documents']
131
 
132
  def main():
133
+ st.set_page_config(page_title="Video/Audio Subtitle Generator", layout="wide")
134
+ st.title("🎥🎵 Video/Audio Subtitle Generator")
135
 
136
  with st.sidebar:
137
+ uploaded_file = st.file_uploader("Upload Video/Audio", type=["mp4", "mkv", "mp3", "wav"])
138
  query = st.text_input("Search Subtitles")
139
  download_btn = st.button("Download Subtitles")
140
 
 
146
  subtitles = transcribe_audio(audio_path)
147
  st.success("Subtitles Generated!")
148
 
149
+ # Display the media and subtitles
150
+ if uploaded_file.name.endswith(('.mp4', '.mkv')):
151
+ st.video(uploaded_file)
152
+ else:
153
+ st.audio(uploaded_file)
154
+
155
  st.text_area("Generated Subtitles", subtitles, height=300)
156
 
157
  # Load and search embeddings