Mpavan45 committed on
Commit
c777fe0
·
verified ·
1 Parent(s): 25169b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -140
app.py CHANGED
@@ -1,143 +1,3 @@
1
- # import streamlit as st
2
- # import whisper
3
- # import ffmpeg
4
- # import pandas as pd
5
- # import pickle
6
- # import os
7
- # import numpy as np
8
- # from sentence_transformers import SentenceTransformer
9
- # from chromadb import Client
10
- # from chromadb.config import Settings
11
-
12
- # embed_model = SentenceTransformer('all-MiniLM-L6-v2')
13
-
14
- # def extract_audio(uploaded_file):
15
- # audio_path = "temp_audio.wav"
16
- # temp_file = f"temp_{uploaded_file.name}"
17
- # with open(temp_file, "wb") as f:
18
- # f.write(uploaded_file.getvalue())
19
-
20
- # try:
21
- # if uploaded_file.name.endswith(('.mp4', '.mkv')):
22
- # ffmpeg.input(temp_file).output(audio_path).run(overwrite_output=True)
23
- # else:
24
- # audio_path = temp_file
25
- # return audio_path, temp_file
26
- # except Exception as e:
27
- # st.error(f"Error extracting audio: {str(e)}")
28
- # return None, None
29
-
30
- # def transcribe_audio(audio_path):
31
- # try:
32
- # model = whisper.load_model("base")
33
- # result = model.transcribe(audio_path)
34
-
35
- # subtitles = []
36
- # for i, segment in enumerate(result['segments']):
37
- # start_time = format_timestamp(segment['start'])
38
- # end_time = format_timestamp(segment['end'])
39
- # text = segment['text']
40
- # subtitles.append(f"{i + 1}\n{start_time} --> {end_time}\n{text}\n")
41
-
42
- # return subtitles
43
- # except Exception as e:
44
- # st.error(f"Error during transcription: {str(e)}")
45
- # return []
46
-
47
- # def format_timestamp(seconds):
48
- # hours = int(seconds // 3600)
49
- # minutes = int((seconds % 3600) // 60)
50
- # secs = int(seconds % 60)
51
- # millis = int((seconds % 1) * 1000)
52
- # return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"
53
-
54
- # def embed_subtitles(subtitles):
55
- # raw_texts = [line.split('\n')[2] for line in subtitles if line.strip()]
56
- # embeddings = embed_model.encode(raw_texts)
57
-
58
- # df = pd.DataFrame({
59
- # 'subtitle': raw_texts,
60
- # 'embedding': list(embeddings)
61
- # })
62
-
63
- # with open('subtitle_embeddings.pkl', 'wb') as f:
64
- # pickle.dump(df, f)
65
-
66
- # return df
67
-
68
- # def save_to_chroma(embeddings):
69
- # client = Client(Settings())
70
- # collection = client.create_collection(name="subtitles")
71
-
72
- # for idx, row in embeddings.iterrows():
73
- # collection.add(
74
- # documents=[row['subtitle']],
75
- # ids=[str(idx)],
76
- # embeddings=[row['embedding']]
77
- # )
78
- # return collection
79
-
80
- # def search_subtitles(query, collection):
81
- # try:
82
- # results = collection.query(query_texts=[query], n_results=5)
83
- # return results['documents']
84
- # except Exception as e:
85
- # st.error(f"Error searching subtitles: {str(e)}")
86
- # return []
87
-
88
- # def main():
89
- # st.set_page_config(page_title="Video/Audio Subtitle Generator", layout="wide")
90
- # st.title("🎥🎵 Video/Audio Subtitle Generator")
91
-
92
- # with st.sidebar:
93
- # uploaded_file = st.file_uploader("Upload Video/Audio", type=["mp4", "mkv", "mp3", "wav"])
94
- # query = st.text_input("Search Subtitles")
95
- # download_btn = st.button("Download Subtitles")
96
-
97
- # if uploaded_file:
98
- # with st.spinner("Extracting audio..."):
99
- # audio_path, temp_file = extract_audio(uploaded_file)
100
-
101
- # if audio_path:
102
- # with st.spinner("Generating subtitles..."):
103
- # subtitles = transcribe_audio(audio_path)
104
- # st.success("Subtitles Generated!")
105
-
106
- # if uploaded_file.name.endswith(('.mp4', '.mkv')):
107
- # st.video(uploaded_file)
108
- # else:
109
- # st.audio(uploaded_file)
110
-
111
- # st.write("### Generated Subtitles:")
112
- # for sub in subtitles:
113
- # st.text(sub)
114
-
115
- # with st.spinner("Embedding and storing subtitles..."):
116
- # embeddings = embed_subtitles(subtitles)
117
-
118
- # if embeddings.empty:
119
- # st.warning("No subtitles generated.")
120
- # else:
121
- # collection = save_to_chroma(embeddings)
122
-
123
- # if query:
124
- # results = search_subtitles(query, collection)
125
- # st.write("### Matching Subtitles:")
126
- # if results:
127
- # for idx, sub in enumerate(results, start=1):
128
- # st.write(f"{idx}. {sub}")
129
- # else:
130
- # st.warning("No matching subtitles found.")
131
-
132
- # if download_btn:
133
- # with open("generated_subtitles.srt", "w") as f:
134
- # f.writelines(subtitles)
135
-
136
- # with open("generated_subtitles.srt", "rb") as f:
137
- # st.download_button("Download SRT", f, file_name="generated_subtitles.srt", mime="text/plain")
138
-
139
- # if __name__ == '__main__':
140
- # main()
141
  import streamlit as st
142
  import whisper
143
  import ffmpeg
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import whisper
3
  import ffmpeg