Mpavan45 committed on
Commit
1c604f5
·
verified ·
1 Parent(s): 9fe52a7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -0
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Streamlit app: upload a video, extract and transcribe its audio, then list
# the subtitles from a Chroma DB collection that best match the transcript,
# ranked by cosine similarity.
import os
import tempfile

import chromadb
import streamlit as st

# NOTE(review): extract_audio, transcribe_audio, load_subtitle_db_chroma,
# find_chroma_subtitles, generate_embedding and compute_cosine_similarity are
# called below but are neither defined nor imported in this file. They must be
# imported from wherever they live before this script can run.

st.title("🎥 Video Subtitle Generator with Chroma DB and Cosine Similarity")

# Upload video
uploaded_file = st.file_uploader("Upload a video", type=["mp4", "avi", "mov", "mkv"])

if uploaded_file:
    # Keep the upload's own extension (the uploader also accepts avi/mov/mkv)
    # instead of always labelling the temp file ".mp4"; fall back to ".mp4"
    # when the uploaded name has no extension.
    video_suffix = os.path.splitext(uploaded_file.name)[1] or ".mp4"

    # Both intermediate files live in a private per-run directory so that
    # concurrent sessions cannot overwrite each other's audio file, and so
    # that everything is removed even if a processing step raises.
    with tempfile.TemporaryDirectory() as work_dir:
        video_path = os.path.join(work_dir, f"input_video{video_suffix}")
        audio_path = os.path.join(work_dir, "temp_audio.wav")

        with open(video_path, "wb") as temp_video:
            temp_video.write(uploaded_file.getbuffer())

        # Extract audio
        st.info("Extracting audio...")
        extract_audio(video_path, audio_path)

        # Transcribe audio
        st.info("Transcribing audio...")
        transcribed_text = transcribe_audio(audio_path)
        st.text_area("Transcribed Text", transcribed_text, height=150)

        # Initialize Chroma DB client
        chroma_client = chromadb.Client()

        # Load subtitle database into Chroma DB (the second return value was
        # never used by this script, so it is discarded).
        subtitle_db_path = "database.csv"
        collection, _ = load_subtitle_db_chroma(subtitle_db_path, chroma_client)

        # Find matching subtitles with Chroma DB
        st.info("Finding matching subtitles...")
        matching_subtitles, subtitle_embeddings = find_chroma_subtitles(
            transcribed_text, collection
        )

        # Generate query embedding for cosine similarity
        query_embedding = generate_embedding(transcribed_text)

        # Compute cosine similarity
        cosine_similarities = compute_cosine_similarity(query_embedding, subtitle_embeddings)

        # Attach each subtitle's score; entry i of the similarities is taken
        # to correspond to subtitle i, as in the original pairing.
        for i, sub in enumerate(matching_subtitles):
            sub['cosine_similarity'] = cosine_similarities[i]

        # Sort by similarity score, best match first
        matching_subtitles = sorted(
            matching_subtitles, key=lambda x: x['cosine_similarity'], reverse=True
        )

        # Display video (called while the temp file still exists; the
        # directory is removed when this `with` block exits)
        st.video(video_path)

        # Display matching subtitles with similarity scores
        st.subheader("📜 Matching Subtitles (Chroma DB + Cosine Similarity)")
        for sub in matching_subtitles:
            st.write(f"**Subtitle:** {sub['text']}")
            st.write(f"**Cosine Similarity:** {sub['cosine_similarity']:.4f}")
            st.write("---")
    # Cleanup of the video and audio files is handled by TemporaryDirectory.