Raj Jayendrakumar Muchhala committed on
Commit
78ca458
Β·
1 Parent(s): debb8ec

support transcription

Browse files
Files changed (2) hide show
  1. app.py +81 -21
  2. requirements.txt +4 -1
app.py CHANGED
@@ -4,6 +4,9 @@ from clipper_prompts import CLIPPER_SYSTEM_MESSAGE, CLIPPER_USER_MESSAGE
4
  from prompts import SYSTEM_MESSAGE, USER_MESSAGE
5
  import json
6
  import os
 
 
 
7
 
8
  # Set Streamlit layout to wide mode
9
  st.set_page_config(layout="wide")
@@ -49,32 +52,89 @@ col_transcript, col_output = st.columns([1, 1])
49
 
50
  # Left Column: Transcript Input
51
  with col_transcript:
52
- st.subheader("πŸ“ Paste Your Transcript")
53
- transcript = st.text_area("Enter the transcript here:", height=400)
54
-
55
- # Add reference link below the transcript text box
56
- st.markdown("---")
57
- st.markdown(
58
- """
59
- <div style="font-size:18px; font-weight:bold; margin-top:10px;">
60
- Need a transcript? Use <a href="https://huggingface.co/spaces/openai/whisper" target="_blank" style="color:#007bff; text-decoration:none;">
61
- OpenAI Whisper on Hugging Face</a> to generate one from your audio or video.
62
- </div>
63
- """,
64
- unsafe_allow_html=True
65
- )
66
-
67
- st.markdown("---")
68
- st.subheader("πŸŽ₯ Video/Audio Upload & Playback")
69
-
70
- media_file = st.file_uploader("Upload a video or audio file", type=["mp4", "mov", "avi", "mp3", "wav", "ogg"])
71
- if media_file is not None:
72
- # Detect media type and play accordingly
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  if media_file.type.startswith("video"):
74
  st.video(media_file)
75
  elif media_file.type.startswith("audio"):
76
  st.audio(media_file)
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
  # Right Column: Clip Plan Generation and Extraction
80
  with col_output:
 
4
  from prompts import SYSTEM_MESSAGE, USER_MESSAGE
5
  import json
6
  import os
7
+ import yt_dlp
8
+ import ffmpeg
9
+ from tempfile import NamedTemporaryFile
10
 
11
  # Set Streamlit layout to wide mode
12
  st.set_page_config(layout="wide")
 
52
 
53
  # Left Column: Transcript Input
54
  with col_transcript:
55
+ st.subheader("πŸ“ Enter Video Source")
56
+ youtube_url = st.text_input("Enter YouTube Video URL")
57
+ media_file = st.file_uploader("Or upload a video/audio file", type=["mp4", "mov", "avi", "mp3", "wav", "ogg"])
58
+ transcript = ""
59
+
60
def download_youtube_audio(url):
    """Download the audio track of a YouTube video as an mp3 file.

    Uses yt-dlp's FFmpegExtractAudio post-processor to convert the best
    available audio stream to mp3, then hands the result to
    compress_audio() so the file fits under the Whisper upload limit.

    Returns the path of the (possibly compressed) mp3 file.
    """
    ydl_opts = {
        "format": "bestaudio/best",
        # yt-dlp does not honor the legacy youtube-dl keys
        # "extractaudio"/"audioformat"; a post-processor is required to
        # actually produce an mp3 on disk.
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
            }
        ],
        # The original template "% (id)s.%(ext)s" contained a stray space,
        # so the "%(id)s" field was never expanded.
        "outtmpl": "%(id)s.%(ext)s",
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        # After FFmpegExtractAudio runs, the file on disk has an .mp3
        # extension regardless of the source container (.webm, .m4a, ...).
        base, _ = os.path.splitext(ydl.prepare_filename(info))
        filename = base + ".mp3"
    return compress_audio(filename)
71
+
72
def compress_audio(input_path, target_size_mb=25):
    """Re-encode an audio file so it fits under ``target_size_mb`` megabytes.

    Files already at or below the limit are returned untouched. Otherwise
    the target bitrate is computed from the clip duration (with a ~10%
    safety buffer), clamped to a 32 kbps floor, and ffmpeg re-encodes the
    file to a sibling "*_compressed" path.

    Returns the path of the file to use (original or compressed copy).
    """
    file_size_mb = os.path.getsize(input_path) / (1024 * 1024)
    if file_size_mb <= target_size_mb:
        # Already small enough -- skip the lossy re-encode entirely.
        return input_path

    # splitext (rather than str.replace(".mp3", ...)) guarantees the output
    # path differs from the input even for non-mp3 files, so ffmpeg never
    # tries to overwrite its own input in place.
    base, ext = os.path.splitext(input_path)
    output_path = f"{base}_compressed{ext or '.mp3'}"

    # A meaningful bitrate needs the clip duration: bits / seconds.  The
    # original formula divided target bits by the file size in MB, which
    # yields absurd "kbps" values and never actually shrinks the file.
    duration_s = float(ffmpeg.probe(input_path)["format"]["duration"])
    target_bits = target_size_mb * 1024 * 1024 * 8
    # The 1.1 divisor leaves a ~10% buffer so the result lands safely
    # under the target size; 32 kbps floor prevents unusable quality.
    target_bitrate_kbps = int(target_bits / (duration_s * 1000 * 1.1))
    target_bitrate_kbps = max(target_bitrate_kbps, 32)

    ffmpeg.input(input_path).output(
        output_path, audio_bitrate=f"{target_bitrate_kbps}k"
    ).run(overwrite_output=True)
    return output_path
88
+
89
+
90
def transcribe_audio(file_path):
    """Transcribe an audio file with OpenAI Whisper and return plain text.

    Requests word-level timestamps (verbose_json) and joins the individual
    words into a single space-separated transcript string.

    NOTE(review): OpenAI and OPENAI_API_KEY are defined elsewhere in this
    file; a fresh client is built per call against the public endpoint.
    """
    whisper_client = OpenAI(api_key=OPENAI_API_KEY, base_url="https://api.openai.com/v1")
    transcription_args = {
        "model": "whisper-1",
        "response_format": "verbose_json",
        "timestamp_granularities": ["word"],
        "timeout": 360,
        "prompt": "The audio may not contain speech, do not make up words.",
    }
    with open(file_path, "rb") as audio_file:
        transcript_response = whisper_client.audio.transcriptions.create(
            file=audio_file, **transcription_args
        )

    # With verbose_json the SDK returns TranscriptionWord objects
    # (attribute access), so word["word"] raised TypeError; support both
    # shapes in case of SDK version drift.  words can also be None for
    # audio with no detected speech.
    transcript_words = transcript_response.words or []
    return " ".join(
        w["word"] if isinstance(w, dict) else w.word for w in transcript_words
    )
107
+
108
+
109
# Keep the transcript across Streamlit reruns: every widget interaction
# re-executes this script top to bottom, so a plain local variable would
# reset to "" and the generated transcript would vanish after the next
# click anywhere in the app.
if "transcript" not in st.session_state:
    st.session_state["transcript"] = ""

# Preview whichever source the user supplied (URL takes precedence).
if youtube_url:
    st.video(youtube_url)
elif media_file:
    if media_file.type.startswith("video"):
        st.video(media_file)
    elif media_file.type.startswith("audio"):
        st.audio(media_file)

if st.button("Transcribe Video"):
    with st.spinner("Processing... This may take a few minutes."):
        try:
            if youtube_url:
                audio_path = download_youtube_audio(youtube_url)
                st.session_state["transcript"] = transcribe_audio(audio_path)
            elif media_file:
                # Persist the upload to disk: compress_audio/ffmpeg and
                # the Whisper client both need a real file path.  The
                # context manager closes the file; the explicit close()
                # the original added inside the with-block was redundant.
                with NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
                    temp_audio.write(media_file.read())
                audio_path = compress_audio(temp_audio.name)
                st.session_state["transcript"] = transcribe_audio(audio_path)
            else:
                st.error("❌ Please provide a YouTube link or upload a file.")
        except Exception as e:
            # Surface download/ffmpeg/API failures to the user instead of
            # crashing the app.
            st.error(f"Error: {str(e)}")

# Display the extracted transcript (editable, so downstream steps can use
# a hand-corrected version).
st.subheader("📝 Transcript")
transcript = st.text_area("Generated Transcript", st.session_state["transcript"], height=300)
137
+
138
 
139
  # Right Column: Clip Plan Generation and Extraction
140
  with col_output:
requirements.txt CHANGED
@@ -1 +1,4 @@
1
- openai
 
 
 
 
1
+ openai
2
+ yt-dlp
3
+ pydub
4
+ ffmpeg-python