Update app.py
Browse files
app.py
CHANGED
|
@@ -313,6 +313,55 @@ def extract_title(text):
|
|
| 313 |
title = re.sub(r'[^a-zA-Z0-9_\-]', ' ', text[-200:])
|
| 314 |
return title[-200:]
|
| 315 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
#@st.cache_resource
|
| 317 |
def process_image(image_input, user_prompt):
|
| 318 |
SaveNewFile=True
|
|
@@ -392,47 +441,6 @@ def create_audio_file(filename, audio_data, should_save):
|
|
| 392 |
else:
|
| 393 |
st.warning("Audio file not saved.")
|
| 394 |
|
| 395 |
-
def process_audio(audio_input, text_input=''):
|
| 396 |
-
if audio_input:
|
| 397 |
-
transcription = client.audio.transcriptions.create(
|
| 398 |
-
model="whisper-1",
|
| 399 |
-
file=audio_input,
|
| 400 |
-
)
|
| 401 |
-
st.session_state.messages.append({"role": "user", "content": transcription.text})
|
| 402 |
-
with st.chat_message("assistant"):
|
| 403 |
-
st.markdown(transcription.text)
|
| 404 |
-
|
| 405 |
-
SpeechSynthesis(transcription.text)
|
| 406 |
-
filename = generate_filename(transcription.text, "wav")
|
| 407 |
-
|
| 408 |
-
create_audio_file(filename, audio_input, should_save)
|
| 409 |
-
|
| 410 |
-
#SpeechSynthesis(transcription.text)
|
| 411 |
-
|
| 412 |
-
filename = generate_filename(transcription.text, "md")
|
| 413 |
-
create_file(filename, transcription.text, transcription.text, should_save)
|
| 414 |
-
#st.markdown(response.choices[0].message.content)
|
| 415 |
-
|
| 416 |
-
def process_audio_for_video(video_input):
|
| 417 |
-
if video_input:
|
| 418 |
-
try:
|
| 419 |
-
transcription = client.audio.transcriptions.create(
|
| 420 |
-
model="whisper-1",
|
| 421 |
-
file=video_input,
|
| 422 |
-
)
|
| 423 |
-
response = client.chat.completions.create(
|
| 424 |
-
model=MODEL,
|
| 425 |
-
messages=[
|
| 426 |
-
{"role": "system", "content":"""You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown."""},
|
| 427 |
-
{"role": "user", "content": [{"type": "text", "text": f"The audio transcription is: {transcription}"}],}
|
| 428 |
-
],
|
| 429 |
-
temperature=0,
|
| 430 |
-
)
|
| 431 |
-
st.markdown(response.choices[0].message.content)
|
| 432 |
-
return response.choices[0].message.content
|
| 433 |
-
except:
|
| 434 |
-
st.write('No transcript')
|
| 435 |
-
|
| 436 |
def save_video(video_file):
|
| 437 |
# Save the uploaded video file
|
| 438 |
with open(video_file.name, "wb") as f:
|
|
|
|
| 313 |
title = re.sub(r'[^a-zA-Z0-9_\-]', ' ', text[-200:])
|
| 314 |
return title[-200:]
|
| 315 |
|
| 316 |
+
def process_audio(audio_input, text_input=''):
    """Transcribe audio with Whisper, show/speak the transcript, and persist it.

    audio_input may be either an uploaded file object or a str path to an
    audio file already on disk.  text_input is accepted for interface
    compatibility; it is currently unused.  Relies on module-level `client`,
    `should_save`, and the save/synthesis helpers.
    """
    if not audio_input:
        return

    save_new_file = True  # uploaded audio has not been written to disk yet
    if isinstance(audio_input, str):
        # A path was passed: the file already exists, so this is prompt-only
        # inference — read the bytes for the API but skip re-saving the audio.
        # (Fixes the previously assigned-but-never-used SaveNewFile flag.)
        file_name = audio_input
        with open(file_name, "rb") as fh:
            audio_input = fh.read()
        save_new_file = False
        # The transcription endpoint rejects raw bytes; pass a
        # (filename, bytes) tuple so the SDK can infer name and format.
        transcription_source = (file_name, audio_input)
    else:
        transcription_source = audio_input
    # NOTE(review): removed the leftover `st.write(audio_input)` debug call
    # that dumped raw audio bytes into the UI.

    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        file=transcription_source,
    )
    st.session_state.messages.append({"role": "user", "content": transcription.text})
    with st.chat_message("assistant"):
        st.markdown(transcription.text)

    SpeechSynthesis(transcription.text)
    if save_new_file:
        # Only write the .wav for newly uploaded audio; a str input means the
        # file is already on disk.
        filename = generate_filename(transcription.text, "wav")
        create_audio_file(filename, audio_input, should_save)

    filename = generate_filename(transcription.text, "md")
    create_file(filename, transcription.text, transcription.text, should_save)
|
| 344 |
+
|
| 345 |
+
def process_audio_for_video(video_input):
    """Transcribe a video's audio track and render a Markdown summary.

    Returns the summary text, or None when the input is empty or
    transcription/summarization fails.  Relies on module-level `client`,
    `MODEL`, and `st`.
    """
    if not video_input:
        return None
    try:
        transcription = client.audio.transcriptions.create(
            model="whisper-1",
            file=video_input,
        )
        response = client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": """You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown."""},
                # Interpolate transcription.text — not the response object,
                # whose repr would be summarized instead of the transcript.
                {"role": "user", "content": [{"type": "text", "text": f"The audio transcription is: {transcription.text}"}],},
            ],
            temperature=0,
        )
        st.markdown(response.choices[0].message.content)
        return response.choices[0].message.content
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; API/transcription failures fall through here.
        st.write('No transcript')
        return None
|
| 364 |
+
|
| 365 |
#@st.cache_resource
|
| 366 |
def process_image(image_input, user_prompt):
|
| 367 |
SaveNewFile=True
|
|
|
|
| 441 |
else:
|
| 442 |
st.warning("Audio file not saved.")
|
| 443 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 444 |
def save_video(video_file):
|
| 445 |
# Save the uploaded video file
|
| 446 |
with open(video_file.name, "wb") as f:
|