Spaces:

Sebbe33
/

Transcription_gemini

Running

App Files Files Community

Sebbe33 committed on Feb 22, 2025

Commit

835bf99

verified ·

1 Parent(s): 5b7402e

Create app.py

Browse files

Files changed (1) hide show

app.py +96 -0

app.py ADDED Viewed

	@@ -0,0 +1,96 @@

+import os
+import tempfile
+import streamlit as st
+from google import genai
+from jinja2 import Template
+# Page header shown above the input widgets.
+st.title("Audio Transcription with Speaker Identification")
+st.write("Upload an audio file to generate a transcript with speakers identified.")
+# API key: a value typed into the form takes precedence; otherwise fall back
+# to the GEMINI_API_KEY environment variable.
+api_key_input = st.text_input(
+    "Gemini API Key",
+    type="password",
+    help="You can also set it via GEMINI_API_KEY environment variable."
+)
+api_key = api_key_input or os.getenv("GEMINI_API_KEY")
+# Known speakers, entered as a comma-separated list.
+speakers_input = st.text_input(
+    "Known Speakers (comma-separated)",
+    help="List known speaker names. Leave empty if unknown."
+)
+# Strip whitespace and drop blank entries so input such as "Ann, , Bob," does
+# not produce empty speaker names in the prompt.
+speakers = [
+    name
+    for name in (part.strip() for part in (speakers_input or "").split(","))
+    if name
+]
+# Audio file to transcribe; the extension list restricts what the widget accepts.
+audio_file = st.file_uploader(
+    "Upload Audio File",
+    type=["mp3", "wav", "m4a", "ogg", "mp4"]
+)
+# Runs when the user clicks the button: validate inputs, stage the upload in a
+# temporary file, send it to Gemini together with the prompt, show the result.
+if st.button("Generate Transcript"):
+    if not api_key:
+        st.error("Please provide a Gemini API key.")
+    elif not audio_file:
+        st.error("Please upload an audio file.")
+    else:
+        # Keep the original extension on the temp file so the file type can be
+        # derived from the path when it is uploaded (a suffix-less temp name
+        # carries no type information).
+        suffix = os.path.splitext(audio_file.name)[1]
+        tmp_file_path = None
+        try:
+            # delete=False: the file must outlive the `with` block so it can be
+            # re-opened by path for the upload; it is removed in `finally`.
+            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
+                # Record the path first so cleanup still happens if the write fails.
+                tmp_file_path = tmp_file.name
+                # getvalue() returns the whole buffer regardless of the current
+                # read position of the uploaded file object.
+                tmp_file.write(audio_file.getvalue())
+            # Initialize GenAI client
+            client = genai.Client(api_key=api_key)
+            # Upload audio file
+            uploaded_file = client.files.upload(file=tmp_file_path)
+            # Create prompt template. The speaker list section is emitted only
+            # when at least one speaker name was provided; with names present
+            # the rendered prompt is unchanged.
+            prompt_template = Template("""Generate a transcript of the episode. Include timestamps and identify speakers.
+{% if speakers %}Speakers are:
+{% for speaker in speakers %}- {{ speaker }}{% if not loop.last %}\n{% endif %}{% endfor %}
+{% endif %}eg:
+[00:00] Brady: Hello there.
+[00:02] Tim: Hi Brady.
+It is important to include the correct speaker names. Use the names you identified earlier. If you really don't know the speaker's name, identify them with a letter of the alphabet, eg there may be an unknown speaker 'A' and another unknown speaker 'B'.
+If there is music or a short jingle playing, signify like so:
+[01:02] [MUSIC] or [01:02] [JINGLE]
+If you can identify the name of the music or jingle playing then use that instead, eg:
+[01:02] [Firework by Katy Perry] or [01:02] [The Sofa Shop jingle]
+If there is some other sound playing try to identify the sound, eg:
+[01:02] [Bell ringing]
+Each individual caption should be quite short, a few short sentences at most.
+Signify the end of the episode with [END].
+Don't use any markdown formatting, like bolding or italics.
+Only use characters from the English alphabet, unless you genuinely believe foreign characters are correct.
+It is important that you use the correct words and spell everything correctly. Use the context of the podcast to help.
+If the hosts discuss something like a movie, book or celebrity, make sure the movie, book, or celebrity name is spelled correctly.""")
+            prompt = prompt_template.render(speakers=speakers)
+            # Generate content
+            response = client.models.generate_content(
+                model="gemini-2.0-flash",
+                contents=[prompt, uploaded_file],
+            )
+            # Display results
+            st.subheader("Transcript")
+            transcript = response.text
+            if transcript:
+                st.code(transcript, language="text")
+            else:
+                # No text came back; say so instead of rendering an empty block.
+                st.warning("The model returned no transcript text.")
+        except Exception as e:
+            # Top-level UI boundary: report any failure to the user instead of
+            # letting the script crash.
+            st.error(f"An error occurred: {e}")
+        finally:
+            # Always remove the staged copy, including when the write or the
+            # API calls failed part-way through.
+            if tmp_file_path and os.path.exists(tmp_file_path):
+                os.remove(tmp_file_path)