Sebbe33 committed on
Commit
835bf99
·
verified ·
1 Parent(s): 5b7402e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -0
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+
4
+ import streamlit as st
5
+ from google import genai
6
+ from jinja2 import Template
7
+
8
# Jinja2 template for the transcription request. ``speakers`` is the list of
# known speaker names; the loop emits one "- name" bullet per speaker.
PROMPT_TEMPLATE = """Generate a transcript of the episode. Include timestamps and identify speakers.

Speakers are:
{% for speaker in speakers %}- {{ speaker }}{% if not loop.last %}\n{% endif %}{% endfor %}

eg:
[00:00] Brady: Hello there.
[00:02] Tim: Hi Brady.

It is important to include the correct speaker names. Use the names you identified earlier. If you really don't know the speaker's name, identify them with a letter of the alphabet, eg there may be an unknown speaker 'A' and another unknown speaker 'B'.

If there is music or a short jingle playing, signify like so:
[01:02] [MUSIC] or [01:02] [JINGLE]

If you can identify the name of the music or jingle playing then use that instead, eg:
[01:02] [Firework by Katy Perry] or [01:02] [The Sofa Shop jingle]

If there is some other sound playing try to identify the sound, eg:
[01:02] [Bell ringing]

Each individual caption should be quite short, a few short sentences at most.

Signify the end of the episode with [END].

Don't use any markdown formatting, like bolding or italics.

Only use characters from the English alphabet, unless you genuinely believe foreign characters are correct.

It is important that you use the correct words and spell everything correctly. Use the context of the podcast to help.
If the hosts discuss something like a movie, book or celebrity, make sure the movie, book, or celebrity name is spelled correctly."""


def parse_speakers(raw: str) -> list[str]:
    """Split a comma-separated string into a list of speaker names.

    Surrounding whitespace is stripped from each name, and empty entries
    (from doubled or trailing commas, or whitespace-only segments) are
    dropped so they do not turn into blank bullets in the prompt.

    Args:
        raw: The text typed by the user; may be empty or ``None``.

    Returns:
        The non-empty speaker names, in input order.
    """
    if not raw:
        return []
    stripped = (part.strip() for part in raw.split(","))
    return [name for name in stripped if name]


def build_prompt(speakers: list[str]) -> str:
    """Render ``PROMPT_TEMPLATE`` with the given speaker names."""
    return Template(PROMPT_TEMPLATE).render(speakers=speakers)


def transcribe(api_key: str, audio_file, speakers: list[str]) -> str:
    """Upload the audio to Gemini and return the generated transcript text.

    Args:
        api_key: Gemini API key used to create the client.
        audio_file: The object returned by ``st.file_uploader``; its
            ``name`` and ``getvalue()`` are used.
        speakers: Known speaker names to list in the prompt.

    Returns:
        The ``text`` attribute of the model response.

    Raises:
        Exception: Whatever the file write, upload, or generation call
            raises; the temporary file is removed either way.
    """
    # Keep the original extension: the upload call is given only a path, and
    # an extension-less temp name leaves it nothing to infer a MIME type from.
    suffix = os.path.splitext(audio_file.name)[1]
    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
    try:
        with tmp_file:
            # getvalue() returns the whole buffer regardless of the current
            # read position, unlike read().
            tmp_file.write(audio_file.getvalue())

        client = genai.Client(api_key=api_key)
        uploaded_file = client.files.upload(file=tmp_file.name)
        response = client.models.generate_content(
            model="gemini-2.0-flash",
            contents=[build_prompt(speakers), uploaded_file],
        )
        return response.text
    finally:
        # delete=False above means cleanup is our responsibility.
        os.remove(tmp_file.name)


def main() -> None:
    """Render the Streamlit page and run a transcription on button press."""
    st.title("Audio Transcription with Speaker Identification")
    st.write("Upload an audio file to generate a transcript with speakers identified.")

    # API key: the text box takes precedence over the environment variable.
    api_key_input = st.text_input(
        "Gemini API Key",
        type="password",
        help="You can also set it via GEMINI_API_KEY environment variable.",
    )
    api_key = api_key_input or os.getenv("GEMINI_API_KEY")

    speakers_input = st.text_input(
        "Known Speakers (comma-separated)",
        help="List known speaker names. Leave empty if unknown.",
    )
    speakers = parse_speakers(speakers_input)

    audio_file = st.file_uploader(
        "Upload Audio File",
        type=["mp3", "wav", "m4a", "ogg", "mp4"],
    )

    if not st.button("Generate Transcript"):
        return
    if not api_key:
        st.error("Please provide a Gemini API key.")
        return
    if not audio_file:
        st.error("Please upload an audio file.")
        return

    try:
        transcript = transcribe(api_key, audio_file, speakers)
        st.subheader("Transcript")
        st.code(transcript, language="text")
    except Exception as e:
        # Top-level UI boundary: show any failure to the user instead of
        # letting the script crash.
        st.error(f"An error occurred: {str(e)}")


# NOTE(review): relies on `streamlit run` executing the script as __main__ — confirm.
if __name__ == "__main__":
    main()