umaradnaan committed on
Commit
af7fc5f
·
verified ·
1 Parent(s): 0f08dd3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -60
app.py CHANGED
@@ -3,98 +3,148 @@ import gradio as gr
3
  import google.generativeai as genai
4
  import speech_recognition as sr
5
  import tempfile
 
6
 
7
# -----------------------------
# Gemini Setup
# -----------------------------
# The key is looked up in the environment and handed to the Gemini client
# as-is (it may be None when the variable is unset).
_gemini_key = os.getenv("GEMINI_API_KEY")
genai.configure(api_key=_gemini_key)

# Single shared model handle used by every reply function below.
model = genai.GenerativeModel("gemini-1.5-flash")

# -----------------------------
# Voice to Text
# -----------------------------
# One recognizer instance is reused for all transcriptions.
recognizer = sr.Recognizer()
17
 
18
def voice_to_text(audio_bytes):
    """Transcribe raw audio bytes to text with Google speech recognition.

    Args:
        audio_bytes: Encoded audio data (written to a ``.wav`` temp file),
            or ``None``/empty when nothing was recorded.

    Returns:
        The recognized text, or ``""`` when there is no audio or the
        transcription fails.
    """
    import os  # local import: only needed for temp-file cleanup below

    # Treat both "no recording" (None) and an empty payload as nothing to do.
    if not audio_bytes:
        return ""

    # delete=False so the file can be reopened by path after this handle is
    # closed; closing first guarantees the bytes are flushed to disk.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp.write(audio_bytes)
        path = tmp.name

    try:
        with sr.AudioFile(path) as source:
            audio = recognizer.record(source)
        return recognizer.recognize_google(audio)
    except (sr.UnknownValueError, sr.RequestError, ValueError) as exc:
        # Unintelligible speech, service failure, or unreadable audio data:
        # report "nothing recognized" instead of crashing the UI callback.
        print("Transcription error:", exc)
        return ""
    finally:
        # The temp file was created with delete=False, so remove it ourselves.
        os.remove(path)
27
 
28
  # -----------------------------
29
- # AI Logic
30
  # -----------------------------
31
def generate_reply(message, history):
    """Correct the user's sentence or continue the conversation.

    The model is first asked to fix the sentence. If its answer differs from
    what the user wrote (ignoring case and surrounding whitespace), the user
    is asked to repeat the corrected sentence; otherwise the model is asked
    to continue the conversation.

    Args:
        message: The user's text (``None`` is treated as empty).
        history: Chat history as a list of ``(user_message, bot_message)``
            pairs, or ``None`` for a new conversation.

    Returns:
        The history list with one new ``(user_message, bot_message)`` pair.
    """
    if history is None:
        history = []
    message = (message or "").strip()

    # Sentence correction. Model output is stripped because stray leading or
    # trailing whitespace would make the comparison below fail every time.
    corrected = model.generate_content(
        f"Fix the user's sentence ONLY if it is grammatically wrong.\nUser: {message}"
    ).text.strip()

    if corrected.lower() != message.lower():
        # The Chatbot component renders each entry as (user, bot), so the
        # user's text and the correction belong in ONE pair.
        history.append((message, f"❌ Incorrect. Repeat correctly:\n➡️ {corrected}"))
        return history

    # Natural conversation
    response = model.generate_content(
        f"Continue conversation naturally.\nUser: {corrected}"
    ).text

    history.append((corrected, response))
    return history
53
 
54
  # -----------------------------
55
- # Gradio Streaming
56
  # -----------------------------
57
def stream_reply(message, history):
    """Yield the model's reply to *message* as it streams in.

    Each yielded value is the full text received so far, so a consumer can
    simply display the latest value. ``history`` is accepted for signature
    compatibility but is not used to build the prompt.
    """
    stream = model.generate_content(f"User: {message}", stream=True)

    pieces = []
    for part in stream:
        # Skip chunks that carry no text so nothing is re-yielded for them.
        if part.text:
            pieces.append(part.text)
            yield "".join(pieces)
70
 
71
  # -----------------------------
72
- # UI
73
  # -----------------------------
74
with gr.Blocks(theme=gr.themes.Soft()) as app:

    gr.Markdown("### 🎤 AI Voice Conversation Bot (Gemini 1.5 Flash)")
    gr.Markdown("Speak a topic → AI starts → If you pronounce wrong → AI corrects you until perfect.")

    chatbot = gr.Chatbot(height=500, show_label=False)

    with gr.Row():
        txt = gr.Textbox(placeholder="Type or speak...", scale=3)
        # gr.Audio only documents type="numpy" and type="filepath";
        # "filepath" hands the callback a path to the recorded file.
        mic = gr.Audio(type="filepath", label="🎤 Speak", scale=2)
        send = gr.Button("Send", scale=1)

    # Voice handler
    def handle_voice(audio, history):
        """Transcribe a recording and place the text in the chat and textbox.

        Args:
            audio: Path to the recorded audio file, or ``None`` when the
                audio component was cleared.
            history: Current chat history (may be ``None`` before any turn).

        Returns:
            ``(history, text)`` where ``text`` is ``""`` if nothing was
            recognized.
        """
        history = history or []
        if not audio:
            return history, ""
        # voice_to_text takes raw bytes, so read the recorded file here.
        with open(audio, "rb") as recording:
            text = voice_to_text(recording.read())
        if not text:
            return history, ""
        # Chat entries are (user, bot) pairs; None leaves the bot side empty.
        history.append((text, None))
        return history, text

    mic.change(handle_voice, [mic, chatbot], [chatbot, txt])

    # Text submit
    send.click(generate_reply, [txt, chatbot], chatbot)
    txt.submit(generate_reply, [txt, chatbot], chatbot)

app.launch()
 
3
  import google.generativeai as genai
4
  import speech_recognition as sr
5
  import tempfile
6
+ import time
7
 
8
# -----------------------------
# CONFIG
# -----------------------------
# The Gemini key comes from the GEMINI_API_KEY environment variable; startup
# is aborted with a clear message when it is missing or empty.
API_KEY = os.getenv("GEMINI_API_KEY")
if API_KEY is None or API_KEY == "":
    raise RuntimeError("Please set the GEMINI_API_KEY environment variable in Space settings.")

genai.configure(api_key=API_KEY)

# Single shared model handle used by the reply logic below.
model = genai.GenerativeModel("gemini-1.5-flash")

# -----------------------------
# Speech recognition helper
# -----------------------------
# One recognizer instance is reused for every transcription.
recognizer = sr.Recognizer()
22
 
23
def audiofile_to_text(audio_filepath):
    """Return the transcript of the audio file at *audio_filepath*.

    The file is loaded through ``sr.AudioFile`` and sent to Google speech
    recognition. An empty string is returned when no path is given or when
    anything goes wrong; the failure is printed to the console.
    """
    if audio_filepath:
        try:
            with sr.AudioFile(audio_filepath) as clip:
                recording = recognizer.record(clip)
            return recognizer.recognize_google(recording)
        except Exception as err:
            # Best effort: the UI treats "" as "could not transcribe".
            print("Transcription error:", err)
    return ""
 
 
 
 
 
 
36
 
37
  # -----------------------------
38
+ # AI logic: correction + conversation
39
  # -----------------------------
40
def _normalize_sentence(sentence):
    """Reduce a sentence to lowercase words so two renderings can be compared."""
    import string

    cleaned = sentence.strip().casefold()
    cleaned = cleaned.translate(str.maketrans("", "", string.punctuation))
    return " ".join(cleaned.split())


def generate_reply(message, history):
    """Run one conversation step: correct the sentence or continue chatting.

    The model is asked for a corrected version of the user's sentence. If the
    correction differs from what the user said, the user is asked to repeat
    it; otherwise the model is asked to continue the conversation.

    Args:
        message: The user's text (``None``/blank is reported as missing).
        history: Chat history as a list of ``(user_message, bot_message)``
            pairs, or ``None``/empty for a new conversation.

    Returns:
        The history list with one new ``(user_message, bot_message)`` pair.
        A bot-only notice uses ``None`` on the user side.
    """
    history = history or []

    # sanitize
    user_msg = (message or "").strip()
    if not user_msg:
        history.append((None, "❌ I didn't receive a message. Please type or speak something."))
        return history

    # Ask Gemini to correct the user's sentence (if wrong)
    try:
        correction_prompt = (
            "You are a pronunciation and grammar tutor. "
            "Given the user's sentence, if it contains grammatical or word errors, "
            "output the corrected sentence only. If it's already correct, output the same sentence.\n\n"
            f"User sentence: \"{user_msg}\""
        )
        corrected = model.generate_content(correction_prompt).text.strip()
    except Exception as e:
        # UI boundary: any client/network/blocked-response failure becomes a
        # chat notice instead of an unhandled callback error.
        print("Error calling Gemini (correction):", e)
        history.append((user_msg, "⚠️ Error contacting language model (correction). Try again later."))
        return history

    # The prompt wraps the sentence in double quotes; drop them if the model
    # echoes that wrapping back.
    if len(corrected) >= 2 and corrected[0] == corrected[-1] == '"':
        corrected = corrected[1:-1].strip()

    # Compare words only: a spoken transcript may lack the capitalization and
    # punctuation the model adds, and that alone is not a mistake to repeat.
    if _normalize_sentence(corrected) != _normalize_sentence(user_msg):
        history.append((user_msg, f"❌ Incorrect. Please repeat this sentence exactly:\n➡ {corrected}"))
        # The expected sentence is not stored; the next message is simply
        # run through the same correction check again.
        return history

    # Otherwise, continue the conversation
    try:
        convo_prompt = (
            "You are a friendly conversational tutor. Continue the conversation naturally, "
            "ask a short follow-up question or make a short comment relevant to the user's message.\n\n"
            f"User: \"{corrected}\""
        )
        response = model.generate_content(convo_prompt).text.strip()
    except Exception as e:
        print("Error calling Gemini (conversation):", e)
        history.append((user_msg, "⚠️ Error contacting language model (conversation). Try again later."))
        return history

    history.append((corrected, response))
    return history
92
 
93
  # -----------------------------
94
+ # Transcribe audio button handler
95
  # -----------------------------
96
def transcribe_and_show(audio_filepath, history):
    """Transcribe an audio file and surface the text in the chat and textbox.

    Args:
        audio_filepath: Path of the audio file to transcribe (may be empty).
        history: Chat history as ``(user_message, bot_message)`` pairs, or
            ``None``/empty for a new conversation.

    Returns:
        ``(history, text)``: the updated history and the transcript for the
        textbox. On failure the transcript is ``""`` and a bot-side notice is
        added to the history.
    """
    history = history or []
    text = audiofile_to_text(audio_filepath)
    if not text:
        # Bot-only notice: None leaves the user side of the pair empty.
        history.append((None, "❌ Could not transcribe audio. Please try again or use a clearer recording."))
        return history, ""
    # Show the transcript on the user side and hand it to the textbox so it
    # can be edited before sending.
    history.append((text, None))
    return history, text
112
 
113
+ # -----------------------------
114
+ # Reset conversation
115
+ # -----------------------------
116
def reset_chat():
    """Return a fresh, empty chat history."""
    return list()
 
 
 
 
118
 
119
  # -----------------------------
120
+ # Gradio UI
121
  # -----------------------------
122
title_md = """
# 🎤 AI Voice Conversation Tutor
Speak a topic (or type it). The AI will start the conversation.
If you say a sentence incorrectly, the AI will show the corrected sentence and ask you to repeat it until correct.
"""

with gr.Blocks() as app:
    gr.Markdown(title_md)

    chatbot = gr.Chatbot(value=[], label="Conversation", elem_id="chatbox")

    with gr.Row():
        with gr.Column(scale=3):
            txt = gr.Textbox(placeholder="Type your message or use the microphone and Transcribe...", label="Message")
            send = gr.Button("Send")
            reset = gr.Button("Reset Conversation")
        with gr.Column(scale=2):
            # The transcription helper reads files through sr.AudioFile, so
            # the label lists formats it can open.
            # NOTE(review): source="upload" presumably shows only an upload
            # widget; confirm whether microphone recording is intended here.
            audio = gr.Audio(source="upload", type="filepath", label="Record or upload audio (wav/aiff/flac)")
            transcribe = gr.Button("Transcribe Audio")

    # Hook up events
    transcribe.click(transcribe_and_show, inputs=[audio, chatbot], outputs=[chatbot, txt])
    send.click(generate_reply, inputs=[txt, chatbot], outputs=chatbot)
    txt.submit(generate_reply, inputs=[txt, chatbot], outputs=chatbot)
    # Use the module's reset helper rather than a duplicate inline lambda.
    reset.click(reset_chat, outputs=chatbot)

    gr.Markdown("**How to use:**\n\n1. Speak a topic using the audio control and click **Transcribe Audio** (or type the topic in the box). \n2. The AI will start the conversation. \n3. If you pronounce incorrectly, AI will show the corrected sentence — repeat it (record & transcribe or type) until correct. \n")

app.launch()