umaradnaan committed on
Commit
0f08dd3
·
verified ·
1 Parent(s): 373b471

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -57
app.py CHANGED
@@ -3,114 +3,98 @@ import gradio as gr
3
  import google.generativeai as genai
4
  import speech_recognition as sr
5
  import tempfile
6
- import base64
7
- import time
8
 
9
  # -----------------------------
10
- # Gemini 1.5 Flash Setup
11
  # -----------------------------
12
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
13
  model = genai.GenerativeModel("gemini-1.5-flash")
14
 
15
  # -----------------------------
16
- # Voice to Text Setup
17
  # -----------------------------
18
  recognizer = sr.Recognizer()
19
 
20
  def voice_to_text(audio_bytes):
21
- """Convert uploaded mic audio to text."""
 
22
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
23
  tmp.write(audio_bytes)
24
- temp_path = tmp.name
25
-
26
- with sr.AudioFile(temp_path) as source:
27
  audio = recognizer.record(source)
28
  return recognizer.recognize_google(audio)
29
 
30
  # -----------------------------
31
- # Chat + Correction Loop
32
  # -----------------------------
33
- def generate_reply(user_input, history):
34
  if history is None:
35
  history = []
36
 
37
- # Auto-correction logic
38
  corrected = model.generate_content(
39
- f"Correct the user's sentence only if it's wrong. "
40
- f"If correct, return same sentence.\nSentence: {user_input}"
41
  ).text
42
 
43
- # If wrong → ask again until correct
44
- if corrected.lower() != user_input.lower():
45
- return (
46
- history + [
47
- ("User", user_input),
48
- ("AI", f"❌ Incorrect. Try again.\nCorrect sentence: **{corrected}**")
49
- ],
50
- corrected,
51
- )
52
-
53
- # Generate normal chatbot response
54
  response = model.generate_content(
55
- f"You are a friendly AI assistant. Continue conversation naturally.\nUser: {corrected}"
56
  ).text
57
 
58
  history.append(("User", corrected))
59
  history.append(("AI", response))
60
-
61
- return history, response
62
 
63
  # -----------------------------
64
- # Gradio UI
65
  # -----------------------------
66
- css = """
67
- #chatbox {height: 520px !important;}
68
- """
69
-
70
- def stream_chat(message, history):
71
- """Stream response in real-time"""
72
- history = history or []
73
 
74
  reply = model.generate_content(
75
- message, stream=True
76
  )
77
 
78
- output = ""
79
  for chunk in reply:
80
  if chunk.text:
81
- output += chunk.text
82
- yield output
83
 
84
  # -----------------------------
85
- # UI Layout (Layout 1)
86
  # -----------------------------
87
- with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
88
 
89
- gr.HTML("<h2 style='text-align:center;'>🎤 AI Voice Chatbot (Gemini 1.5 Flash)</h2>")
 
90
 
91
- chatbot = gr.Chatbot(
92
- avatar_images=("assets/avatars/ai_avatar.png", "assets/avatars/user_avatar.png"),
93
- elem_id="chatbox",
94
- height=530
95
- )
96
 
97
  with gr.Row():
98
- msg = gr.Textbox(placeholder="Type or speak your message...")
99
- mic = gr.Audio(source="microphone", type="bytes", label="🎤 Speak your topic")
100
- send_btn = gr.Button("Send")
101
 
102
- # Voice input handler
103
  def handle_voice(audio, history):
104
- if audio is None:
105
- return history
106
  text = voice_to_text(audio)
 
 
107
  history.append(("User", text))
108
  return history, text
109
 
110
- mic.submit(handle_voice, [mic, chatbot], [chatbot, msg])
111
 
112
- # Text conversation submit
113
- send_btn.click(stream_chat, [msg, chatbot], chatbot)
114
- msg.submit(stream_chat, [msg, chatbot], chatbot)
115
 
116
  app.launch()
 
3
  import google.generativeai as genai
4
  import speech_recognition as sr
5
  import tempfile
 
 
6
 
7
  # -----------------------------
8
+ # Gemini Setup
9
  # -----------------------------
10
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
11
  model = genai.GenerativeModel("gemini-1.5-flash")
12
 
13
  # -----------------------------
14
+ # Voice to Text
15
  # -----------------------------
16
  recognizer = sr.Recognizer()
17
 
18
  def voice_to_text(audio_bytes):
19
+ if audio_bytes is None:
20
+ return ""
21
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
22
  tmp.write(audio_bytes)
23
+ path = tmp.name
24
+ with sr.AudioFile(path) as source:
 
25
  audio = recognizer.record(source)
26
  return recognizer.recognize_google(audio)
27
 
28
  # -----------------------------
29
+ # AI Logic
30
  # -----------------------------
31
+ def generate_reply(message, history):
32
  if history is None:
33
  history = []
34
 
35
+ # Sentence correction
36
  corrected = model.generate_content(
37
+ f"Fix the user's sentence ONLY if it is grammatically wrong.\nUser: {message}"
 
38
  ).text
39
 
40
+ if corrected.lower() != message.lower():
41
+ history.append(("User", message))
42
+ history.append(("AI", f"❌ Incorrect. Repeat correctly:\n➡️ {corrected}"))
43
+ return history
44
+
45
+ # Natural conversation
 
 
 
 
 
46
  response = model.generate_content(
47
+ f"Continue conversation naturally.\nUser: {corrected}"
48
  ).text
49
 
50
  history.append(("User", corrected))
51
  history.append(("AI", response))
52
+ return history
 
53
 
54
  # -----------------------------
55
+ # Gradio Streaming
56
  # -----------------------------
57
+ def stream_reply(message, history):
58
+ if history is None:
59
+ history = []
 
 
 
 
60
 
61
  reply = model.generate_content(
62
+ f"User: {message}", stream=True
63
  )
64
 
65
+ full = ""
66
  for chunk in reply:
67
  if chunk.text:
68
+ full += chunk.text
69
+ yield full
70
 
71
  # -----------------------------
72
+ # UI
73
  # -----------------------------
74
+ with gr.Blocks(theme=gr.themes.Soft()) as app:
75
 
76
+ gr.Markdown("### 🎤 AI Voice Conversation Bot (Gemini 1.5 Flash)")
77
+ gr.Markdown("Speak a topic → AI starts → If you pronounce wrong → AI corrects you until perfect.")
78
 
79
+ chatbot = gr.Chatbot(height=500, show_label=False)
 
 
 
 
80
 
81
  with gr.Row():
82
+ txt = gr.Textbox(placeholder="Type or speak...", scale=3)
83
+ mic = gr.Audio(type="bytes", label="🎤 Speak", scale=2)
84
+ send = gr.Button("Send", scale=1)
85
 
86
+ # Voice handler
87
  def handle_voice(audio, history):
 
 
88
  text = voice_to_text(audio)
89
+ if not text:
90
+ return history, ""
91
  history.append(("User", text))
92
  return history, text
93
 
94
+ mic.change(handle_voice, [mic, chatbot], [chatbot, txt])
95
 
96
+ # Text submit
97
+ send.click(generate_reply, [txt, chatbot], chatbot)
98
+ txt.submit(generate_reply, [txt, chatbot], chatbot)
99
 
100
  app.launch()