1MR committed on
Commit
f3fea9b
·
verified ·
1 Parent(s): 85a4689

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +159 -0
app.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from groq import Groq
3
+ from langchain_google_genai import ChatGoogleGenerativeAI
4
+ import os
5
+ import tempfile
6
+
7
# -------------------- API Configuration --------------------
# API keys are looked up by environment-variable NAME; the values themselves
# must live only in the Hugging Face Space secrets, never in this file.
# NOTE: a key value that has ever been committed to source control must be
# treated as leaked and rotated with the provider.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")

# Initialize clients
# Groq client: used for Whisper transcription and text-to-speech.
client = Groq(api_key=GROQ_API_KEY)
# Gemini chat model: used to generate the assistant's reply.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    google_api_key=GOOGLE_API_KEY,
    max_output_tokens=500,
)
19
+
20
+ # -------------------- Core Functions --------------------
21
def transcribe_audio(audio_path, language="ar"):
    """Send an audio file to Groq Whisper and return its transcript.

    Args:
        audio_path: Path of the audio file to read and upload.
        language: Language hint forwarded to the transcription request.

    Returns:
        A ``(text, language)`` tuple read from the transcription response.
        If anything raises, the tuple is
        ``("Error in transcription: <message>", None)`` instead.
    """
    try:
        # Read the whole file up front; the request receives raw bytes.
        with open(audio_path, "rb") as source:
            payload = source.read()

        upload_name = os.path.basename(audio_path)
        result = client.audio.transcriptions.create(
            language=language,
            response_format="verbose_json",
            model="whisper-large-v3-turbo",
            file=(upload_name, payload),
        )
        return result.text, result.language
    except Exception as exc:
        # Errors are reported in-band as a message with no language.
        return f"Error in transcription: {exc}", None
34
+
35
def get_ai_response(text):
    """Ask the Gemini chat model for a reply to ``text``.

    Returns:
        The ``content`` attribute of the model's response, or the string
        ``"Error getting AI response: <message>"`` if the call raises.
    """
    try:
        reply = llm.invoke(text)
        answer = reply.content
    except Exception as exc:
        # Failures are reported in-band so the caller can show them in the UI.
        answer = f"Error getting AI response: {exc}"
    return answer
42
+
43
def text_to_speech(text, language="ar"):
    """Convert text to speech using Groq TTS and return the audio file path.

    Args:
        text: Text to synthesize. Empty, whitespace-only or non-string input
            is rejected locally without calling the API.
        language: Language of ``text``. ``"ar"`` or ``"arabic"`` (any case,
            surrounding whitespace ignored) selects the Arabic model; any
            other value selects the English model.

    Returns:
        Path to a temporary ``.mp3`` file holding the synthesized audio, or
        ``None`` when there is nothing to synthesize or the request fails.
        The file is created with ``delete=False``; the caller owns it.
    """
    # Local import keeps this function self-contained with respect to the
    # module's import block.
    import logging

    if not isinstance(text, str) or not text.strip():
        return None

    # The caller may pass the language reported by the transcription step,
    # which can be a name such as "Arabic" rather than the code "ar".
    normalized = str(language or "").strip().lower()
    if normalized in ("ar", "arabic"):
        voice = "Amira-PlayAI"
        model = "playai-tts-arabic"
    else:
        # PlayAI voices are named "<Name>-PlayAI"; "alloy" is not one of them.
        voice = "Fritz-PlayAI"
        model = "playai-tts"

    output_path = None
    try:
        response = client.audio.speech.create(
            model=model,
            voice=voice,
            response_format="mp3",
            input=text,
        )

        # Reserve a unique path, then close the handle before writing to it
        # by name so no file descriptor is leaked.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            output_path = temp_file.name
        response.write_to_file(output_path)
        return output_path
    except Exception:
        # Best effort: the UI simply shows no audio, but the cause is logged.
        logging.getLogger(__name__).exception("Text-to-speech request failed")
        if output_path is not None:
            try:
                os.remove(output_path)
            except OSError:
                # Nothing more to do if the orphaned temp file cannot be removed.
                pass
        return None
67
+
68
+ # -------------------- Gradio Interface Function --------------------
69
def process_voice_chat(audio, language_choice):
    """Run the voice pipeline: transcribe, get an AI reply, synthesize speech.

    Args:
        audio: Path of the recorded or uploaded audio file, or ``None`` when
            the user supplied nothing.
        language_choice: UI label, ``"Arabic"`` or ``"English"``. Any other
            value falls back to Arabic.

    Returns:
        A ``(transcription, ai_response, audio_path)`` tuple for the three
        output components. When a step fails, its error message is placed in
        the corresponding text slot and the remaining slots are ``""`` /
        ``None``.
    """
    if audio is None:
        return "Please provide an audio input", "", None

    # Map language choice to code
    lang_code = {"Arabic": "ar", "English": "en"}.get(language_choice, "ar")

    # Step 1: Transcribe audio
    transcription, detected_lang = transcribe_audio(audio, lang_code)

    # A transcription failure is signalled by the exact error prefix together
    # with a missing language. Matching only on "Error" would also reject a
    # genuine transcript that happens to start with that word.
    if detected_lang is None and transcription.startswith("Error in transcription:"):
        return transcription, "", None

    # Step 2: Get AI response
    ai_response = get_ai_response(transcription)

    # Same reasoning: match only the helper's own error prefix, so a normal
    # reply that begins with "Error..." is still spoken.
    if ai_response.startswith("Error getting AI response:"):
        return transcription, ai_response, None

    # Step 3: Convert response to speech
    # Use detected language if available, otherwise use selected language
    output_lang = detected_lang if detected_lang else lang_code
    audio_output = text_to_speech(ai_response, output_lang)

    return transcription, ai_response, audio_output
97
+
98
# -------------------- Gradio Interface --------------------
# Layout: a header, then one row with an input column (language, audio,
# button) and an output column (transcript, reply text, reply audio),
# followed by usage notes. Markdown bodies are kept flush-left so their
# contents are reproduced exactly.
with gr.Blocks(theme=gr.themes.Soft(), title="Voice Chat Assistant") as demo:
    # Page header
    gr.Markdown(
        """
# 🎤 Voice Chat Assistant
Upload an audio file or record your voice to chat with AI.
The AI will respond in the same language!
"""
    )

    with gr.Row():
        # Input column
        with gr.Column():
            language = gr.Radio(label="Select Language", value="Arabic", choices=["Arabic", "English"])
            audio_input = gr.Audio(label="Record or Upload Audio", type="filepath", sources=["microphone", "upload"])
            submit_btn = gr.Button("Process", size="lg", variant="primary")

        # Output column
        with gr.Column():
            transcription_output = gr.Textbox(lines=3, label="Your Message (Transcription)")
            ai_response_output = gr.Textbox(lines=5, label="AI Response")
            audio_output = gr.Audio(type="filepath", label="AI Voice Response")

    # Button action: run the full pipeline and fill the three output components.
    submit_btn.click(
        outputs=[transcription_output, ai_response_output, audio_output],
        inputs=[audio_input, language],
        fn=process_voice_chat,
    )

    # Usage and deployment notes shown below the controls.
    gr.Markdown(
        """
### 📝 Instructions:
1. Select your language (Arabic or English)
2. Record your voice using the microphone or upload an audio file
3. Click "Process" to get AI response with voice output

### 🔑 Setup for Hugging Face Spaces:
Add these secrets in your Space settings:
- `GROQ_API_KEY`: Your Groq API key
- `GOOGLE_API_KEY`: Your Google API key
"""
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()