Alamgirapi committed on
Commit
ffe7e3d
·
verified ·
1 Parent(s): 8392c67

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +134 -0
app.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from src.response.gpt import gpt_response
3
+ from src.SpeechToText.sr import transcribe_audio, clear_history
4
+ from src.SpeechToText.hamsa import transcribe_audio_hamsa
5
+ from datetime import datetime
6
+ from loguru import logger
7
+
8
+
9
+ # Create Gradio Interface
10
+ with gr.Blocks(title="Multilingual Speech to Text") as iface:
11
+ gr.Markdown("# 🎙️ Multilingual Speech to Text (Arabic & English)")
12
+ gr.Markdown("Speak in Arabic or English, or let the system auto-detect the language!")
13
+
14
+ with gr.Row():
15
+ with gr.Column(scale=1):
16
+ language_selector = gr.Dropdown(
17
+ choices=[
18
+ "English",
19
+ "Arabic",
20
+ "Arabic (Egypt)",
21
+ "Arabic (UAE)",
22
+ "Arabic (Lebanon)",
23
+ "Arabic (Saudi Arabia)",
24
+ "Arabic (Kuwait)",
25
+ "Arabic (Jordan)",
26
+ "Auto-detect"
27
+ ],
28
+ value="Auto-detect",
29
+ label="Select Language"
30
+ )
31
+
32
+ audio_input = gr.Audio(
33
+ sources=["microphone", "upload"],
34
+ type="filepath",
35
+ label="🎤 Speak or Upload Audio"
36
+ )
37
+
38
+ with gr.Row():
39
+ submit_btn = gr.Button("🔄 Transcribe", variant="primary")
40
+ clear_btn = gr.Button("🗑️ Clear History", variant="secondary")
41
+
42
+ with gr.Column(scale=1):
43
+ current_output = gr.Textbox(
44
+ label="Current Transcription",
45
+ placeholder="Your transcribed text will appear here...",
46
+ lines=3,
47
+ rtl=True # Right-to-left for Arabic text
48
+ )
49
+
50
+ gpt_output = gr.Textbox(
51
+ label="AI Therapeutic Response",
52
+ placeholder="AI response will appear here...",
53
+ lines=5,
54
+ rtl=True,
55
+ interactive=False
56
+ )
57
+
58
+ history_output = gr.Textbox(
59
+ label="Conversation History",
60
+ placeholder="All transcriptions will be saved here with timestamps...",
61
+ lines=10,
62
+ max_lines=20,
63
+ interactive=False
64
+ )
65
+
66
+ # State to maintain history
67
+ history_state = gr.State("")
68
+
69
+ # Function to process transcription and get GPT response
70
+ def process_audio_and_respond(audio, language, history):
71
+ # Get transcription
72
+ try:
73
+ updated_history, current_text = transcribe_audio(audio, language, history)
74
+ logger.info(f"Transcription successful: {current_text}")
75
+ except Exception as e:
76
+ updated_history, current_text = transcribe_audio_hamsa(audio, language, history)
77
+ logger.error(f"Transcription failed. Apply Fallback with Hamsa API: {e}")
78
+ if not current_text:
79
+ current_text = "Transcription failed. Please try again."
80
+
81
+ # Get GPT response if there's transcribed text
82
+ gpt_result = ""
83
+ if current_text and current_text.strip():
84
+ response = gpt_response(current_text)
85
+ gpt_result = f"Response: {response['response']} \n\nEmotion: {response['emotional_state']}"
86
+
87
+ # Update history with both query and answer
88
+
89
+ timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
90
+ detected_lang = response.get('detected_language', 'Unknown')
91
+
92
+ # Format the history entry
93
+ history_entry = f"[{timestamp}] [{language}] [{detected_lang}]\n"
94
+ history_entry += f"Query: {current_text}\n"
95
+ history_entry += f"Answer: {response['response']}"
96
+ history_entry += "-----------------------\n\n"
97
+
98
+ # Add to history
99
+ if updated_history:
100
+ updated_history = history_entry + updated_history
101
+ else:
102
+ updated_history = history_entry
103
+
104
+ return updated_history, current_text, gpt_result
105
+
106
+ # Event handlers
107
+ submit_btn.click(
108
+ fn=process_audio_and_respond,
109
+ inputs=[audio_input, language_selector, history_state],
110
+ outputs=[history_state, current_output, gpt_output]
111
+ ).then(
112
+ fn=lambda h: h,
113
+ inputs=[history_state],
114
+ outputs=[history_output]
115
+ )
116
+
117
+ clear_btn.click(
118
+ fn=clear_history,
119
+ outputs=[history_state, history_output]
120
+ )
121
+
122
+ # Auto-submit when audio is uploaded/recorded
123
+ audio_input.change(
124
+ fn=process_audio_and_respond,
125
+ inputs=[audio_input, language_selector, history_state],
126
+ outputs=[history_state, current_output, gpt_output]
127
+ ).then(
128
+ fn=lambda h: h,
129
+ inputs=[history_state],
130
+ outputs=[history_output]
131
+ )
132
+
133
+ if __name__ == "__main__":
134
+ iface.launch(server_name="0.0.0.0", server_port=7860, share=True)