PedroM2626 committed on
Commit
b650615
·
1 Parent(s): fd55eb5

feat: add virtual assistant with speech and text interaction

Browse files
Files changed (2) hide show
  1. app.py +168 -0
  2. requirements.txt +14 -0
app.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import tempfile
4
+ import time
5
+ import urllib.parse
6
+ import webbrowser
7
+ import gradio as gr
8
+ from dotenv import load_dotenv
9
+
10
# Compatibility patch for Python 3.13+: the stdlib 'aifc' and 'audioop'
# modules were removed (PEP 594). Register empty stub modules under those
# names so third-party audio libraries that still `import` them don't
# crash at import time (the stubs expose no functions — only imports of
# the bare module names are satisfied).
if sys.version_info >= (3, 13):
    import types
    sys.modules['aifc'] = types.ModuleType('aifc')
    sys.modules['audioop'] = types.ModuleType('audioop')
15
+
16
# Lazily-initialised singleton for the Whisper model; `whisper` (and gTTS
# elsewhere) are imported only on first use to keep startup fast.
whisper_model = None

def get_whisper_model():
    """Return the shared Whisper "base" model, loading it on first call.

    The model is cached in the module-level ``whisper_model`` global, so
    the (slow) load happens at most once per process.
    """
    global whisper_model
    if whisper_model is not None:
        return whisper_model
    import whisper
    print("Carregando modelo Whisper (Local e Gratuito)...")
    whisper_model = whisper.load_model("base")
    return whisper_model
26
+
27
def get_chatgpt_response(text, api_key):
    """Ask OpenAI's chat API to answer *text* and return the reply string.

    Never raises: a Portuguese error message string is returned both when
    the API key is missing/placeholder and when the API call itself fails.
    """
    key_is_unset = not api_key or api_key == "sua_chave_api_aqui"
    if key_is_unset:
        return "Erro: Chave API da OpenAI não configurada no arquivo .env."

    try:
        # Imported lazily so the app still starts without the package.
        import openai

        conversation = [
            {"role": "system", "content": "Você é um assistente virtual útil e conciso. Responda em português."},
            {"role": "user", "content": text},
        ]
        client = openai.OpenAI(api_key=api_key)
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=conversation,
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"Erro na IA: {str(e)}"
44
+
45
def try_local_commands(text):
    """Handle simple built-in commands before falling back to the LLM.

    Recognised commands: Wikipedia search, YouTube search, and "pharmacy
    near me" (opens Google Maps). Returns a confirmation/prompt string
    when a command matched, or None so the caller can use the AI instead.
    """
    s = (text or "").lower()

    def _query_without(*keywords):
        # Strip the trigger words (in order) and keep the search terms.
        q = s
        for kw in keywords:
            q = q.replace(kw, "")
        return q.strip()

    if "wikipedia" in s:
        query = _query_without("wikipedia", "pesquisar")
        if not query:
            return "O que devo pesquisar na Wikipedia?"
        webbrowser.open(
            "https://pt.wikipedia.org/wiki/Special:Search?search="
            + urllib.parse.quote_plus(query)
        )
        return f"Pesquisando '{query}' na Wikipedia."

    if "youtube" in s or "vídeo" in s or "video" in s:
        query = _query_without("youtube", "vídeo", "video", "pesquisar")
        if not query:
            return "O que devo pesquisar no YouTube?"
        webbrowser.open(
            "https://www.youtube.com/results?search_query="
            + urllib.parse.quote_plus(query)
        )
        return f"Pesquisando '{query}' no YouTube."

    if "farmácia" in s or "farmacia" in s:
        webbrowser.open("https://www.google.com/maps/search/farmacia+perto+de+mim")
        return "Abrindo mapa de farmácias próximas."

    # No local command matched.
    return None
68
+
69
def text_to_speech(text):
    """Synthesize *text* into a Portuguese MP3 file using gTTS.

    Returns the path of the generated temporary file, or None on any
    failure (gtts missing, no network, TTS error) — callers treat None
    as "no audio for this turn". The caller is responsible for the
    temp file's lifetime (it is not auto-deleted).
    """
    try:
        from gtts import gTTS
        tts = gTTS(text=text, lang='pt')
        # Fix: the original used NamedTemporaryFile(delete=False) and never
        # closed the handle — an fd leak, and on Windows the still-open file
        # is locked so gTTS cannot write to the same path. mkstemp + close
        # yields a safe, closed path to write to.
        fd, path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        tts.save(path)
        return path
    except Exception as e:
        print(f"Erro TTS: {e}")
        return None
79
+
80
def process_interaction(audio_path, text_input, history, api_key):
    """Run one assistant turn from voice or typed input.

    Returns a 3-tuple for the Gradio outputs: (chat history, cleared
    textbox value, audio file path — or a no-op gr.update() when no
    audio response was produced). Errors are appended to the chat
    instead of being raised.
    """
    if history is None:
        history = []

    def _record_turn(user_msg, assistant_msg):
        # History uses the role/content dict ("messages") format.
        history.append({"role": "user", "content": user_msg})
        history.append({"role": "assistant", "content": assistant_msg})

    user_text = ""
    try:
        # Voice input takes priority over the textbox when both exist.
        if audio_path:
            print(f"Processando áudio de: {audio_path}")
            transcription = get_whisper_model().transcribe(
                audio_path, language="pt", fp16=False
            )
            user_text = transcription["text"].strip()
            print(f"Transcrição Whisper: {user_text}")
        elif text_input:
            user_text = text_input
            print(f"Entrada de texto: {user_text}")

        if not user_text:
            return history, "", gr.update()

        # Cheap local commands first; only then the (paid) LLM call.
        reply = try_local_commands(user_text)
        if reply is None:
            print("Usando IA para responder...")
            reply = get_chatgpt_response(user_text, api_key)
        print(f"Resposta: {reply}")

        # Best-effort TTS: None simply means "no audio this turn".
        reply_audio = text_to_speech(reply)

        _record_turn(user_text, reply)
        return history, "", reply_audio if reply_audio else gr.update()

    except Exception as e:
        error_msg = f"Erro no processamento: {str(e)}"
        print(error_msg)
        _record_turn(user_text if user_text else "???", error_msg)
        return history, "", gr.update()
127
+
128
def main():
    """Build and launch the Gradio UI for the voice/text assistant."""
    load_dotenv()
    api_key = os.getenv("OPENAI_API_KEY", "")

    with gr.Blocks(title="Assistente Virtual IA") as demo:
        gr.Markdown("# 🤖 Assistente Virtual com IA")
        gr.Markdown("Fale com o assistente ou digite um comando. Ele pode pesquisar no Wikipedia, YouTube ou conversar via ChatGPT.")

        with gr.Row():
            with gr.Column(scale=2):
                # Fix: type="messages" matches the role/content dicts that
                # process_interaction appends; the default tuple format
                # breaks rendering of that history in Gradio 4+.
                chatbot = gr.Chatbot(label="Conversa", type="messages")
                audio_output = gr.Audio(label="Resposta em Áudio", autoplay=True)

            with gr.Column(scale=1):
                audio_input = gr.Audio(label="Fale aqui", type="filepath")
                text_input = gr.Textbox(label="Ou digite aqui", placeholder="Ex: Pesquisar Wikipedia sobre Python")
                btn_send = gr.Button("Enviar", variant="primary")
                btn_clear = gr.Button("Limpar Conversa")

        # API key state (initially taken from .env)
        api_key_state = gr.State(value=api_key)

        # Events. After each turn, reset the recorder: audio input takes
        # priority inside process_interaction, so a stale recording would
        # otherwise shadow every subsequent typed message.
        btn_send.click(
            process_interaction,
            inputs=[audio_input, text_input, chatbot, api_key_state],
            outputs=[chatbot, text_input, audio_output]
        ).then(lambda: None, None, audio_input)

        text_input.submit(
            process_interaction,
            inputs=[audio_input, text_input, chatbot, api_key_state],
            outputs=[chatbot, text_input, audio_output]
        ).then(lambda: None, None, audio_input)

        # Clearing now also resets the recorder, not just chat/text/audio-out.
        btn_clear.click(
            lambda: ([], "", gr.update(value=None), None),
            None,
            [chatbot, text_input, audio_output, audio_input]
        )

    # NOTE(review): share=True only matters for local runs (creates a public
    # tunnel); it is ignored on Hugging Face Spaces — confirm it's intended.
    demo.launch(share=True)

if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pyttsx3==2.90
2
+ python-dotenv==1.0.1
3
+ pytest==8.3.3
4
+ jupytext==1.16.2
5
+ openai>=1.0.0
6
+ openai-whisper
7
+ gTTS
8
+ pygame
9
+ pydub
10
+ numpy
11
+ requests
12
+ sounddevice
13
+ scipy
14
+ gradio