Spaces:

tudeplom
/

tts

Sleeping

App Files Files Community

tudeplom committed on Feb 24, 2025

Commit

78f7e4b

verified ·

1 Parent(s): d44e8cd

Update app.py

Browse files

Files changed (1) hide show

app.py +134 -44

app.py CHANGED Viewed

@@ -6,20 +6,27 @@ import torch
 import os
 import uvicorn
 app = FastAPI()
 HF_API_KEY = os.getenv("HF_API_KEY")
 if not HF_API_KEY:
     raise ValueError("❌ Thiếu HF_API_KEY!")
 client = InferenceClient(token=HF_API_KEY)
 TEMP_DIR = "temp"
 os.makedirs(TEMP_DIR, exist_ok=True)
 STT_MODEL = "openai/whisper-tiny.en"
 TTS_MODEL = "facebook/mms-tts-eng"
 LLAMA_MODEL = "meta-llama/Meta-Llama-3-8B-Instruct"
 try:
     llama_pipeline = pipeline(
         "text-generation",
@@ -32,13 +39,14 @@ except Exception as e:
     print(f"❌ Lỗi tải LLaMA: {e}")
     raise
 HTML_CONTENT = """
 <!DOCTYPE html>
 <html lang="vi">
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Chatbot TTS, STT & LLaMA 3</title>
     <style>
         body { font-family: Arial, sans-serif; margin: 0; padding: 20px; background: #f0f0f0; }
         .chat-container { max-width: 600px; margin: auto; }
@@ -52,14 +60,13 @@ HTML_CONTENT = """
 </head>
 <body>
     <div class="chat-container">
-        <h1>Chatbot TTS, STT & LLaMA 3</h1>
         <div class="chat-box" id="chatBox"></div>
         <div class="input-area">
-            <input type="text" id="textInput" placeholder="Nhập văn bản hoặc hỏi LLaMA">
-            <button onclick="sendText()">Gửi TTS</button>
-            <button onclick="askLlama()">Hỏi LLaMA</button>
-            <button id="recordButton" onclick="startRecording()">Bắt đầu ghi âm</button>
-            <button id="stopButton" onclick="stopRecording()" disabled>Dừng ghi âm</button>
         </div>
         <audio id="audioPlayer" controls style="display: none;"></audio>
     </div>
@@ -68,47 +75,55 @@ HTML_CONTENT = """
         let mediaRecorder;
         let audioChunks = [];
-        async function sendText() {
             const text = document.getElementById('textInput').value;
             if (!text) return;
             addMessage('Bạn: ' + text);
-            const response = await fetch('/tts', {
-                method: 'POST',
-                headers: { 'Content-Type': 'application/json' },
-                body: JSON.stringify({ text: text })
-            });
-            const blob = await response.blob();
-            const url = URL.createObjectURL(blob);
-            const audio = document.getElementById('audioPlayer');
-            audio.src = url;
-            audio.style.display = 'block';
-            audio.play();
-            addMessage('Bot: Đã tạo âm thanh!');
-        }
-        async function askLlama() {
-            const text = document.getElementById('textInput').value;
-            if (!text) return;
-            addMessage('Bạn: ' + text);
-            const response = await fetch('/llama', {
-                method: 'POST',
-                headers: { 'Content-Type': 'application/json' },
-                body: JSON.stringify({ prompt: text })
-            });
-            const data = await response.json();
-            if (data.text) {
-                addMessage('LLaMA: ' + data.text);
-            } else {
-                addMessage('LLaMA: Lỗi - ' + (data.error || 'Không có phản hồi'));
             }
         }
         async function startRecording() {
             const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
             mediaRecorder = new MediaRecorder(stream);
             audioChunks = [];
             mediaRecorder.ondataavailable = event => audioChunks.push(event.data);
-            mediaRecorder.onstop = sendAudio;
             mediaRecorder.start();
             document.getElementById('recordButton').disabled = true;
             document.getElementById('stopButton').disabled = false;
@@ -121,17 +136,51 @@ HTML_CONTENT = """
             document.getElementById('stopButton').disabled = true;
         }
-        async function sendAudio() {
             const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
             const formData = new FormData();
             formData.append('file', audioBlob, 'recording.wav');
-            const response = await fetch('/stt', {
-                method: 'POST',
-                body: formData
-            });
-            const data = await response.json();
-            if (data.text) addMessage('Bot: ' + data.text);
-            else addMessage('Bot: Lỗi - ' + data.error);
         }
         function addMessage(message) {
@@ -187,6 +236,47 @@ async def generate_text(prompt: str):
         print(f"❌ Lỗi LLaMA: {e}")
         return {"error": str(e)}
 if __name__ == "__main__":
     print("🚀 Khởi động FastAPI Server...")
     uvicorn.run(app, host="0.0.0.0", port=7860)

 import os
 import uvicorn
+# Khởi tạo FastAPI
 app = FastAPI()
+# Lấy API key từ biến môi trường
 HF_API_KEY = os.getenv("HF_API_KEY")
 if not HF_API_KEY:
     raise ValueError("❌ Thiếu HF_API_KEY!")
+# Khởi tạo Hugging Face Client cho TTS/STT
 client = InferenceClient(token=HF_API_KEY)
+# Tạo thư mục lưu file tạm
 TEMP_DIR = "temp"
 os.makedirs(TEMP_DIR, exist_ok=True)
+# Mô hình TTS, STT và LLaMA
 STT_MODEL = "openai/whisper-tiny.en"
 TTS_MODEL = "facebook/mms-tts-eng"
 LLAMA_MODEL = "meta-llama/Meta-Llama-3-8B-Instruct"
+# Tải pipeline LLaMA 3
 try:
     llama_pipeline = pipeline(
         "text-generation",
     print(f"❌ Lỗi tải LLaMA: {e}")
     raise
+# Giao diện HTML
 HTML_CONTENT = """
 <!DOCTYPE html>
 <html lang="vi">
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Chatbot Tự Động</title>
     <style>
         body { font-family: Arial, sans-serif; margin: 0; padding: 20px; background: #f0f0f0; }
         .chat-container { max-width: 600px; margin: auto; }
 </head>
 <body>
     <div class="chat-container">
+        <h1>Chatbot Tự Động</h1>
         <div class="chat-box" id="chatBox"></div>
         <div class="input-area">
+            <input type="text" id="textInput" placeholder="Nhập câu hỏi hoặc nhấn Enter" onkeypress="if(event.key === 'Enter') sendChat()">
+            <button onclick="sendChat()">Gửi</button>
+            <button id="recordButton" onclick="startRecording()">Ghi âm</button>
+            <button id="stopButton" onclick="stopRecording()" disabled>Dừng</button>
         </div>
         <audio id="audioPlayer" controls style="display: none;"></audio>
     </div>
         let mediaRecorder;
         let audioChunks = [];
+        // Hàm gửi chat (văn bản -> LLaMA -> TTS)
+        async function sendChat() {
             const text = document.getElementById('textInput').value;
             if (!text) return;
             addMessage('Bạn: ' + text);
+            document.getElementById('textInput').value = ''; // Xóa input sau khi gửi
+            try {
+                // Gửi tới endpoint /chat (tích hợp LLaMA và TTS)
+                const response = await fetch('/chat', {
+                    method: 'POST',
+                    headers: { 'Content-Type': 'application/json' },
+                    body: JSON.stringify({ prompt: text })
+                });
+                if (response.ok) {
+                    const blob = await response.blob();
+                    const url = URL.createObjectURL(blob);
+                    const audio = document.getElementById('audioPlayer');
+                    audio.src = url;
+                    audio.style.display = 'block';
+                    audio.play();
+                    // Lấy văn bản từ LLaMA để hiển thị
+                    const textResponse = await fetch('/llama', {
+                        method: 'POST',
+                        headers: { 'Content-Type': 'application/json' },
+                        body: JSON.stringify({ prompt: text })
+                    });
+                    const textData = await textResponse.json();
+                    if (textData.text) {
+                        addMessage('Bot: ' + textData.text);
+                    }
+                } else {
+                    const errorData = await response.json();
+                    addMessage('Bot: Lỗi - ' + (errorData.error || 'Không có phản hồi'));
+                }
+            } catch (e) {
+                addMessage('Bot: Lỗi kết nối - ' + e.message);
             }
         }
+        // Ghi âm (STT -> LLaMA -> TTS)
         async function startRecording() {
             const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
             mediaRecorder = new MediaRecorder(stream);
             audioChunks = [];
             mediaRecorder.ondataavailable = event => audioChunks.push(event.data);
+            mediaRecorder.onstop = processAudio;
             mediaRecorder.start();
             document.getElementById('recordButton').disabled = true;
             document.getElementById('stopButton').disabled = false;
             document.getElementById('stopButton').disabled = true;
         }
+        async function processAudio() {
             const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
             const formData = new FormData();
             formData.append('file', audioBlob, 'recording.wav');
+            try {
+                // Gửi tới endpoint /audio_chat (STT -> LLaMA -> TTS)
+                const response = await fetch('/audio_chat', {
+                    method: 'POST',
+                    body: formData
+                });
+                if (response.ok) {
+                    const blob = await response.blob();
+                    const url = URL.createObjectURL(blob);
+                    const audio = document.getElementById('audioPlayer');
+                    audio.src = url;
+                    audio.style.display = 'block';
+                    audio.play();
+                    // Lấy văn bản STT và LLaMA để hiển thị
+                    const sttResponse = await fetch('/stt', {
+                        method: 'POST',
+                        body: formData
+                    });
+                    const sttData = await sttResponse.json();
+                    if (sttData.text) {
+                        addMessage('Bạn: ' + sttData.text);
+                        const llamaResponse = await fetch('/llama', {
+                            method: 'POST',
+                            headers: { 'Content-Type': 'application/json' },
+                            body: JSON.stringify({ prompt: sttData.text })
+                        });
+                        const llamaData = await llamaResponse.json();
+                        if (llamaData.text) {
+                            addMessage('Bot: ' + llamaData.text);
+                        }
+                    }
+                } else {
+                    const errorData = await response.json();
+                    addMessage('Bot: Lỗi - ' + (errorData.error || 'Không có phản hồi'));
+                }
+            } catch (e) {
+                addMessage('Bot: Lỗi kết nối - ' + e.message);
+            }
         }
         function addMessage(message) {
         print(f"❌ Lỗi LLaMA: {e}")
         return {"error": str(e)}
# Combined endpoint: text prompt -> LLaMA -> TTS
@app.post("/chat")
async def chat(prompt: str):
    """Generate a LLaMA reply for ``prompt`` and return it as synthesized speech.

    Args:
        prompt: The user's text. NOTE(review): FastAPI treats a bare ``str``
            parameter as a query parameter, while the page's ``sendChat()``
            posts ``{"prompt": ...}`` as a JSON body - confirm the intended
            client/server contract.

    Returns:
        A ``FileResponse`` streaming the generated WAV (``audio/wav``,
        download name ``output.wav``) on success. On failure, a JSON body
        ``{"error": <message>}`` with a non-2xx status code, so the page's
        ``response.ok`` check takes its error branch instead of trying to
        play the JSON payload as audio.
    """
    # Function-scope imports keep this block self-contained; fastapi/starlette
    # are already dependencies of this module.
    import uuid

    from fastapi.responses import JSONResponse
    from starlette.background import BackgroundTask

    if not prompt.strip():
        return JSONResponse(status_code=400, content={"error": "Câu hỏi trống"})

    # One file per request: FileResponse reads the file only after this
    # handler has returned, so a shared "output.wav" could be overwritten by a
    # concurrent request before it is sent.
    output_path = os.path.join(TEMP_DIR, f"output_{uuid.uuid4().hex}.wav")
    try:
        # NOTE(review): both model calls below are synchronous and therefore
        # hold the event loop for their whole duration.
        # NOTE(review): text-generation pipelines presumably return the prompt
        # followed by the continuation by default - verify whether the echo of
        # the prompt is wanted in the spoken answer.
        llama_output = llama_pipeline(prompt, max_new_tokens=100)[0]["generated_text"]
        print(f"LLaMA output: {llama_output}")

        # Synthesize speech from the LLaMA output.
        audio = client.text_to_speech(model=TTS_MODEL, text=llama_output)
        with open(output_path, "wb") as f:
            f.write(audio)
    except Exception as e:
        print(f"❌ Lỗi chat: {e}")
        # Do not leave a partially written file behind.
        if os.path.exists(output_path):
            os.remove(output_path)
        return JSONResponse(status_code=500, content={"error": str(e)})

    return FileResponse(
        output_path,
        media_type="audio/wav",
        filename="output.wav",
        # Delete the per-request file once the response has been sent.
        background=BackgroundTask(os.remove, output_path),
    )
# Combined endpoint: speech -> STT -> LLaMA -> TTS
@app.post("/audio_chat")
async def audio_chat(file: UploadFile = File(...)):
    """Transcribe an uploaded recording, answer it with LLaMA, and speak the answer.

    Args:
        file: The uploaded audio recording (multipart form field ``file``).

    Returns:
        A ``FileResponse`` streaming the generated WAV (``audio/wav``,
        download name ``output.wav``) on success. On failure, a JSON body
        ``{"error": <message>}`` with a non-2xx status code, so the page's
        ``response.ok`` check takes its error branch instead of trying to
        play the JSON payload as audio.
    """
    # Function-scope imports keep this block self-contained; fastapi/starlette
    # are already dependencies of this module.
    import uuid

    from fastapi.responses import JSONResponse
    from starlette.background import BackgroundTask

    # One file per request: FileResponse reads the file only after this
    # handler has returned, so a shared "output.wav" could be overwritten by a
    # concurrent request before it is sent.
    output_path = os.path.join(TEMP_DIR, f"output_{uuid.uuid4().hex}.wav")
    try:
        # STT: turn the recording into text. The audio payload is passed
        # positionally because the client method names this parameter
        # ``audio``; it does not take a ``data`` keyword.
        audio_data = await file.read()
        stt_result = client.automatic_speech_recognition(audio_data, model=STT_MODEL)
        stt_output = stt_result.get("text", "") or ""
        print(f"STT output: {stt_output}")
        if not stt_output.strip():
            # Nothing was recognized; do not ask the model to answer silence.
            return JSONResponse(
                status_code=400,
                content={"error": "Không nhận dạng được giọng nói"},
            )

        # LLaMA: generate the reply.
        # NOTE(review): this call and the TTS call are synchronous and
        # therefore hold the event loop for their whole duration.
        llama_output = llama_pipeline(stt_output, max_new_tokens=100)[0]["generated_text"]
        print(f"LLaMA output: {llama_output}")

        # TTS: turn the reply into audio.
        audio = client.text_to_speech(model=TTS_MODEL, text=llama_output)
        with open(output_path, "wb") as f:
            f.write(audio)
    except Exception as e:
        print(f"❌ Lỗi audio_chat: {e}")
        # Do not leave a partially written file behind.
        if os.path.exists(output_path):
            os.remove(output_path)
        return JSONResponse(status_code=500, content={"error": str(e)})

    return FileResponse(
        output_path,
        media_type="audio/wav",
        filename="output.wav",
        # Delete the per-request file once the response has been sent.
        background=BackgroundTask(os.remove, output_path),
    )
 if __name__ == "__main__":
     print("🚀 Khởi động FastAPI Server...")
     uvicorn.run(app, host="0.0.0.0", port=7860)