English-LLM-TTS / main.py
MaenGit
update main
b41f222
import os
import json
import httpx
import asyncio
import logging
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import edge_tts
import uvicorn
import base64
# Logging setup: module-level logger used by the streaming generator below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
app = FastAPI()
# Enable CORS so the Next.js frontend can call this API from the browser.
# NOTE(review): "*" allows any origin — tighten for production deployments.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# Local Ollama chat-completion endpoint the service proxies to.
OLLAMA_URL = "http://localhost:11434/api/chat"
class ChatRequest(BaseModel):
    """Request body accepted by the /stream-voice endpoint."""

    # Chat history forwarded verbatim to Ollama (role/content dicts expected
    # by its /api/chat endpoint — TODO confirm against the frontend caller).
    messages: list
    # edge-tts voice name; defaults to the Arabic (Saudi) male neural voice.
    voice: str = "ar-SA-HamedNeural"
    # edge-tts speech-rate adjustment string, e.g. "+10%" or "-5%".
    rate: str = "+0%"
# Punctuation that marks a sentence boundary (Latin marks, Arabic question
# mark, and newline) — a sentence is synthesized as soon as one appears.
_SENTENCE_ENDINGS = (".", "!", "?", "؟", "\n")


async def _synthesize_base64(text, voice, rate):
    """Synthesize *text* with edge-tts and return the audio as a base64 string.

    Collects every "audio" chunk from the edge-tts stream into memory and
    base64-encodes the result. Returns "" when no audio was produced.
    """
    communicate = edge_tts.Communicate(text, voice, rate=rate)
    audio_data = b""
    async for tts_chunk in communicate.stream():
        if tts_chunk["type"] == "audio":
            audio_data += tts_chunk["data"]
    if not audio_data:
        return ""
    return base64.b64encode(audio_data).decode("utf-8")


async def stream_text_and_voice(payload, messages, voice, rate):
    """Stream an Ollama chat completion, interleaving per-sentence TTS audio.

    Yields newline-delimited JSON objects:
      * {"type": "audio", "data": <base64 audio>}  — one per finished sentence
      * {"type": "final_text", "content": <text>}  — full reply, sent last
      * {"type": "error", "message": <text>}       — if the upstream call fails

    Args:
        payload: JSON body forwarded verbatim to OLLAMA_URL (must set
            "stream": True so tokens arrive line by line).
        messages: Unused; retained for backward compatibility with callers.
        voice: edge-tts voice name, e.g. "ar-SA-HamedNeural".
        rate: edge-tts rate adjustment string, e.g. "+0%".
    """
    full_response_text = ""
    sentence_buffer = ""
    async with httpx.AsyncClient(timeout=None) as client:
        try:
            async with client.stream("POST", OLLAMA_URL, json=payload) as response:
                async for line in response.aiter_lines():
                    if not line:
                        continue
                    try:
                        chunk = json.loads(line)
                    except json.JSONDecodeError:
                        # Skip a malformed line instead of aborting the stream.
                        logger.warning("Skipping non-JSON line from Ollama: %r", line)
                        continue
                    token = chunk.get("message", {}).get("content", "")
                    sentence_buffer += token
                    full_response_text += token
                    # Synthesize as soon as a sentence ends so the browser can
                    # start playback before the LLM finishes generating.
                    if any(punct in token for punct in _SENTENCE_ENDINGS):
                        clean_text = sentence_buffer.strip()
                        if clean_text:
                            audio_base64 = await _synthesize_base64(clean_text, voice, rate)
                            if audio_base64:
                                yield json.dumps({"type": "audio", "data": audio_base64}) + "\n"
                        sentence_buffer = ""  # Start collecting the next sentence.
                # Flush trailing text that never hit a sentence boundary.
                remainder = sentence_buffer.strip()
                if remainder:
                    audio_base64 = await _synthesize_base64(remainder, voice, rate)
                    if audio_base64:
                        yield json.dumps({"type": "audio", "data": audio_base64}) + "\n"
            # Always end with the complete text so the client can render it.
            yield json.dumps({
                "type": "final_text",
                "content": full_response_text
            }) + "\n"
        except Exception as e:
            # Tell the client the stream failed instead of ending silently.
            logger.exception("Streaming failed: %s", e)
            yield json.dumps({"type": "error", "message": str(e)}) + "\n"
@app.post("/stream-voice")
async def voice_engine(data: ChatRequest):
payload = {
"model": "llama3.2:1b", # Or whatever model you are using
"messages": data.messages,
"stream": True, # Crucial for streaming
"options": {
"temperature": 0.5,
"top_p": 0.9,
}
}
return StreamingResponse(
stream_text_and_voice(payload,data.messages, data.voice, data.rate),
media_type="audio/mpeg",
headers={"Cache-Control":"no-cache"}
)
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=7860)