Adanbalf commited on
Commit
42fbbb9
·
verified ·
1 Parent(s): 7bfa692

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -0
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import base64
import os
import secrets
import tempfile
from io import BytesIO

import torch
import whisper
from fastapi import FastAPI, File, UploadFile, Request
from fastapi.responses import StreamingResponse, JSONResponse
from PIL import Image
from transformers import pipeline
from TTS.api import TTS
11
+
12
app = FastAPI(title="NasFit AI Server")

# 🔐 API key — overridable via the API_KEY environment variable.
# NOTE(review): the hard-coded fallback should be replaced in any real
# deployment; anyone reading this source knows the default key.
API_KEY = os.getenv("API_KEY", "nasfit_secret_key")

# 🧠 Model loading — all models are loaded once at import time and then
# shared as module-level globals by every request handler below.
print("Cargando modelos...")

# Text-generation model used for plain chat completions.
chat_pipe = pipeline("text-generation", model="meta-llama/Meta-Llama-3-8B-Instruct")
# Vision-language model used when the request includes an inline image.
vision_pipe = pipeline("image-text-to-text", model="lmms-lab/llava-onevision-1.6-7b-hf")
# Speech-to-text (Whisper "small" checkpoint).
whisper_model = whisper.load_model("small")
# Text-to-speech (Coqui XTTS v2).
tts = TTS("coqui/XTTS-v2")

print("✅ Modelos listos.")
26
+
27
# -------------------------------
# Auth
# -------------------------------
async def check_auth(request: Request) -> bool:
    """Validate the Bearer token in the request's Authorization header.

    Returns True only when the header is present, uses the ``Bearer``
    scheme, and the remainder of the header matches ``API_KEY`` exactly.

    Fixes over the previous version:
    - the old ``auth.split(" ")[1]`` accepted headers with trailing
      garbage (``"Bearer KEY anything"``) because only the second
      whitespace-separated field was compared; now the entire token
      after the scheme prefix must match.
    - the comparison uses ``secrets.compare_digest`` so the key check
      runs in constant time and does not leak via timing.
    """
    auth = request.headers.get("Authorization", "")
    if not auth.startswith("Bearer "):
        return False
    token = auth[len("Bearer "):]
    return secrets.compare_digest(token, API_KEY)
35
+
36
# -------------------------------
# Chat + Vision
# -------------------------------
def _extract_message_content(messages):
    """Collect the prompt text and last inline base64 image from *messages*.

    Each message's ``content`` may be a plain string (appended to the
    text) or an OpenAI-style list of parts (``type`` of ``"text"`` or
    ``"image_url"``).  Only ``data:image`` base64 URLs are decoded;
    remote URLs are ignored, matching the original behavior.

    Returns a ``(text, image)`` tuple where *image* is a PIL Image or
    None when no inline image was supplied.
    """
    image = None
    text = ""
    for msg in messages:
        content = msg.get("content", "")
        if isinstance(content, list):
            for part in content:
                if part.get("type") == "text":
                    text += part.get("text", "")
                elif part.get("type") == "image_url":
                    url = part["image_url"]["url"]
                    if url.startswith("data:image"):
                        # Decode the base64 payload after the comma in
                        # "data:image/...;base64,<payload>".
                        image = Image.open(BytesIO(base64.b64decode(url.split(",")[1])))
        else:
            text += content
    return text, image


@app.post("/v1/chat/completions")
async def chat_endpoint(request: Request):
    """OpenAI-compatible chat completion endpoint with optional vision.

    Expects a JSON body with a ``messages`` list.  If any message
    carries an inline base64 image, the vision pipeline handles the
    request; otherwise the plain text-generation pipeline is used.
    Returns 401 when the Bearer token check fails.

    (The unused ``model`` field of the payload is no longer read — the
    served models are fixed at startup.)
    """
    if not await check_auth(request):
        return JSONResponse({"error": "Unauthorized"}, status_code=401)
    payload = await request.json()
    messages = payload.get("messages", [])

    text_content, image_content = _extract_message_content(messages)

    if image_content:
        response = vision_pipe(text_content, images=image_content)[0]["generated_text"]
    else:
        response = chat_pipe(text_content, max_new_tokens=300)[0]["generated_text"]

    return {"choices": [{"message": {"content": response}}]}
69
+
70
# -------------------------------
# Speech to text
# -------------------------------
@app.post("/v1/audio/transcriptions")
async def transcribe(request: Request, file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with the Whisper model.

    Returns ``{"text": ...}`` with the transcription, or 401 when the
    Bearer token check fails.

    Fix: the previous version wrote every upload to a shared
    ``temp.wav`` in the working directory, so concurrent requests
    clobbered each other's audio and the file was never removed.  A
    per-request temporary file avoids the race and is cleaned up.
    """
    if not await check_auth(request):
        return JSONResponse({"error": "Unauthorized"}, status_code=401)
    audio = await file.read()
    tmp_path = None
    try:
        # delete=False so the path can be handed to Whisper after the
        # handle is closed (required on platforms like Windows).
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp.write(audio)
            tmp_path = tmp.name
        result = whisper_model.transcribe(tmp_path)
    finally:
        if tmp_path is not None:
            os.remove(tmp_path)
    return {"text": result["text"]}
82
+
83
# -------------------------------
# Text to speech
# -------------------------------
@app.post("/v1/audio/speech")
async def tts_endpoint(request: Request):
    """Synthesize speech for the ``input`` text of the JSON payload.

    Optional ``voice`` selects the XTTS speaker (default
    ``"es_male_01"``).  Streams the synthesized WAV back to the caller,
    or returns 401 when the Bearer token check fails.

    Fix: the previous version synthesized into a shared ``output.wav``,
    so concurrent requests overwrote each other's audio and the file
    leaked on disk.  A per-request temporary file avoids the race and
    is always removed.
    """
    if not await check_auth(request):
        return JSONResponse({"error": "Unauthorized"}, status_code=401)
    payload = await request.json()
    text = payload.get("input", "")
    voice = payload.get("voice", "es_male_01")
    tmp_path = None
    try:
        # Reserve a unique path, then let XTTS write into it.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp_path = tmp.name
        tts.tts_to_file(text=text, file_path=tmp_path, speaker=voice)
        with open(tmp_path, "rb") as f:
            audio = f.read()
    finally:
        if tmp_path is not None and os.path.exists(tmp_path):
            os.remove(tmp_path)
    return StreamingResponse(BytesIO(audio), media_type="audio/wav")