chc / app.py
Carley1234's picture
Upload 4 files
7bdb16c verified
import asyncio
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# ASGI application object; routes below are registered on it.
app = FastAPI()

# CORS is left fully open: any origin, any HTTP method, any request header.
_CORS_OPTIONS = {
    "allow_origins": ["*"],
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)
# DeepSeek-Coder is an excellent fit for CHC (Creative Engine Code Helper).
model_name = "deepseek-ai/DeepSeek-Coder-1.3B-Instruct"
print(f"Cargando modelo de código {model_name}...")

# Tokenizer and weights are loaded once at import time and shared by every request.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# "auto" defers dtype and device placement decisions to the library.
_MODEL_LOAD_OPTIONS = {"torch_dtype": "auto", "device_map": "auto"}
model = AutoModelForCausalLM.from_pretrained(model_name, **_MODEL_LOAD_OPTIONS)

# Held by the /generate handler for the duration of one request.
lock = asyncio.Lock()
@app.get("/")
async def root():
    """Liveness endpoint: return a fixed message showing the API is running."""
    payload = {"message": "Carl CHC (Code Helper) API está activa"}
    return payload
def _generate_text(prompt, system_prompt):
    """Run one blocking chat completion and return only the newly generated text.

    This is synchronous, compute-heavy work (tokenization, ``model.generate``
    and decoding). It must be called from a worker thread, never directly on
    the asyncio event loop.

    Args:
        prompt: The user message.
        system_prompt: The system message placed before the user message.

    Returns:
        The decoded completion with special tokens removed.
    """
    # Chat-formatted conversation: a system turn followed by the user turn.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=1024,
        do_sample=False,  # greedy decoding for more deterministic code output
    )
    # Each output sequence starts with its prompt tokens; slice them off so
    # only the completion is decoded.
    completion_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    return tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0]


@app.post("/generate")
async def generate(request: Request):
    """Generate a completion for the prompt in the JSON request body.

    The body is expected to be a JSON object with a ``prompt`` key and an
    optional ``system_prompt`` key. Only one generation runs at a time: if
    another request currently holds the lock, a ``busy`` payload is returned
    immediately instead of queueing.

    Returns:
        A dict whose ``status`` is ``"success"`` (with ``text``), ``"busy"``
        or ``"error"`` (each with ``message``).
    """
    if lock.locked():
        return {
            "status": "busy",
            "message": "espera Carl te atendera en seguida no pierdas la paciencia"
        }
    # No await happens between the check above and acquiring the lock, so
    # another request cannot slip in between the two.
    async with lock:
        try:
            data = await request.json()
            if not isinstance(data, dict):
                # Report a clear error instead of leaking an AttributeError
                # from calling .get() on a list/str/number body.
                return {
                    "status": "error",
                    "message": "El cuerpo de la petición debe ser un objeto JSON."
                }
            prompt = data.get("prompt", "")
            system_prompt = data.get(
                "system_prompt",
                "Eres un experto programador para el motor Creative Engine.",
            )
            # Run the blocking model call in a worker thread so the event loop
            # stays responsive; otherwise concurrent requests would stall
            # instead of receiving the "busy" reply above.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None, _generate_text, prompt, system_prompt
            )
            return {"status": "success", "text": response}
        except Exception as e:
            # Top-level boundary: report the failure to the client in the same
            # envelope rather than letting the request fail with a 500.
            return {"status": "error", "message": str(e)}
if __name__ == "__main__":
    # Imported here so uvicorn is only required when run as a script.
    import uvicorn

    # Listen on all interfaces, port 7860.
    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)