import asyncio
import os
import sqlite3
import threading

import torch
import uvicorn
from fastapi_poe import PoeBot, make_app
from fastapi_poe.types import QueryRequest, PartialResponse
from transformers import AutoModelForCausalLM, AutoTokenizer


# Identifier handed to both ``from_pretrained`` calls below.
MODEL_ID = "Finisha-F-scratch/Nelya-neko"

print("--> Chargement du tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

print("--> Chargement du modèle (cette étape peut prendre du temps)...")
# Half precision when CUDA is available, full precision otherwise.
if torch.cuda.is_available():
    _weights_dtype = torch.float16
else:
    _weights_dtype = torch.float32

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    torch_dtype=_weights_dtype,
)


# SQLite file, located in the current user's home directory, holding the
# ``api_keys`` table.
DB_NAME = os.path.join(os.path.expanduser("~"), "charlotte_api.db")


def init_db(db_path=None):
    """Create the ``api_keys`` table if it does not exist yet.

    Args:
        db_path: Path of the SQLite file to initialise. When omitted (or
            ``None``) the module-level ``DB_NAME`` is used.

    Raises:
        sqlite3.Error: If the database cannot be opened or the statement
            fails; the connection is closed before the error propagates.
    """
    target = DB_NAME if db_path is None else db_path
    conn = sqlite3.connect(target)
    try:
        conn.execute(
            '''CREATE TABLE IF NOT EXISTS api_keys (key TEXT PRIMARY KEY, requests_count INTEGER)'''
        )
        conn.commit()
    finally:
        # Always release the file handle, even when the statement raised.
        conn.close()


# Make sure the schema exists as soon as the module is imported.
init_db()


class CharlottePoeBot(PoeBot):
    """Poe bot that answers the latest message with text sampled from ``model``."""

    # Worker threads share the single module-level model; the lock keeps
    # generation strictly one-at-a-time, as it was when the call ran inline.
    _generation_lock = threading.Lock()

    async def get_response(self, request: QueryRequest):
        """Yield one ``PartialResponse`` holding the reply to the last message.

        Args:
            request: Incoming query; only the content of the last entry of
                ``request.query`` is used as the prompt.

        Yields:
            A single ``PartialResponse`` whose text is the generated
            continuation, without the prompt. Nothing is yielded when the
            query contains no message.
        """
        if not request.query:
            # No message to answer; avoid an IndexError on ``query[-1]``.
            return

        last_message = request.query[-1].content

        # ``model.generate`` is a long blocking call: run it in a worker
        # thread so the event loop keeps serving other requests meanwhile.
        loop = asyncio.get_running_loop()
        response_text = await loop.run_in_executor(
            None, self._generate_reply, last_message
        )

        yield PartialResponse(text=response_text)

    def _generate_reply(self, prompt):
        """Return the text the model generates after ``prompt`` (blocking)."""
        inputs = tokenizer(
            prompt, return_tensors="pt", truncation=True, max_length=128
        ).to(model.device)
        prompt_token_count = inputs["input_ids"].shape[-1]

        # ``no_grad`` is entered here because this runs in the worker thread.
        with self._generation_lock, torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=200,
                do_sample=True,
                temperature=0.7,
                pad_token_id=tokenizer.eos_token_id
            )

        # Drop the prompt by token position rather than by character count:
        # the prompt may have been truncated to 128 tokens, and decoding is
        # not guaranteed to reproduce the original string exactly.
        generated_ids = outputs[0][prompt_token_count:]
        return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()


# One bot instance, wrapped in the application object handed to Uvicorn.
bot = CharlottePoeBot()
app = make_app(bot)


if __name__ == "__main__":
    # Listen on $PORT when it is set, otherwise fall back to 7860.
    port_to_use = int(os.getenv("PORT", 7860))
    print(f"--> Démarrage d'Uvicorn sur le port {port_to_use}...")

    uvicorn.run(app, port=port_to_use, host="0.0.0.0")