|
|
from flask import Flask, request, jsonify |
|
|
from transformers import AutoTokenizer, AutoModelForCausalLM |
|
|
import torch |
|
|
|
|
|
app = Flask(__name__)

# Load the model eagerly at import time so the first request doesn't pay
# the multi-second load cost.
print("Loading Dolphin-Phi-2 (uncensored)...")

# HuggingFace hub id of the 2.7B Dolphin fine-tune of Phi-2.
model_name = "cognitivecomputations/dolphin-2_6-phi-2"

# trust_remote_code is required because Phi-2 ships custom model code.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,  # full precision: fp16 gives no speedup on CPU
    device_map="cpu",
    low_cpu_mem_usage=True,     # stream weights in to keep peak RAM down
    trust_remote_code=True,
)

# NOTE: the original source had this literal split across two physical
# lines by a mangled emoji, which was a SyntaxError.
print("Model loaded!")
|
|
|
|
|
@app.route('/v1/chat/completions', methods=['POST'])
def generate():
    """OpenAI-compatible chat completion endpoint.

    Expects a JSON body with:
        messages    : list of {"role": ..., "content": ...} dicts (ChatML roles)
        max_tokens  : optional int, cap on newly generated tokens (default 300)
        temperature : optional float, sampling temperature (default 0.8);
                      values <= 0 fall back to greedy decoding

    Returns an OpenAI-style {"choices": [{"message": ...}]} JSON payload,
    or {"error": ...} with HTTP 500 on failure.
    """
    try:
        # get_json(silent=True) yields None instead of raising on a missing
        # or malformed body; treat that as an empty request.
        data = request.get_json(silent=True) or {}
        messages = data.get('messages', [])
        max_tokens = data.get('max_tokens', 300)
        temperature = data.get('temperature', 0.8)

        # Build a ChatML prompt from the *entire* conversation.  The previous
        # code kept only the last system and last user message, silently
        # discarding multi-turn history and assistant turns.
        parts = [
            f"<|im_start|>{msg.get('role', 'user')}\n{msg.get('content', '')}<|im_end|>\n"
            for msg in messages
        ]
        parts.append("<|im_start|>assistant\n")
        prompt = "".join(parts)

        inputs = tokenizer(prompt, return_tensors="pt")
        prompt_len = inputs["input_ids"].shape[1]

        # temperature == 0 combined with do_sample=True makes HF generate()
        # raise; interpret non-positive temperatures as greedy decoding.
        do_sample = temperature > 0

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                temperature=temperature if do_sample else None,
                top_p=0.9 if do_sample else None,
                do_sample=do_sample,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens.  The old approach split the
        # full decoded transcript on "<|im_start|>assistant", which broke
        # whenever skip_special_tokens stripped those markers.
        response_text = tokenizer.decode(
            outputs[0][prompt_len:], skip_special_tokens=True
        ).replace("<|im_end|>", "").strip()

        return jsonify({
            "choices": [{
                "message": {
                    "role": "assistant",
                    "content": response_text
                }
            }]
        })

    except Exception as e:
        # Top-level request boundary: surface the failure to the client
        # instead of letting Flask emit an opaque 500 page.
        print(f"Error: {str(e)}")
        return jsonify({"error": str(e)}), 500
|
|
|
|
|
@app.route('/health', methods=['GET'])
def health():
    """Liveness probe: reports service status and the served model name."""
    payload = {"status": "ok", "model": "dolphin-phi-2"}
    return jsonify(payload)
|
|
|
|
|
@app.route('/', methods=['GET'])
def home():
    """Landing route: describe the API and enumerate its endpoints."""
    endpoints = {
        "chat": "/v1/chat/completions (POST)",
        "health": "/health (GET)",
    }
    info = {
        "message": "Uncensored LLM API",
        "model": "dolphin-phi-2-2.7b",
        "endpoints": endpoints,
    }
    return jsonify(info)
|
|
|
|
|
# Start Flask's built-in development server, listening on every interface.
# NOTE(review): port 7860 is presumably chosen for a hosting platform's
# convention (e.g. Hugging Face Spaces) — confirm before changing.  The dev
# server is single-threaded and not production-grade; use a WSGI server
# (gunicorn/uwsgi) for real deployments.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)
|
|
|