from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
app = Flask(__name__)

# Load tokenizer and model once at import time so every request reuses them.
# NOTE(review): this blocks startup until the ~2.7B-parameter model is
# downloaded/loaded — presumably acceptable for a single-process server.
print("🚀 Loading Dolphin-Phi-2 (uncensored)...")
model_name = "cognitivecomputations/dolphin-2_6-phi-2"
# trust_remote_code=True executes code shipped with the model repo — only
# safe because the repo is explicitly chosen here, never user-supplied.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,  # ✅ Changed to float32 for CPU
    device_map="cpu",  # ✅ Explicitly use CPU
    low_cpu_mem_usage=True,
    trust_remote_code=True
)
print("✅ Model loaded!")
@app.route('/v1/chat/completions', methods=['POST'])
def generate():
    """OpenAI-style chat-completion endpoint.

    Accepts a JSON body with `messages` (list of {role, content} dicts;
    only the last `system` and last `user` message are used), optional
    `max_tokens` (default 300) and `temperature` (default 0.8).
    Returns an OpenAI-shaped `{"choices": [{"message": ...}]}` payload,
    or `{"error": ...}` with HTTP 500 on failure.
    """
    try:
        # get_json(silent=True) yields None instead of raising on a missing
        # or malformed body; fall back to defaults rather than a 500.
        data = request.get_json(silent=True) or {}
        messages = data.get('messages', [])
        max_tokens = data.get('max_tokens', 300)
        temperature = data.get('temperature', 0.8)

        system_msg = ""
        user_msg = ""
        for msg in messages:
            if msg['role'] == 'system':
                system_msg = msg['content']
            elif msg['role'] == 'user':
                user_msg = msg['content']

        # ChatML prompt format used by the Dolphin fine-tunes.
        prompt = f"<|im_start|>system\n{system_msg}<|im_end|>\n<|im_start|>user\n{user_msg}<|im_end|>\n<|im_start|>assistant\n"
        inputs = tokenizer(prompt, return_tensors="pt")
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                # temperature must be > 0 when do_sample=True, else HF raises.
                temperature=max(float(temperature), 1e-3),
                top_p=0.9,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id
            )

        # BUGFIX: decode only the newly generated tokens. The previous code
        # decoded everything with skip_special_tokens=True and then split on
        # "<|im_start|>assistant" — but skip_special_tokens strips those
        # markers, so the split matched nothing and the prompt leaked into
        # the response. Slicing off the prompt tokens is exact and robust.
        prompt_len = inputs["input_ids"].shape[1]
        response_text = tokenizer.decode(
            outputs[0][prompt_len:], skip_special_tokens=True
        ).strip()

        return jsonify({
            "choices": [{
                "message": {
                    "role": "assistant",
                    "content": response_text
                }
            }]
        })
    except Exception as e:
        print(f"❌ Error: {str(e)}")
        return jsonify({"error": str(e)}), 500
@app.route('/health', methods=['GET'])
def health():
    """Liveness probe: report service status and the served model name."""
    payload = {"status": "ok", "model": "dolphin-phi-2"}
    return jsonify(payload)
@app.route('/', methods=['GET'])
def home():
    """Root index: describe the API and enumerate its endpoints."""
    endpoints = {
        "chat": "/v1/chat/completions (POST)",
        "health": "/health (GET)",
    }
    info = {
        "message": "Uncensored LLM API",
        "model": "dolphin-phi-2-2.7b",
        "endpoints": endpoints,
    }
    return jsonify(info)
if __name__ == '__main__':
    # Bind on all interfaces; port 7860 is the Hugging Face Spaces convention.
    app.run(host='0.0.0.0', port=7860)