# Source: Hugging Face Space by akcoderspark — "Update app.py", commit 89fb040 (verified)
from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
app = Flask(__name__)
print("πŸš€ Loading Dolphin-Phi-2 (uncensored)...")
model_name = "cognitivecomputations/dolphin-2_6-phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
model_name,
torch_dtype=torch.float32, # βœ… Changed to float32 for CPU
device_map="cpu", # βœ… Explicitly use CPU
low_cpu_mem_usage=True,
trust_remote_code=True
)
print("βœ… Model loaded!")
@app.route('/v1/chat/completions', methods=['POST'])
def generate():
    """OpenAI-style chat completion endpoint.

    Expects a JSON body with `messages` (list of {role, content} dicts),
    optional `max_tokens` (default 300) and `temperature` (default 0.8).
    Returns a minimal OpenAI-compatible `choices` payload, or a JSON
    `{"error": ...}` with HTTP 500 on failure.
    """
    try:
        # silent=True returns None instead of aborting with an HTML 400
        # when the body is missing or not valid JSON, so the documented
        # JSON error path below is always the one clients see.
        data = request.get_json(silent=True) or {}
        messages = data.get('messages', [])
        max_tokens = data.get('max_tokens', 300)
        temperature = data.get('temperature', 0.8)

        # Keep the last system and last user message (single-turn prompt).
        system_msg = ""
        user_msg = ""
        for msg in messages:
            if msg['role'] == 'system':
                system_msg = msg['content']
            elif msg['role'] == 'user':
                user_msg = msg['content']

        # ChatML prompt format used by the dolphin fine-tunes.
        prompt = (
            f"<|im_start|>system\n{system_msg}<|im_end|>\n"
            f"<|im_start|>user\n{user_msg}<|im_end|>\n"
            f"<|im_start|>assistant\n"
        )

        inputs = tokenizer(prompt, return_tensors="pt")
        prompt_len = inputs["input_ids"].shape[-1]

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=0.9,
                # Sampling with temperature <= 0 raises in transformers;
                # fall back to greedy decoding in that case.
                do_sample=temperature > 0,
                pad_token_id=tokenizer.eos_token_id
            )

        # Decode only the newly generated tokens. The original decoded the
        # whole sequence and split on "<|im_start|>assistant", which leaks
        # the entire prompt into the response whenever the ChatML markers
        # are registered as special tokens (skip_special_tokens strips
        # them before the split can find its anchor).
        gen_tokens = outputs[0][prompt_len:]
        response_text = tokenizer.decode(gen_tokens, skip_special_tokens=True)
        response_text = response_text.replace("<|im_end|>", "").strip()

        return jsonify({
            "choices": [{
                "message": {
                    "role": "assistant",
                    "content": response_text
                }
            }]
        })
    except Exception as e:
        print(f"❌ Error: {str(e)}")
        return jsonify({"error": str(e)}), 500
@app.route('/health', methods=['GET'])
def health():
    """Liveness probe: report the service is up and which model it serves."""
    payload = {"status": "ok", "model": "dolphin-phi-2"}
    return jsonify(payload)
@app.route('/', methods=['GET'])
def home():
    """Root index: describe the API, the model served, and its endpoints."""
    info = {
        "message": "Uncensored LLM API",
        "model": "dolphin-phi-2-2.7b",
        "endpoints": {
            "chat": "/v1/chat/completions (POST)",
            "health": "/health (GET)",
        },
    }
    return jsonify(info)
# Bind to all interfaces on port 7860 — the default port expected by
# Hugging Face Spaces' container runtime.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)