akcoderspark commited on
Commit
40d3c46
·
verified ·
1 Parent(s): c8aa4f4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Flask application serving an OpenAI-style chat-completion API on top of
# a locally loaded causal LM.
app = Flask(__name__)

print("🚀 Loading Dolphin-Phi-2 (uncensored)...")

# Hugging Face Hub id of the model to serve (a Dolphin fine-tune of Phi-2).
model_name = "cognitivecomputations/dolphin-2_6-phi-2"

# trust_remote_code is required because Phi-2 ships custom model code on the Hub.
# NOTE(review): trust_remote_code executes repo-provided Python — acceptable only
# because the model id is hard-coded above, never user-supplied.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,   # half precision to cut memory use
    device_map="auto",           # let accelerate place layers on available devices
    low_cpu_mem_usage=True,      # stream weights in instead of a full CPU copy
    trust_remote_code=True
)

print("✅ Model loaded!")
22
@app.route('/v1/chat/completions', methods=['POST'])
def generate():
    """OpenAI-compatible chat completion endpoint.

    Expects a JSON body with:
      - "messages": list of {"role": ..., "content": ...} dicts; only the
        last "system" and last "user" message are used (single-turn prompt).
      - "max_tokens" (optional, default 300): generation budget.
      - "temperature" (optional, default 0.8): sampling temperature.

    Returns a minimal OpenAI-style payload:
      {"choices": [{"message": {"role": "assistant", "content": ...}}]}
    or {"error": ...} with HTTP 500 on failure.
    """
    try:
        # Tolerate a missing or non-JSON body instead of raising inside request.json.
        data = request.get_json(silent=True) or {}
        messages = data.get('messages', [])
        max_tokens = data.get('max_tokens', 300)
        temperature = data.get('temperature', 0.8)

        # Collapse the conversation to the last system and user messages.
        system_msg = ""
        user_msg = ""
        for msg in messages:
            if msg.get('role') == 'system':
                system_msg = msg.get('content', '')
            elif msg.get('role') == 'user':
                user_msg = msg.get('content', '')

        # ChatML prompt format used by the Dolphin fine-tunes.
        prompt = f"<|im_start|>system\n{system_msg}<|im_end|>\n<|im_start|>user\n{user_msg}<|im_end|>\n<|im_start|>assistant\n"

        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=0.9,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id
            )

        # BUG FIX: decode only the newly generated tokens. The previous code
        # decoded the full sequence with skip_special_tokens=True and then
        # split on "<|im_start|>assistant" — but that marker is a special
        # token that skip_special_tokens had already stripped, so the split
        # was a no-op and the system/user prompt text leaked into the reply.
        prompt_len = inputs['input_ids'].shape[1]
        response_text = tokenizer.decode(
            outputs[0][prompt_len:], skip_special_tokens=True
        ).strip()

        return jsonify({
            "choices": [{
                "message": {
                    "role": "assistant",
                    "content": response_text
                }
            }]
        })

    except Exception as e:
        # Top-level request boundary: log server-side, surface to the client.
        print(f"❌ Error: {str(e)}")
        return jsonify({"error": str(e)}), 500
69
@app.route('/health', methods=['GET'])
def health():
    """Liveness probe: report service status and the served model name."""
    payload = {"status": "ok", "model": "dolphin-phi-2"}
    return jsonify(payload)
73
@app.route('/', methods=['GET'])
def home():
    """Root index: describe the API and list the available endpoints."""
    endpoints = {
        "chat": "/v1/chat/completions (POST)",
        "health": "/health (GET)",
    }
    info = {
        "message": "Uncensored LLM API",
        "model": "dolphin-phi-2-2.7b",
        "endpoints": endpoints,
    }
    return jsonify(info)
84
# Start the Flask development server, listening on all interfaces.
# Port 7860 is presumably chosen for Hugging Face Spaces (its default
# exposed port) — confirm against the deployment target.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)