abenkbp commited on
Commit
3af859f
·
1 Parent(s): 240d5a4
Files changed (1) hide show
  1. data/models/llama3-1-70b.py +50 -0
data/models/llama3-1-70b.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from flask import Flask, request, jsonify
from huggingface_hub import login
import spaces
import transformers
import torch
import os

# Initialize Flask app
app = Flask(__name__)

# Authenticate with the Hugging Face Hub using a token read from the
# UCODE_SECRET environment variable (required to download the gated Llama
# model). add_to_git_credential also stores the token for git operations.
# NOTE(review): if UCODE_SECRET is unset, api_key is None and login() will
# fail at import time — confirm the secret is configured in the deployment.
api_key = os.getenv("UCODE_SECRET")
login(api_key,add_to_git_credential=True)

# Instruction-tuned Llama 3.1 70B model identifier on the Hub.
model_id = "meta-llama/Meta-Llama-3.1-70B-Instruct"

# Build the text-generation pipeline once at import time so every request
# reuses the loaded weights. bfloat16 halves memory versus float32;
# device="cuda" places the model on the GPU; token=True reuses the
# credentials stored by login() above.
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device="cuda",
    token=True
)
23
+
24
@app.route('/chat', methods=['POST'])
@spaces.GPU(enable_queue=True)
def chat_completion():
    """Run the text-generation pipeline on a POSTed chat request.

    Accepts a JSON body that is either an object or (for backward
    compatibility with existing callers) a one-element list wrapping that
    object. Recognized keys: ``user_input`` (required), ``max_tokens``,
    ``temperature``, ``top_p``.

    Returns ``{"status": "success", "output": ...}`` on success, or
    ``{"status": "error", "message": ...}`` with an appropriate HTTP
    status code (400 for bad input, 500 for generation failures).
    """
    data = request.get_json(silent=True)

    # Original clients wrap the payload in a one-element list; unwrap it,
    # but also accept a bare object. Previously a plain-object or empty-list
    # body crashed with an uncaught TypeError/IndexError (HTTP 500 HTML page).
    if isinstance(data, list):
        if not data:
            return jsonify({"status": "error",
                            "message": "Empty request payload"}), 400
        data = data[0]
    if not isinstance(data, dict):
        return jsonify({"status": "error",
                        "message": "Request body must be a JSON object or a "
                                   "one-element list containing one"}), 400

    user_input = data.get('user_input', [])
    if not user_input:
        return jsonify({"status": "error",
                        "message": "'user_input' is required"}), 400
    max_tokens = data.get('max_tokens', 2048)
    temperature = data.get('temperature', 0.7)
    top_p = data.get('top_p', 0.95)

    try:
        outputs = pipeline(
            user_input,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p
        )
        # For chat-style input the pipeline returns the running conversation;
        # the last element of generated_text is the newly generated
        # assistant message.
        return jsonify({"status": "success",
                        "output": outputs[0]["generated_text"][-1]})
    except Exception as e:
        # Surface generation failures as JSON with a proper 500 status
        # (previously errors were returned with HTTP 200).
        return jsonify({"status": "error", "message": str(e)}), 500
44
+
45
+
46
def main():
    """Launch the Flask development server on all network interfaces."""
    host, port = '0.0.0.0', 7052
    app.run(host=host, port=port)


if __name__ == "__main__":
    main()