abenkbp committed on
Commit
c334c50
·
1 Parent(s): efa46ce

add model

Browse files
data/models/llama3-1-70b.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from flask import Flask, request, jsonify
import spaces
import json
import transformers
import torch

# Hugging Face model repo id for the instruction-tuned Llama 3.1 70B model.
model_id = "meta-llama/Meta-Llama-3.1-70B-Instruct"

# Initialize Flask app
app = Flask(__name__)
# Build the text-generation pipeline once at import time: loading a 70B
# checkpoint is expensive, so the single instance is shared by all requests.
# device_map="auto" lets accelerate place the shards on available devices.
pipeline = transformers.pipeline(
    task="text-generation",
    model=model_id,
    model_kwargs=dict(torch_dtype=torch.bfloat16),
    device_map="auto",
)
@app.route('/chat', methods=['POST'])
@spaces.GPU()
def chat_completion():
    """Handle POST /chat: run the shared text-generation pipeline on the payload.

    Expected JSON body:
        user_input: prompt passed straight to the pipeline — presumably a list
            of chat messages given the [] default; TODO confirm against callers.
        max_tokens: optional int, max new tokens to generate (default 2048).
        temperature: optional float sampling temperature (default 0.7).
        top_p: optional float nucleus-sampling cutoff (default 0.95).

    Returns:
        JSON {"status": "success", "output": ...} on success, or
        {"status": "error", "message": ...} with HTTP 500 on failure
        (HTTP 400 for a non-JSON request body).
    """
    # silent=True yields None for a missing/invalid JSON body instead of
    # aborting mid-handler; reject it explicitly.
    data = request.get_json(silent=True)
    if data is None:
        return jsonify({"status": "error", "message": "request body must be JSON"}), 400

    user_input = data.get('user_input', [])
    max_tokens = data.get('max_tokens', 2048)
    temperature = data.get('temperature', 0.7)
    top_p = data.get('top_p', 0.95)

    print(f"Received user_input: {user_input}")
    print(f"max_tokens: {max_tokens}, temperature: {temperature}, top_p: {top_p}")

    try:
        outputs = pipeline(
            user_input,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p
        )
        # NOTE(review): for chat-style (message-list) input, generated_text is
        # the message list and [-1] is the assistant reply; for a plain string
        # prompt it would be the *last character* — confirm the input format.
        return jsonify({"status": "success", "output": outputs[0]["generated_text"][-1]})
    except Exception as e:
        # Surface failures as HTTP 500 rather than a misleading 200 OK.
        return jsonify({"status": "error", "message": str(e)}), 500
def main():
    """Start the Flask development server, listening on all interfaces."""
    host, port = '0.0.0.0', 7051
    app.run(host=host, port=port)


if __name__ == "__main__":
    main()
chat.py → data/models/llama3-70b.py RENAMED
File without changes