Pection committed on
Commit
944de77
·
verified ·
1 Parent(s): 3096045

Upload hf_llama.py

Browse files

update hugging face llama

Files changed (1) hide show
  1. hf_llama.py +34 -0
hf_llama.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from flask import Flask, request, jsonify
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

app = Flask(__name__)

# Local directory holding the LLaMA checkpoint (tokenizer + weights).
# Assumes a `save_pretrained`-style layout — TODO confirm deployment path.
model_path = "./llama"

tokenizer = AutoTokenizer.from_pretrained(model_path)

# NOTE(review): trust_remote_code=True executes Python bundled with the
# checkpoint — acceptable only because the weights come from a local,
# trusted path; do not point model_path at an untrusted download.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    trust_remote_code=True,
    device_map="auto",  # accelerate places weights on available device(s)
)

# `device` is used to move request tensors to where the model's inputs live.
# Bug fix: the original additionally called `model.to(device)` here, but a
# model loaded with device_map="auto" is already dispatched by accelerate;
# calling .to() on it is at best a redundant no-op and errors when any
# weights are offloaded.
device = "cuda" if torch.cuda.is_available() else "cpu"
17
+
18
@app.route('/generate', methods=['POST'])
def generate_response():
    """Generate a completion for the prompt in the POST body.

    Expects JSON: {"prompt": "...", "max_new_tokens": <optional int>}.
    Returns JSON: {"response": "<generated text>"} on success, or
    {"error": "..."} with HTTP 400 on a missing/invalid request.
    """
    # silent=True makes a missing or malformed JSON body yield None instead
    # of Flask's default 415, so every failure mode gets this endpoint's own
    # JSON error (the original `request.json` crashed on non-JSON bodies).
    input_data = request.get_json(silent=True) or {}
    prompt = input_data.get("prompt", "")

    if not prompt:
        return jsonify({"error": "No prompt provided"}), 400

    # Generalization: callers may tune the output length; default (50)
    # matches the original hard-coded value.
    try:
        max_new_tokens = int(input_data.get("max_new_tokens", 50))
    except (TypeError, ValueError):
        return jsonify({"error": "max_new_tokens must be an integer"}), 400

    # Tokenize the prompt and generate a continuation. no_grad(): this is
    # inference only — skip building the autograd graph to save memory.
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return jsonify({"response": response})
32
+
33
def _main():
    """Start the Flask development server on every interface, port 5000."""
    app.run(host='0.0.0.0', port=5000)


if __name__ == '__main__':
    _main()