from flask import Flask, request, jsonify
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

app = Flask(__name__)

# Use CodeLlama-7B from the Hugging Face Hub (the codellama repos are
# public, so no access token is needed)
model_name = "codellama/CodeLlama-7b-hf"

# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name, 
    torch_dtype=torch.float16,  # Use float16 for efficiency
    device_map="auto"           # Automatically use GPU if available
)
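
# Note (rough estimate, not from the original source): in float16 the 7B
# checkpoint needs about 14 GB of memory (7B parameters x 2 bytes), so plan
# for a GPU with at least 16 GB; with device_map="auto", layers that do not
# fit are offloaded to CPU RAM instead.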


@app.route("/")
def home():
    return request.url

@app.route("/generate", methods=["POST"])
def generate_text():
    data = request.get_json()
    prompt = data.get("prompt", "")

    if not prompt:
        return jsonify({"error": "No prompt provided"}), 400

    # Move the input tensors to wherever device_map placed the model,
    # rather than guessing the device by hand
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # max_new_tokens bounds only the generated continuation; max_length
    # would count the prompt tokens as well and can silently truncate output
    outputs = model.generate(**inputs, max_new_tokens=200)
    response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return jsonify({"response": response_text})

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)
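
# --- Example client (a sketch, not part of the app above) ---
# Assuming the server is running locally on port 7860, a minimal client
# could POST a prompt to /generate like this; the prompt string is purely
# illustrative:
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/generate",
#       json={"prompt": "def fibonacci(n):"},
#   )
#   resp.raise_for_status()
#   print(resp.json()["response"])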