Ilke Ileri committed on
Commit
692ef6b
·
1 Parent(s): 3ea09f6

Add Vapi Gemma API application

Browse files
Files changed (3) hide show
  1. Dockerfile +16 -0
  2. app.py +92 -0
  3. requirements.txt +9 -0
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Minimal image for serving the Flask app on Hugging Face Spaces.
FROM python:3.11-slim

WORKDIR /app

# Install dependencies first so the pip layer is cached and only
# re-runs when requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application
COPY app.py .

# Expose port 7860 (Hugging Face Spaces default)
EXPOSE 7860

# Run with gunicorn: a single worker because the model is loaded into
# memory at import time, and a long timeout for slow CPU generation.
CMD ["gunicorn", "app:app", "--bind", "0.0.0.0:7860", "--timeout", "300", "--workers", "1"]
app.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch
import os

app = Flask(__name__)
CORS(app)  # allow cross-origin calls (Vapi / browser clients)

# Model names: the fine-tuned LoRA adapter repo and the base model it
# was trained on.
MODEL_NAME = "ilkeileri/gemma-sales-comprehensive"
BASE_MODEL = "google/gemma-1.1-2b-it"

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)

print("Loading base model...")
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    # NOTE(review): `dtype=` is only accepted as a from_pretrained kwarg
    # in recent transformers releases; requirements.txt pins
    # transformers>=4.38.0, where the kwarg was `torch_dtype=` —
    # confirm against the actually-installed transformers version.
    dtype=torch.float16,
    low_cpu_mem_usage=True,
    trust_remote_code=True
)

print("Loading LoRA adapters...")
# Attach the fine-tuned LoRA weights on top of the frozen base model.
model = PeftModel.from_pretrained(base_model, MODEL_NAME)
model.eval()  # inference only: disables dropout etc.

print("Model loaded successfully!")
32
@app.route("/", methods=["GET"])
def health_check():
    """Liveness probe: report service status and the models being served."""
    payload = {
        "status": "ok",
        "model": MODEL_NAME,
        "base_model": BASE_MODEL,
    }
    return jsonify(payload), 200
39
+
40
@app.route("/chat/completions", methods=["POST", "OPTIONS"])
def chat_completions():
    """OpenAI/Vapi-style chat-completion endpoint.

    Accepts either an OpenAI-style ``messages`` list (only the last
    message's ``content`` is used) or a plain ``prompt`` field, and
    returns the assistant reply in the ``choices[0].message`` shape
    that Vapi expects.

    Returns:
        (json, int): response body and HTTP status — 200 on success,
        400 when no prompt could be extracted, 500 on model failure.
    """
    # CORS preflight: reply immediately; flask-cors adds the headers.
    if request.method == "OPTIONS":
        return "", 200

    try:
        data = request.get_json()
        messages = data.get("messages", [])

        if not messages:
            prompt = data.get("prompt", "")
        else:
            last_message = messages[-1] if messages else {}
            prompt = last_message.get("content", "")

        if not prompt:
            return jsonify({"error": "No prompt provided"}), 400

        # Wrap the prompt in Gemma's chat-template turn markers.
        formatted_prompt = f"<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n"

        inputs = tokenizer(formatted_prompt, return_tensors="pt")
        # Inference only: no_grad avoids building autograd graphs.
        with torch.no_grad():
            outputs = model.generate(
                inputs["input_ids"],
                # Explicit attention mask: avoids the HF warning and any
                # ambiguity when pad and eos tokens coincide.
                attention_mask=inputs["attention_mask"],
                max_new_tokens=200,
                temperature=0.7,
                do_sample=True,
                top_p=0.9,
                num_return_sequences=1
            )

        # BUG FIX: the previous code decoded the whole sequence with
        # skip_special_tokens=True and then stripped the prompt via
        # full_response.replace(formatted_prompt, "") — but decoding
        # removes the <start_of_turn>/<end_of_turn> markers, so the
        # replacement never matched and the user prompt was echoed back
        # in the reply. Decode only the newly generated tokens instead.
        prompt_length = inputs["input_ids"].shape[1]
        response_text = tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        ).strip()

        # Vapi / OpenAI chat-completion response shape.
        vapi_response = {
            "choices": [{
                "message": {
                    "role": "assistant",
                    "content": response_text
                }
            }]
        }

        return jsonify(vapi_response), 200

    except Exception as e:
        # Top-level boundary: surface the failure to the caller rather
        # than letting Flask return an opaque 500.
        print(f"Error: {str(e)}")
        return jsonify({"error": str(e)}), 500
90
+
91
if __name__ == "__main__":
    # Development entry point only; in the container the app is served
    # by gunicorn (see the CMD in the Dockerfile).
    app.run(host="0.0.0.0", port=7860)
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
# Web server stack
flask==3.0.0
flask-cors==4.0.0
gunicorn==21.2.0
# Model inference
transformers>=4.38.0
peft>=0.8.0
torch>=2.0.0
accelerate>=0.25.0
# Tokenizer backends required by the Gemma tokenizer
sentencepiece>=0.1.99
protobuf>=3.20.0