Upload 4 files
llamacpp_python/base/call-model.py
ADDED
@@ -0,0 +1,20 @@
#### 6. **Handling Errors**
#- If the chat application cannot connect to the model server, check the following (a quick connectivity probe is sketched right after this list):
#  - Is the model server running?
#  - Is the `MODEL_ENDPOINT` URL correct?
#  - Are there any firewall or network restrictions blocking the connection?
#  - Are the ports correctly mapped (if using Docker)?
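
#A minimal sketch of such a probe, for illustration only: the `probe_model_server` helper is not part of the application, and it assumes the simulated server from `model_server.py`, which exposes only `/generate`.

#```python
import os
import requests

def probe_model_server(endpoint=None):
    """Illustrative helper: return True if the model server answers at /generate."""
    endpoint = endpoint or os.getenv("MODEL_ENDPOINT", "http://localhost:8001")
    try:
        # A tiny POST with a short timeout; any HTTP reply means the server is reachable.
        reply = requests.post(f"{endpoint}/generate", json={"prompt": "ping"}, timeout=5)
        return reply.ok
    except requests.exceptions.RequestException:
        return False
#```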

#- Add error handling in the chat application to handle cases where the model server is unavailable:

#```python
import os
import requests

# Resolve the model server URL from the environment (same default as chat-app.py)
model_service = os.getenv("MODEL_ENDPOINT", "http://localhost:8001")

def call_model(prompt):
    try:
        url = f"{model_service}/generate"
        payload = {"prompt": prompt}
        response = requests.post(url, json=payload, timeout=10)  # Add a timeout
        response.raise_for_status()  # Raise an error for bad status codes
        return response.json().get("response", "No response from model")
    except requests.exceptions.RequestException as e:
        return f"Error connecting to the model server: {e}"
#```
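
#With the server from `model_server.py` running, `call_model("Hello, model!")` returns `Generated response for: Hello, model!`; if the server is down, the `except` branch returns a string starting with `Error connecting to the model server:` instead of raising.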
llamacpp_python/base/chat-app.py
ADDED
@@ -0,0 +1,38 @@
#### 2. **Set the `MODEL_ENDPOINT` Environment Variable**
#In your chat application, ensure the `MODEL_ENDPOINT` environment variable is set to the correct URL of the model server. For example:
#```python
import os
import requests

# Get the model endpoint from the environment variable
model_service = os.getenv("MODEL_ENDPOINT", "http://localhost:8001")

# Example function to call the model server
def call_model(prompt):
    url = f"{model_service}/generate"
    payload = {"prompt": prompt}
    response = requests.post(url, json=payload)
    if response.status_code == 200:
        return response.json().get("response", "No response from model")
    else:
        return f"Error: {response.status_code}"

# Test the connection
if __name__ == "__main__":
    prompt = "Hello, model!"
    result = call_model(prompt)
    print(result)
#```

#### 3. **Test the Connection**
#Run the chat application and test the connection to the model server:

#```bash
# Set the MODEL_ENDPOINT environment variable
#export MODEL_ENDPOINT="http://localhost:8001"

# Run the chat application
#python chat-app.py
#```

#If everything is set up correctly, the chat application should be able to call the model server and receive a response; with the simulated server in `model_server.py`, that response is `Generated response for: Hello, model!`.
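
#The `__main__` block above sends a single test prompt. A minimal interactive loop built on the same `call_model` function might look like the sketch below; it is an illustration only and is not wired into the tutorial:

#```python
def chat_loop():
    """Sketch only: read prompts from stdin and print the model server's replies until the user quits."""
    print("Type a prompt (or 'quit' to exit).")
    while True:
        user_prompt = input("> ").strip()
        if user_prompt.lower() in {"quit", "exit"}:
            break
        if user_prompt:
            print(call_model(user_prompt))
#```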
llamacpp_python/base/docker-compose.yml
ADDED
@@ -0,0 +1,59 @@
#### 4. **Deploying in a Containerized Environment**
#If you're using Docker or Podman, ensure the containers can communicate with each other. For example:

#- **Docker Compose**:
#  Create a `docker-compose.yml` file to define both the model server and the chat application:

#```yaml
version: "3"
services:
  model_server:
    image: my_model_server_image
    ports:
      - "8001:8001"
    environment:
      - PORT=8001
    networks:
      - my_network
  chat_app:
    image: my_chat_app_image
    environment:
      - MODEL_ENDPOINT=http://model_server:8001
    depends_on:
      - model_server
    networks:
      - my_network

networks:
  my_network:
#```

#- The `MODEL_ENDPOINT` for the chat application is set to `http://model_server:8001`, which uses Docker's internal DNS to resolve the model server's container name. From the host, the same server is reachable at `http://localhost:8001` through the published port. Start both services with `docker compose up -d` (or `docker-compose up -d` with the older CLI).

#- **Docker Networking**:
#  If you're not using Docker Compose, you can create a custom network and attach both containers to it:

#  ```bash
#  # Create a custom network
#  docker network create my_network

#  # Run the model server container
#  docker run -d --name model_server --network my_network -p 8001:8001 my_model_server_image

#  # Run the chat application container
#  docker run -d --name chat_app --network my_network -e MODEL_ENDPOINT=http://model_server:8001 my_chat_app_image
#  ```

#### 5. **Testing the Endpoint**
#To ensure the model server is working as expected, you can test the endpoint directly using `curl` or a tool like Postman; an equivalent check from Python is sketched at the end of this file:

#```bash
#curl -X POST http://localhost:8001/generate -H "Content-Type: application/json" -d '{"prompt": "Hello, model!"}'
#```

#Expected response:
#```json
#{
#  "response": "Generated response for: Hello, model!"
#}
#```
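
#The same check can be done from Python with `requests`. This mirrors the `curl` call above and the `call_model` helper from the chat application; it is only a sketch and assumes the server is reachable at `http://localhost:8001`:

#```python
#import requests
#
#reply = requests.post(
#    "http://localhost:8001/generate",
#    json={"prompt": "Hello, model!"},
#    timeout=10,
#)
#print(reply.status_code)  # 200 when the server is up
#print(reply.json())       # {'response': 'Generated response for: Hello, model!'}
#```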
llamacpp_python/base/model_server.py
ADDED
@@ -0,0 +1,22 @@
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route("/generate", methods=["POST"])
def generate():
    # Read the JSON body; fall back to an empty dict if none was sent
    data = request.get_json(silent=True) or {}
    prompt = data.get("prompt", "")
    # Simulate a response from the model
    response = f"Generated response for: {prompt}"
    return jsonify({"response": response})

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8001)

#Run the server:
#```bash
#python model_server.py
#```

#This server listens on `http://0.0.0.0:8001` and exposes a `/generate` endpoint for generating responses.
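
#The `/generate` route can also be exercised without starting the server, using Flask's built-in test client. This is a sketch for illustration; the `smoke_test` helper below is not part of the tutorial and is not called anywhere:

#```python
def smoke_test():
    """Sketch only: call /generate through Flask's test client and check the simulated reply."""
    client = app.test_client()
    reply = client.post("/generate", json={"prompt": "Hello, model!"})
    assert reply.status_code == 200
    assert reply.get_json() == {"response": "Generated response for: Hello, model!"}
#```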