jaothan committed (verified)
Commit 1a3132e · Parent(s): 72d701d

Upload 4 files

llamacpp_python/base/call-model.py ADDED
@@ -0,0 +1,20 @@
+ #### 6. **Handling Errors**
+ #- If the chat application cannot connect to the model server, check the following:
+ #  - Is the model server running?
+ #  - Is the `MODEL_ENDPOINT` URL correct?
+ #  - Are there any firewall or network restrictions blocking the connection?
+ #  - Are the ports correctly mapped (if using Docker)?
+
+ #- Add error handling in the chat application to handle cases where the model server is unavailable:
+
+ #```python
+ import os
+ import requests
+
+ # Resolve the model server URL from the environment (same default as chat-app.py)
+ model_service = os.getenv("MODEL_ENDPOINT", "http://localhost:8001")
+
+ def call_model(prompt):
+     try:
+         url = f"{model_service}/generate"
+         payload = {"prompt": prompt}
+         response = requests.post(url, json=payload, timeout=10)  # Add a timeout
+         response.raise_for_status()  # Raise an error for bad status codes
+         return response.json().get("response", "No response from model")
+     except requests.exceptions.RequestException as e:
+         return f"Error connecting to the model server: {e}"
+ #```
+
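+ # A quick diagnostic for the checklist above: a minimal reachability sketch. The
+ # helper name `check_model_server` is introduced here only for illustration; any HTTP
+ # response (even a 404) shows the server is reachable at that URL.
+ #```python
+ def check_model_server(base_url=model_service, timeout=5):
+     try:
+         requests.get(base_url, timeout=timeout)
+         return True
+     except requests.exceptions.RequestException as e:
+         print(f"Model server unreachable at {base_url}: {e}")
+         return False
+ #```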
llamacpp_python/base/chat-app.py ADDED
@@ -0,0 +1,38 @@
+ #### 2. **Set the `MODEL_ENDPOINT` Environment Variable**
+ #In your chat application, ensure the `MODEL_ENDPOINT` environment variable is set to the correct URL of the model server. For example:
+ #```python
+ import os
+ import requests
+
+ # Get the model endpoint from the environment variable
+ model_service = os.getenv("MODEL_ENDPOINT", "http://localhost:8001")
+
+ # Example function to call the model server
+ def call_model(prompt):
+     url = f"{model_service}/generate"
+     payload = {"prompt": prompt}
+     response = requests.post(url, json=payload)
+     if response.status_code == 200:
+         return response.json().get("response", "No response from model")
+     else:
+         return f"Error: {response.status_code}"
+
+ # Test the connection
+ if __name__ == "__main__":
+     prompt = "Hello, model!"
+     result = call_model(prompt)
+     print(result)
+ #```
+
+ #### 3. **Test the Connection**
+ #Run the chat application and test the connection to the model server:
+
+ #```bash
+ # Set the MODEL_ENDPOINT environment variable
+ #export MODEL_ENDPOINT="http://localhost:8001"
+
+ # Run the chat application
+ #python chat_app.py
+ #```
+
+ #If everything is set up correctly, the chat application should be able to call the model server and receive a response.
+
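+ # With the simulated model_server.py from this upload running on port 8001, the test
+ # block above should print:
+ #
+ #   Generated response for: Hello, model!
+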
llamacpp_python/base/docker-compose.yml ADDED
@@ -0,0 +1,59 @@
+ #### 4. **Deploying in a Containerized Environment**
+ #If you're using Docker or Podman, ensure the containers can communicate with each other. For example:
+
+ #- **Docker Compose**:
+ #  Create a `docker-compose.yml` file to define both the model server and the chat application:
+
+ #```yaml
+ version: "3"
+ services:
+   model_server:
+     image: my_model_server_image
+     ports:
+       - "8001:8001"
+     environment:
+       - PORT=8001
+     networks:
+       - my_network
+   chat_app:
+     image: my_chat_app_image
+     environment:
+       - MODEL_ENDPOINT=http://model_server:8001
+     depends_on:
+       - model_server
+     networks:
+       - my_network
+
+ networks:
+   my_network:
+ #```
+
+ #- The `MODEL_ENDPOINT` for the chat application is set to `http://model_server:8001`, which uses Docker's internal DNS to resolve the model server's container name.
+
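+ #  To confirm that DNS resolution, you can run a one-liner inside the chat_app
+ #  container (a sketch; it assumes a Python interpreter is available in that image):
+ #  ```python
+ #  import socket
+ #  print(socket.gethostbyname("model_server"))  # should print the model server's container IP
+ #  ```
+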
+ #- **Docker Networking**:
+ #  If you're not using Docker Compose, you can create a custom network and attach both containers to it:
+
+ #  ```bash
+ #  # Create a custom network
+ #  docker network create my_network
+
+ #  # Run the model server container
+ #  docker run -d --name model_server --network my_network -p 8001:8001 my_model_server_image
+
+ #  # Run the chat application container
+ #  docker run -d --name chat_app --network my_network -e MODEL_ENDPOINT=http://model_server:8001 my_chat_app_image
+ #  ```
+
+ #### 5. **Testing the Endpoint**
+ #To ensure the model server is working as expected, you can test the endpoint directly using `curl` or a tool like Postman:
+
+ #```bash
+ #curl -X POST http://localhost:8001/generate -H "Content-Type: application/json" -d '{"prompt": "Hello, model!"}'
+ #```
+
+ #Expected response:
+ #```json
+ #{
+ #  "response": "Generated response for: Hello, model!"
+ #}
+ #```
llamacpp_python/base/model_server.py ADDED
@@ -0,0 +1,22 @@
+ from flask import Flask, request, jsonify
+
+ app = Flask(__name__)
+
+ @app.route("/generate", methods=["POST"])
+ def generate():
+     data = request.json
+     prompt = data.get("prompt", "")
+     # Simulate a response from the model
+     response = f"Generated response for: {prompt}"
+     return jsonify({"response": response})
+
+ if __name__ == "__main__":
+     app.run(host="0.0.0.0", port=8001)
+
+ #Run the server:
+ #```bash
+ #python model_server.py
+ #```
+
+ #This server listens on `http://0.0.0.0:8001` and exposes a `/generate` endpoint for generating responses.
+
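+ # A quick sanity check without starting the server, using Flask's built-in test client
+ # (a minimal sketch, assuming this file is saved as model_server.py):
+ #```python
+ #from model_server import app
+ #
+ #client = app.test_client()
+ #resp = client.post("/generate", json={"prompt": "Hello, model!"})
+ #print(resp.get_json())  # {'response': 'Generated response for: Hello, model!'}
+ #```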