tjwrld committed on
Commit
90a79c7
·
verified ·
1 Parent(s): c5eeb26

Update server.js

Browse files
Files changed (1) hide show
  1. server.js +45 -30
server.js CHANGED
@@ -1,73 +1,87 @@
1
  import express from "express";
2
  import { fileURLToPath } from "url";
3
  import path from "path";
 
4
  import { getLlama, LlamaChatSession } from "node-llama-cpp";
5
 
6
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
7
  const app = express();
8
 
9
- // Middleware to parse JSON bodies
10
  app.use(express.json());
 
 
 
 
11
 
12
  // Hugging Face Spaces expects apps to run on port 7860
13
  const PORT = process.env.PORT || 7860;
14
 
15
- let model;
16
- let context;
 
17
 
18
  /* -----------------------
19
  LOAD MODEL (ONCE)
20
  ----------------------- */
21
  async function initModel() {
22
- console.log("Initializing Llama...");
 
23
  const llama = await getLlama();
24
 
25
- console.log("Loading model into memory...");
26
- model = await llama.loadModel({
27
- modelPath: path.join(__dirname, "models", "gemma-3-1b-it-UD-IQ1_S.gguf"),
28
- gpu: false // HF Free tier doesn't have a GPU
 
 
 
29
  });
30
 
31
- console.log("Creating context...");
32
- context = await model.createContext({
33
  batchSize: 512,
34
- threads: 6
35
  });
36
 
37
- console.log("Model loaded and ready! 🚀");
 
38
  }
39
 
40
  /* -----------------------
41
  API ENDPOINTS
42
  ----------------------- */
43
 
44
- // Health check endpoint (Useful for the HF Spaces UI)
45
- app.get("/", (req, res) => {
46
- res.send("EdgeGPT Server is running. Send a POST request to /generate.");
47
- });
48
-
49
- // The main generation endpoint
50
  app.post("/generate", async (req, res) => {
51
  try {
52
- // Extract inputs with default fallbacks
53
  const {
54
  user_input,
55
- user_temp = 0.1,
56
- user_inst = "You are a helpful AI assistant.",
57
  user_max_token = 5120
58
  } = req.body;
59
 
60
  if (!user_input) {
61
- return res.status(400).json({ error: "user_input is required in the request body." });
62
  }
63
 
64
- // Create a new session for this request to apply the specific system instructions
 
 
 
 
65
  const session = new LlamaChatSession({
66
- contextSequence: context.getSequence(),
67
- systemPrompt: user_inst
68
  });
69
 
70
- const response = await session.prompt(user_input, {
 
71
  maxTokens: parseInt(user_max_token),
72
  temperature: parseFloat(user_temp),
73
  topK: 40,
@@ -75,11 +89,11 @@ app.post("/generate", async (req, res) => {
75
  repeatPenalty: 1.1
76
  });
77
 
78
- res.json({ response });
79
 
80
  } catch (err) {
81
  console.error("Error during generation:", err);
82
- res.status(500).json({ error: "An error occurred during text generation." });
83
  }
84
  });
85
 
@@ -87,11 +101,12 @@ app.post("/generate", async (req, res) => {
87
  START SERVER
88
  ----------------------- */
89
  initModel().then(() => {
90
- // Listen on 0.0.0.0 so external networks (like Hugging Face routing) can connect
91
  app.listen(PORT, "0.0.0.0", () => {
92
  console.log(`Server is listening on port ${PORT}`);
 
93
  });
94
  }).catch(err => {
95
- console.error("Failed to initialize the model server:", err);
96
  process.exit(1);
97
  });
 
1
  import express from "express";
2
  import { fileURLToPath } from "url";
3
  import path from "path";
4
+ import morgan from "morgan"; // Useful logging
5
  import { getLlama, LlamaChatSession } from "node-llama-cpp";
6
 
7
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
8
  const app = express();
9
 
10
+ // Middleware to parse JSON bodies and log requests
11
  app.use(express.json());
12
+ app.use(morgan('dev')); // Logs endpoint access to the console
13
+
14
+ // SERVE THE UI: Tells Express to look for files in the "public" folder
15
+ app.use(express.static(path.join(__dirname, 'public')));
16
 
17
  // Hugging Face Spaces expects apps to run on port 7860
18
  const PORT = process.env.PORT || 7860;
19
 
// Module-level instances so the model stays loaded in memory across requests.
let modelInstance;
let contextInstance;

/* -----------------------
   LOAD MODEL (ONCE)
   ----------------------- */

/**
 * Loads the GGUF model from disk and creates the shared inference context.
 * Populates the module-level `modelInstance` / `contextInstance` globals.
 * Defaults (model path, thread count) are unchanged but can now be
 * overridden via MODEL_PATH / LLAMA_THREADS environment variables.
 *
 * @returns {Promise<void>} resolves once the model and context are ready.
 */
async function initModel() {
  console.log("-----------------------------------------");
  console.log("Initializing Llama Backend...");
  const llama = await getLlama();

  // Path inside the Docker container; MODEL_PATH overrides for local runs.
  const modelLocation =
    process.env.MODEL_PATH ??
    path.join(__dirname, "models", "gemma-3-1b-it-UD-IQ1_S.gguf");

  console.log(`Loading model into memory: ${modelLocation}`);
  modelInstance = await llama.loadModel({
    modelPath: modelLocation,
    gpu: false // Ensure CPU execution for HF Free tier
  });

  console.log("Creating context sequence...");
  // Threads default to 6 (the previous hard-coded value); LLAMA_THREADS
  // overrides when the host has a different vCPU count.
  const threads = Number.parseInt(process.env.LLAMA_THREADS ?? "", 10) || 6;
  contextInstance = await modelInstance.createContext({
    batchSize: 512,
    threads
  });

  console.log("Model successfully loaded! 🚀");
  console.log("-----------------------------------------");
}
/* -----------------------
   API ENDPOINTS
   ----------------------- */

/**
 * Main generation endpoint (the UI calls this internally).
 * JSON body: { user_input, user_temp?, user_inst?, user_max_token? }.
 * Responds 200 with { response }, 400 on bad input, 500 on generation failure.
 */
app.post("/generate", async (req, res) => {
  let sequence;
  let session;
  try {
    // Input validation and defaults
    const {
      user_input,
      user_temp = 0.7,
      user_inst = "You are an AI assistant. Give short clear answers.",
      user_max_token = 5120
    } = req.body;

    // Reject missing AND non-string input: a non-string truthy value would
    // previously slip past this check and crash at .substring() → 500.
    if (!user_input || typeof user_input !== "string") {
      return res.status(400).json({ error: "Missing required field: user_input" });
    }

    // Parse numeric params up front (radix 10) so NaN never reaches the model.
    const maxTokens = Number.parseInt(user_max_token, 10);
    const temperature = Number.parseFloat(user_temp);
    if (!Number.isFinite(maxTokens) || maxTokens <= 0 || !Number.isFinite(temperature)) {
      return res.status(400).json({ error: "user_temp and user_max_token must be numeric." });
    }

    console.log(`Generating response for: "${user_input.substring(0, 50)}..."`);
    console.log(`Params: temp=${user_temp}, max=${user_max_token}`);

    // We create a new session for each request so it uses the *dynamic* instructions.
    // The sequence is claimed from the shared context and MUST be released in
    // `finally` below — otherwise every request leaks a sequence slot and the
    // context eventually runs out of sequences.
    sequence = contextInstance.getSequence();
    session = new LlamaChatSession({
      contextSequence: sequence,
      systemPrompt: user_inst // Apply user provided instructions
    });

    // Generate response using the provided temperature and token budget.
    const responseText = await session.prompt(user_input, {
      maxTokens,
      temperature,
      topK: 40,
      // NOTE(review): one sampling option (new-file line 88, likely topP) is
      // not visible in the reviewed diff — restore it here if it existed.
      repeatPenalty: 1.1
    });

    res.json({ response: responseText });

  } catch (err) {
    console.error("Error during generation:", err);
    res.status(500).json({ error: "An internal error occurred during text generation." });
  } finally {
    // Release per-request resources so the shared context can be reused.
    // NOTE(review): confirm against the installed node-llama-cpp version that
    // session.dispose() does not already dispose the sequence.
    session?.dispose();
    sequence?.dispose();
  }
});
/* -----------------------
   START SERVER
   ----------------------- */
// Boot sequence: load the model first, then accept traffic. Any failure —
// model init or a synchronous listen error — is fatal for the process.
(async () => {
  try {
    await initModel();
    // Listen on 0.0.0.0 for external network routing (like Hugging Face)
    app.listen(PORT, "0.0.0.0", () => {
      console.log(`Server is listening on port ${PORT}`);
      console.log(`Access UI at: http://localhost:${PORT}`);
    });
  } catch (err) {
    console.error("Critical Failure: Failed to initialize the model server.", err);
    process.exit(1);
  }
})();