Spaces:

kspchary
/

vibe-coding-backend

Sleeping

App Files Community

kspchary committed 25 days ago

Commit

054e177

verified ·

1 Parent(s): aa4435e

Upload server.js

Browse files

Files changed (1) hide show

server.js +27 -168

server.js CHANGED Viewed

@@ -57,7 +57,7 @@ if (cluster.isPrimary) {
     // 1. Rate Limiting for Fairness (Essential for 10k+ Users)
     const limiter = rateLimit({
         windowMs: 15 * 60 * 1000, // 15 minutes
-        max: 50, // limit each IP to 50 requests per window
         message: { error: "Too many requests. Please wait a moment." },
         standardHeaders: true,
         legacyHeaders: false,
@@ -91,7 +91,7 @@ if (cluster.isPrimary) {
     }
     const SYSTEM_PROMPTS = {
-        vibe: "You are an expert full-stack developer and friendly Davii ai assistant. Be professional, direct, and kind.",
         ui: "You are a world-class UI/UX and CSS expert. Focus on modern aesthetics, glassmorphism, animations, and beautiful responsive layouts.",
         security: "You are a Cyber-Security Teacher and Researcher. ...",
         logic: "You are a backend architect specializing in algorithms ...",
@@ -131,13 +131,6 @@ Rules:
         "codellama/CodeLlama-7b-hf"
     ];
-    const VISION_MODELS = [
-        "llava-hf/llava-1.5-7b-hf",
-        "Qwen/Qwen2-VL-7B-Instruct",
-        "meta-llama/Llama-3.2-11B-Vision-Instruct",
-        "llava-hf/llava-v1.6-vicuna-7b-hf"
-    ];
     // Worker-Local Queue (Scales with number of workers)
     const requestQueue = [];
     let activeRequests = 0;
@@ -160,89 +153,6 @@ Rules:
     });
     async function callHuggingFace(model, messages, res, isInternalThought = false) {
-        const isVisionModel = VISION_MODELS.includes(model);
-        // Stage 1: Try OpenAI-compatible endpoint
-        let API_URL = isVisionModel
-            ? `https://api-inference.huggingface.co/models/${model}/v1/chat/completions`
-            : `https://router.huggingface.co/v1/chat/completions`;
-        console.log(`[Worker ${process.pid}] [Stage 1] Calling ${isVisionModel ? 'Vision' : 'Text'} Model: ${model}`);
-        try {
-            let response = await fetch(API_URL, {
-                method: "POST",
-                headers: {
-                    "Authorization": `Bearer ${HF_TOKEN}`,
-                    "Content-Type": "application/json"
-                },
-                body: JSON.stringify({
-                    model: model,
-                    messages: messages,
-                    max_tokens: 5000,
-                    temperature: 0.7,
-                    stream: true
-                })
-            });
-            // Stage 2 Fallback: If vision model and Stage 1 fails with specific errors, try native format
-            if (isVisionModel && (!response.ok || response.status === 404)) {
-                console.log(`[Worker ${process.pid}] [Stage 1 Failed] Falling back to Direct Inference for: ${model}`);
-                const nativeApiUrl = `https://api-inference.huggingface.co/models/${model}`;
-                // Construct native payload (non-streaming, basic)
-                const lastUserMessage = messages.findLast(m => m.role === "user");
-                const textContent = Array.isArray(lastUserMessage.content)
-                    ? lastUserMessage.content.find(c => c.type === "text")?.text
-                    : lastUserMessage.content;
-                const imageContent = Array.isArray(lastUserMessage.content)
-                    ? lastUserMessage.content.find(c => c.type === "image_url")?.image_url?.url
-                    : null;
-                // Native format: some models want {inputs: {image: ..., text: ...}}, others just binary
-                const nativePayload = imageContent
-                    ? { inputs: textContent, image: imageContent.includes("base64,") ? imageContent.split("base64,")[1] : imageContent }
-                    : { inputs: textContent };
-                console.log(`[Worker ${process.pid}] [Stage 2] Calling Native API: ${nativeApiUrl}`);
-                const fallbackResponse = await fetch(nativeApiUrl, {
-                    method: "POST",
-                    headers: {
-                        "Authorization": `Bearer ${HF_TOKEN}`,
-                        "Content-Type": "application/json"
-                    },
-                    body: JSON.stringify(nativePayload)
-                });
-                if (fallbackResponse.ok) {
-                    const result = await fallbackResponse.json();
-                    let generatedText = Array.isArray(result) ? result[0].generated_text : (result.generated_text || JSON.stringify(result));
-                    // Simple stream simulation for fallback
-                    res.write(`data: ${JSON.stringify({ choices: [{ delta: { content: generatedText } }] })}\n\n`);
-                    res.write("data: [DONE]\n\n");
-                    return generatedText;
-                }
-            }
-            if (response.status === 429) throw new Error("RATE_LIMIT");
-            if (response.status === 503) throw new Error("MODEL_LOADING");
-            if (!response.ok) {
-                const err = await response.json().catch(() => ({}));
-                console.error(`[Worker ${process.pid}] HF API Error [${response.status}]:`, JSON.stringify(err, null, 2));
-                throw new Error(err.error?.message || err.error || `HF Error ${response.status}`);
-            }
-            return streamResponse(response, res, isInternalThought);
-        } catch (error) {
-            console.error(`[Worker ${process.pid}] Call Error:`, error.message);
-            throw error;
-        }
-    }
-    async function callHuggingFaceRouterInternal(model, messages, res, isInternalThought = false) {
         const API_URL = `https://router.huggingface.co/v1/chat/completions`;
         const response = await fetch(API_URL, {
             method: "POST",
@@ -258,12 +168,15 @@ Rules:
                 stream: true
             })
         });
-        if (!response.ok) throw new Error(`Router Fallback Failed: ${response.status}`);
-        return streamResponse(response, res, isInternalThought);
-    }
-    async function streamResponse(response, res, isInternalThought) {
-        if (!isInternalThought && !res.headersSent) {
             res.setHeader('Content-Type', 'text/event-stream');
             res.setHeader('Cache-Control', 'no-cache');
             res.setHeader('Connection', 'keep-alive');
@@ -354,41 +267,14 @@ Rules:
     }
     async function handleVibeRequest(req, res) {
-        const { prompt, mode = "vibe", history = [], sessionId = "default", images = [] } = req.body;
-        if (!prompt && images.length === 0) return res.status(400).json({ error: "Prompt or images are required" });
-        const systemContent = String(SYSTEM_PROMPTS[mode] || SYSTEM_PROMPTS.vibe);
-        // Prepare messages
-        let messages = [];
-        // Handle Multimodal (Images + Text)
-        if (images.length > 0) {
-            const userContent = [];
-            // Standard multimodal format: First text, then images
-            userContent.push({ type: "text", text: `${systemContent}\n\n${prompt || "What is in this image?"}` });
-            images.forEach(img => {
-                // High-precision base64 check - ensure no weird line breaks or spaces
-                const cleanImg = img.trim();
-                userContent.push({
-                    type: "image_url",
-                    image_url: { url: cleanImg }
-                });
-            });
-            messages = [{ role: "user", content: userContent }];
-        } else {
-            messages = [{ role: "system", content: systemContent }, ...history, { role: "user", content: prompt }];
-        }
-        const currentModelList = images.length > 0 ? VISION_MODELS : (mode === 'deepseek' ? DEEPSEEK_MODELS : MODELS);
         let lastError = null;
-        // For logging purposes
-        const displayPrompt = prompt || (images.length > 0 ? "[Image Attachment]" : "");
         for (let i = 0; i < currentModelList.length; i++) {
             const model = currentModelList[i];
             try {
@@ -401,11 +287,9 @@ Rules:
                 if (!isAgentMode) {
                     finalText = await callHuggingFace(model, messages, res, false);
                 } else {
-                    if (!res.headersSent) {
-                        res.setHeader('Content-Type', 'text/event-stream');
-                        res.setHeader('Cache-Control', 'no-cache');
-                        res.setHeader('Connection', 'keep-alive');
-                    }
                     headersSent = true;
                     while (loopCount < maxLoops) {
@@ -447,7 +331,7 @@ Rules:
                 try {
                     const logEntry = {
                         timestamp: new Date().toISOString(),
-                        prompt: displayPrompt,
                         response: finalText,
                         mode,
                         model: model,
@@ -477,44 +361,19 @@ Rules:
                 return;
             } catch (error) {
                 lastError = error;
-                console.error(`[Worker ${process.pid}] Model ${model} failed:`, error.message);
-                // Continue to next model in currentModelList
-            }
-        }
-        if (images.length > 0) {
-            console.log(`[Worker ${process.pid}] All vision models failed. Attempting text-only fallback.`);
-            try {
-                const fallbackModel = MODELS[0]; // Use first reliable text model
-                const warningMsg = "\n\n*(Note: Image processing is temporarily unavailable. Responding based on your text prompt instead...)*\n\n";
-                // Construct a clean text-only message list
-                const textOnlyMessages = [{ role: "system", content: systemContent }, ...history, { role: "user", content: prompt || "Describe the attached images (Note: Processing unavailable)" }];
-                // Stream the response with a prefix
-                if (!res.headersSent) {
-                    res.setHeader('Content-Type', 'text/event-stream');
                 }
-                res.write(`data: ${JSON.stringify({ token: warningMsg })}\n\n`);
-                await callHuggingFace(fallbackModel, textOnlyMessages, res, false);
-                return;
-            } catch (fallbackErr) {
-                console.error(`[Worker ${process.pid}] Global Fallback Failed:`, fallbackErr.message);
             }
         }
         const finalErrorMessage = lastError?.message === "RATE_LIMIT"
             ? "Server busy (10k+ load cap reached). Please wait a few seconds."
-            : `Image Processing Error: ${lastError?.message}. Please try again later or with a different image.`;
-        if (!res.headersSent) {
-            res.status(503).json({ error: finalErrorMessage });
-        } else {
-            res.write(`data: ${JSON.stringify({ error: finalErrorMessage })}\n\n`);
-            res.write("data: [DONE]\n\n");
-            res.end();
-        }
     }
     app.post("/image", async (req, res) => {
@@ -538,7 +397,7 @@ Rules:
         }
     });
-    app.get("/", (req, res) => res.send(`Davii ai [Worker ${process.pid}] is powering the vibe! 🛸`));
     const PORT = 7860;
     app.listen(PORT, () => console.log(`[Worker ${process.pid}] Multi-core node running on port ${PORT}`));

     // 1. Rate Limiting for Fairness (Essential for 10k+ Users)
     const limiter = rateLimit({
         windowMs: 15 * 60 * 1000, // 15 minutes
+        max: 10000, // Practically unlimited: 10,000 requests per 15 mins
         message: { error: "Too many requests. Please wait a moment." },
         standardHeaders: true,
         legacyHeaders: false,
     }
     const SYSTEM_PROMPTS = {
+        vibe: "You are an expert full-stack developer and friendly Prachee ai assistant. Be professional, direct, and kind.",
         ui: "You are a world-class UI/UX and CSS expert. Focus on modern aesthetics, glassmorphism, animations, and beautiful responsive layouts.",
         security: "You are a Cyber-Security Teacher and Researcher. ...",
         logic: "You are a backend architect specializing in algorithms ...",
         "codellama/CodeLlama-7b-hf"
     ];
     // Worker-Local Queue (Scales with number of workers)
     const requestQueue = [];
     let activeRequests = 0;
     });
     async function callHuggingFace(model, messages, res, isInternalThought = false) {
         const API_URL = `https://router.huggingface.co/v1/chat/completions`;
         const response = await fetch(API_URL, {
             method: "POST",
                 stream: true
             })
         });
+        if (response.status === 429) throw new Error("RATE_LIMIT");
+        if (response.status === 503) throw new Error("MODEL_LOADING");
+        if (!response.ok) {
+            const err = await response.json().catch(() => ({}));
+            throw new Error(err.error?.message || `HF Error ${response.status}`);
+        }
+        if (!isInternalThought) {
             res.setHeader('Content-Type', 'text/event-stream');
             res.setHeader('Cache-Control', 'no-cache');
             res.setHeader('Connection', 'keep-alive');
     }
     async function handleVibeRequest(req, res) {
+        const { prompt, mode = "vibe", history = [], sessionId = "default" } = req.body;
+        if (!prompt) return res.status(400).json({ error: "Prompt is required" });
+        const systemContent = SYSTEM_PROMPTS[mode] || SYSTEM_PROMPTS.vibe;
+        let messages = [{ role: "system", content: systemContent }, ...history, { role: "user", content: prompt }];
+        const currentModelList = mode === 'deepseek' ? DEEPSEEK_MODELS : MODELS;
         let lastError = null;
         for (let i = 0; i < currentModelList.length; i++) {
             const model = currentModelList[i];
             try {
                 if (!isAgentMode) {
                     finalText = await callHuggingFace(model, messages, res, false);
                 } else {
+                    res.setHeader('Content-Type', 'text/event-stream');
+                    res.setHeader('Cache-Control', 'no-cache');
+                    res.setHeader('Connection', 'keep-alive');
                     headersSent = true;
                     while (loopCount < maxLoops) {
                 try {
                     const logEntry = {
                         timestamp: new Date().toISOString(),
+                        prompt,
                         response: finalText,
                         mode,
                         model: model,
                 return;
             } catch (error) {
                 lastError = error;
+                if (res.headersSent) {
+                    res.write(`data: ${JSON.stringify({ token: "\n\n[System] All models busy. Retrying later..." })}\n\n`);
+                    res.write("data: [DONE]\n\n");
+                    return res.end();
                 }
             }
         }
         const finalErrorMessage = lastError?.message === "RATE_LIMIT"
             ? "Server busy (10k+ load cap reached). Please wait a few seconds."
+            : `System logic error or busy models. Error: ${lastError?.message}`;
+        res.status(503).json({ error: finalErrorMessage });
     }
     app.post("/image", async (req, res) => {
         }
     });
+    app.get("/", (req, res) => res.send(`Prachee ai [Worker ${process.pid}] is powering the vibe! 🛸`));
     const PORT = 7860;
     app.listen(PORT, () => console.log(`[Worker ${process.pid}] Multi-core node running on port ${PORT}`));