AI / server.js
Gaston895's picture
Upload 4 files
6a8409a verified
/**
* GSS-TEC Sovereign Node — Hugging Face Space
* Space: https://huggingface.co/spaces/Gaston895/AI
*
* Purpose: Receive requests from users, verify JWT, forward to Groq or Ollama.
* If the subscriber has a Business Profile (Turso creds in JWT),
* fetches their knowledge base and injects it into the system prompt.
*
* Routes:
* GET /ping - Keep-alive (UptimeRobot)
* GET /health - Health check
* POST /gss-internal/sync-keys - Receive AI keys from Cloudflare (bridge secret)
* POST /v1/chat/completions - Main chat proxy (Groq or Ollama, JWT required)
*
* Three-Way Data Flow:
* 1. SDK → Cloudflare /auth/lease → JWT (with optional Turso creds embedded)
* 2. SDK → HF /v1/chat/completions → JWT verified in RAM (zero DB call)
* 3. HF → Turso (if JWT has turso_url) → fetches 20MB business knowledge
* 4. HF → Groq/Ollama → prompt + knowledge → answer → SDK
*
* HF Secrets to set:
* GSS_MASTER_SECRET - JWT signing secret (same as Cloudflare Worker)
* GROQ_BASE_URL - https://api.groq.com/openai/v1 (default)
* OLLAMA_BASE_URL - e.g. http://your-ollama-server:11434/v1 (optional)
* SPACE_URL - This space's public URL (for self keep-alive ping)
*/
const { createHash, timingSafeEqual } = require('node:crypto');
const express = require('express');
const jwt = require('jsonwebtoken');
const { createClient } = require('@libsql/client');
// Base URL of the Cloudflare Worker; overridable through the CF_WORKER_URL env var.
const CF_WORKER_URL =
  process.env.CF_WORKER_URL || 'https://qssn-d1-api.gastonsoftwaresolutions234.workers.dev';
console.log('[GSS] CF_WORKER_URL:', CF_WORKER_URL);

// Express application; JSON request bodies are parsed with a 4mb size limit.
const app = express();
const jsonBodyParser = express.json({ limit: '4mb' });
app.use(jsonBodyParser);
/**
 * Source text of the GSSClient SDK that the server serves as a JavaScript file.
 *
 * The client leases a JWT from the Cloudflare worker (`/auth/lease`), caches it
 * (in memory and, when available, in localStorage) and uses it as a Bearer token
 * against this engine's `/v1/chat/completions` route.
 *
 * Notes on behavior:
 *  - A 401/403 from the engine triggers exactly ONE token renewal + retry. The
 *    previous version retried without bound, so a permanently rejected token
 *    produced an endless lease/chat request loop.
 *  - All localStorage access is guarded, so the class also works where
 *    localStorage does not exist (the SDK exports itself via `module.exports`).
 *  - Regex backslashes are doubled because this is a template literal.
 */
const SDK_JS = `
class GSSClient {
  constructor(opts) {
    if (!opts || !opts.apiKey || !opts.cfWorkerUrl || !opts.hfEngineUrl) {
      throw new Error('[GSSClient] apiKey, cfWorkerUrl, and hfEngineUrl are required');
    }
    this.apiKey = opts.apiKey;
    this.cfWorkerUrl = opts.cfWorkerUrl.replace(/\\/$/,'');
    this.hfEngineUrl = opts.hfEngineUrl.replace(/\\/$/,'');
    this.model = opts.model || 'llama-3.1-8b-instant';
    this.storageKey = opts.storageKey || 'gss_jwt';
    this._token = null;
    if (typeof window !== 'undefined') this._startKeepAlive();
  }

  // Returns localStorage when the environment provides a usable one, else null.
  _storage() {
    try {
      return typeof localStorage !== 'undefined' ? localStorage : null;
    } catch (_) {
      return null;
    }
  }

  // Returns a cached token that is still valid for at least 5 more minutes, else null.
  _loadCachedToken() {
    const store = this._storage();
    if (!store) return null;
    try {
      const raw = store.getItem(this.storageKey);
      if (!raw) return null;
      const { token, exp } = JSON.parse(raw);
      if (Date.now() / 1000 < exp - 300) return token;
      store.removeItem(this.storageKey);
    } catch (_) {
      // Corrupt or inaccessible cache entry: treat as a cache miss.
    }
    return null;
  }

  // Persisting the token is best effort; a storage failure must not fail the lease.
  _saveToken(token) {
    const store = this._storage();
    if (!store) return;
    try {
      store.setItem(this.storageKey, JSON.stringify({ token, exp: this._decodeExp(token) }));
    } catch (_) {
      // Storage full or blocked: keep the in-memory token only.
    }
  }

  // Drops the token from memory and from storage.
  _clearToken() {
    this._token = null;
    const store = this._storage();
    if (!store) return;
    try {
      store.removeItem(this.storageKey);
    } catch (_) {
      // Nothing else to clean up.
    }
  }

  // Reads the exp claim from the JWT payload; returns 0 when it cannot be decoded.
  _decodeExp(token) {
    try {
      const p = JSON.parse(atob(token.split('.')[1].replace(/-/g,'+').replace(/_/g,'/')));
      return p.exp || 0;
    } catch { return 0; }
  }

  async _lease() {
    const res = await fetch(this.cfWorkerUrl + '/auth/lease', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ api_key: this.apiKey }),
    });
    if (res.status === 403) throw new Error('[GSSClient] Subscription expired or invalid API key');
    if (!res.ok) throw new Error('[GSSClient] Lease failed: ' + res.status);
    const { token } = await res.json();
    if (!token) throw new Error('[GSSClient] Lease failed: response contained no token');
    this._saveToken(token);
    this._token = token;
    return token;
  }

  async _getToken() {
    if (!this._token) this._token = this._loadCachedToken();
    if (!this._token) this._token = await this._lease();
    return this._token;
  }

  async chat(messages, opts = {}) {
    const token = await this._getToken();
    const body = {
      model: opts.model || this.model,
      messages,
      temperature: opts.temperature ?? 0.7,
      max_tokens: opts.max_tokens ?? 1024,
    };
    if (opts.provider) body.provider = opts.provider;
    const res = await fetch(this.hfEngineUrl + '/v1/chat/completions', {
      method: 'POST',
      headers: { 'Authorization': 'Bearer ' + token, 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
    });
    if (res.status === 401 || res.status === 403) {
      this._clearToken();
      // Renew and retry once; a second rejection falls through to the error below.
      if (!opts._authRetried) {
        return this.chat(messages, { ...opts, _authRetried: true });
      }
    }
    if (!res.ok) {
      const err = (await res.json().catch(() => null)) || {};
      const detail = typeof err.error === 'string'
        ? err.error
        : ((err.error && err.error.message) || res.statusText);
      throw new Error('[GSSClient] Chat error ' + res.status + ': ' + detail);
    }
    const data = await res.json();
    return data.choices?.[0]?.message?.content ?? '';
  }

  async ping() { return (await fetch(this.hfEngineUrl + '/ping')).json(); }

  async renewToken() {
    this._clearToken();
    return this._lease();
  }

  // Best-effort keep-alive call to the worker; failures are intentionally ignored.
  async _syncKeys() {
    try {
      const token = await this._getToken();
      await fetch(this.cfWorkerUrl + '/auth/keep-alive', {
        method: 'POST',
        headers: { 'Authorization': 'Bearer ' + token, 'Content-Type': 'application/json' },
      });
    } catch (_) {}
  }

  _startKeepAlive() {
    setTimeout(() => this._syncKeys(), 5000);
    setInterval(() => this._syncKeys(), 30 * 60 * 1000);
  }
}
if (typeof module !== 'undefined' && module.exports) module.exports = GSSClient;
else if (typeof window !== 'undefined') window.GSSClient = GSSClient;
`;
// Serve SDK from both paths for compatibility.
// Each path gets its own GET route that returns the SDK source as JavaScript,
// cacheable by clients for one hour.
for (const sdkPath of ['/sdk/gss-sdk.js', '/gss-sdk.js']) {
  app.get(sdkPath, (_req, res) => {
    res.setHeader('Content-Type', 'application/javascript');
    res.setHeader('Cache-Control', 'public, max-age=3600');
    res.send(SDK_JS);
  });
}
// ── CORS ──────────────────────────────────────────────────────────────────────
// Adds permissive CORS headers to every response handled after this point and
// answers OPTIONS (preflight) requests directly with 204.
app.use((req, res, next) => {
  const corsHeaders = {
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
    'Access-Control-Allow-Headers': 'Content-Type, Authorization',
  };
  for (const [headerName, headerValue] of Object.entries(corsHeaders)) {
    res.setHeader(headerName, headerValue);
  }
  if (req.method === 'OPTIONS') {
    res.sendStatus(204);
    return;
  }
  next();
});
// ── Per-subscriber key pools ──────────────────────────────────────────────────
// Structure: { [subscriberId]: { groq: KeyEntry[], ollama: KeyEntry[], cursor, knowledge, profile } }
// Created without a prototype: subscriber ids come from JWT claims and sync
// payloads, so ids such as "constructor" or "__proto__" must behave like any
// other key instead of resolving to Object.prototype members.
const subscriberPools = Object.create(null);
/**
 * Build the bookkeeping record kept for one API key.
 *
 * @param {string} key - The API key.
 * @param {number} index - Position of the key in its list (used in log output).
 * @returns {{index: number, key: string, failures: number, coolUntil: number}}
 *   A fresh entry with no failures and no active cooldown.
 */
function makeEntry(key, index) {
  const entry = {
    index,
    key,
    failures: 0,
    coolUntil: 0,
  };
  return entry;
}
/**
 * Replace the in-memory pool of one subscriber.
 *
 * Any previous pool for the id is overwritten, which also resets the
 * round-robin cursors and every key's failure/cooldown state.
 *
 * @param {string} subscriberId - Pool key.
 * @param {string[]} [groqKeys=[]] - Groq API keys.
 * @param {string[]} [ollamaKeys=[]] - Ollama API keys.
 * @param {Array} [knowledge=[]] - Knowledge rows; a falsy value is stored as [].
 * @param {?Object} [profile=null] - Business profile; a falsy value is stored as null.
 */
function loadSubscriberKeys(subscriberId, groqKeys = [], ollamaKeys = [], knowledge = [], profile = null) {
  const toEntries = (list) => list.map((secret, position) => makeEntry(secret, position));
  const knowledgeRows = knowledge || [];

  subscriberPools[subscriberId] = {
    groq: toEntries(groqKeys),
    ollama: toEntries(ollamaKeys),
    cursor: { groq: 0, ollama: 0 },
    knowledge: knowledgeRows,
    profile: profile || null,
  };

  console.log(`[GSS] Subscriber ${subscriberId}: ${groqKeys.length} Groq + ${ollamaKeys.length} Ollama keys loaded, ${knowledgeRows.length} knowledge rows`);
}
/**
 * Pick the next usable key of a subscriber in round-robin order.
 *
 * Keys whose cooldown has not yet elapsed are skipped. The per-provider cursor
 * indexes into the list of currently usable keys and is advanced on every pick.
 *
 * @param {string} subscriberId - Pool key.
 * @param {string} [provider='groq'] - Which key list of the pool to use.
 * @returns {?Object} The chosen key entry, or null when the subscriber is
 *   unknown or has no usable key for the provider.
 */
function pickKeyForSubscriber(subscriberId, provider = 'groq') {
  const subscriberPool = subscriberPools[subscriberId];
  if (!subscriberPool) {
    return null;
  }

  const timestamp = Date.now();
  const usable = [];
  for (const candidate of subscriberPool[provider] || []) {
    if (timestamp >= candidate.coolUntil) {
      usable.push(candidate);
    }
  }
  if (usable.length === 0) {
    return null;
  }

  const position = subscriberPool.cursor[provider] || 0;
  subscriberPool.cursor[provider] = (position + 1) % usable.length;
  return usable[position % usable.length];
}
/**
 * Record a failure on a key entry and take it out of rotation for 60 seconds.
 *
 * @param {{index: number, failures: number, coolUntil: number}} entry - Key
 *   entry to penalize; it is mutated in place.
 */
function cooldownKey(entry) {
  const cooldownMs = 60_000;
  entry.failures++;
  const resumeAt = Date.now() + cooldownMs;
  entry.coolUntil = resumeAt;
  console.warn(`[GSS] Key #${entry.index} on cooldown (failures: ${entry.failures})`);
}
// ── Load keys from HF env var on startup ─────────────────────────────────────
// Fallback: the GROQ_KEYS env var (comma-separated) is loaded as the default
// subscriber with id 'env'.
/**
 * Sync the 'env' subscriber pool with the GROQ_KEYS environment variable.
 *
 * The pool is reloaded whenever the parsed key list differs from the keys
 * currently held, including a rotation that keeps the number of keys the same
 * (the previous length-only comparison missed that case). An unchanged list is
 * left alone so failure counters and cooldowns survive. An empty or missing
 * GROQ_KEYS never clears an existing pool.
 */
function loadKeysFromEnv() {
  const raw = process.env.GROQ_KEYS || '';
  const keys = raw.split(',').map((k) => k.trim()).filter(Boolean);
  if (!keys.length) return;

  const currentKeys = (subscriberPools['env']?.groq || []).map((entry) => entry.key);
  const unchanged =
    keys.length === currentKeys.length && keys.every((key, i) => key === currentKeys[i]);
  if (!unchanged) {
    loadSubscriberKeys('env', keys, []);
  }
}
// Load the env-provided keys once at startup, then re-run the check every 5 seconds.
loadKeysFromEnv();
setInterval(loadKeysFromEnv, 5000);
// Total key count for /ping
/**
 * Count every Groq and Ollama key held across all subscriber pools.
 *
 * @returns {number} Sum of the lengths of all `groq` and `ollama` lists.
 */
function totalKeys() {
  let count = 0;
  for (const pool of Object.values(subscriberPools)) {
    count += pool.groq.length + pool.ollama.length;
  }
  return count;
}
// ── JWT middleware ────────────────────────────────────────────────────────────
/**
 * Express middleware that authenticates a request by its JWT.
 *
 * The token is taken from the second space-separated part of the
 * `Authorization` header and verified against GSS_MASTER_SECRET. On success the
 * decoded payload is stored on `req.user` and `next()` is called.
 *
 * Responses:
 *  - 401 when no token is present.
 *  - 403 when the token fails verification, when its payload is not an object,
 *    or when GSS_MASTER_SECRET is not configured. The last case is checked
 *    explicitly so a missing secret always fails closed (matching the sync
 *    route) instead of depending on how the JWT library treats an undefined key.
 */
const gssAuth = (req, res, next) => {
  const rejectToken = () => res.status(403).json({ error: 'GSS-TEC: Token Invalid or Expired.' });

  const authHeader = req.headers['authorization'];
  const token = authHeader && authHeader.split(' ')[1];
  if (!token) {
    return res.status(401).json({ error: 'GSS-TEC: No Access Token Provided.' });
  }

  const secret = process.env.GSS_MASTER_SECRET;
  if (!secret) {
    console.error('[GSS] GSS_MASTER_SECRET is not set; rejecting all tokens');
    return rejectToken();
  }

  jwt.verify(token, secret, (err, user) => {
    // Downstream handlers read claims from req.user, so it must be an object.
    if (err || !user || typeof user !== 'object') {
      return rejectToken();
    }
    req.user = user;
    next();
  });
};
// ── Keep-alive self-ping ──────────────────────────────────────────────────────
// Pings itself every 2 minutes to prevent HF Space from sleeping.
// SPACE_URL is this Space's public URL; overridable through the SPACE_URL env var.
const SPACE_URL = process.env.SPACE_URL || 'https://gaston895-ai.hf.space';
setInterval(async () => {
  try {
    const pingRes = await fetch(`${SPACE_URL}/ping`);
    // fetch() only rejects on network errors, so an HTTP error status has to be
    // checked explicitly before the ping is reported as successful.
    if (pingRes.ok) {
      console.log(`[GSS] Keep-alive ping OK — subscribers: ${Object.keys(subscriberPools).length}, keys: ${totalKeys()}`);
    } else {
      console.warn('[GSS] Keep-alive ping failed:', `HTTP ${pingRes.status}`);
    }
  } catch (err) {
    console.warn('[GSS] Keep-alive ping failed:', err.message);
  }
}, 2 * 60 * 1000); // every 2 minutes
// Root redirects to the keep-alive endpoint.
app.get('/', (_req, res) => {
  res.redirect('/ping');
});

// Keep-alive endpoint: reports how many subscriber pools and keys are held in memory.
app.get('/ping', (_req, res) => {
  const subscriberCount = Object.keys(subscriberPools).length;
  res.json({
    status: 'GSS Engine Online',
    subscribers: subscriberCount,
    keys: totalKeys(),
    ts: new Date().toISOString(),
  });
});

// Health check: static status plus the current timestamp.
app.get('/health', (_req, res) => {
  res.json({ status: 'healthy', ts: new Date().toISOString() });
});
// ── Sync route (Cloudflare → HF RAM) ─────────────────────────────────────────
// Body: { subscribers: [{ id, groq_keys: [], ollama_keys: [], knowledge?: [], profile?: {} }] }

/**
 * Compare the bridge secret sent by the caller with the configured one in
 * constant time.
 *
 * Both values are hashed with SHA-256 first because `timingSafeEqual` requires
 * buffers of equal length.
 *
 * @param {*} provided - Value of the `x-gss-bridge-secret` header.
 * @param {*} expected - Configured secret.
 * @returns {boolean} True only when both are strings, `expected` is non-empty
 *   and the two are equal.
 */
function bridgeSecretMatches(provided, expected) {
  if (typeof provided !== 'string' || typeof expected !== 'string' || !expected) {
    return false;
  }
  const providedDigest = createHash('sha256').update(provided).digest();
  const expectedDigest = createHash('sha256').update(expected).digest();
  return timingSafeEqual(providedDigest, expectedDigest);
}

/**
 * POST /gss-internal/sync-keys: load subscriber key pools into memory.
 *
 * Responds 403 when the bridge secret is missing or wrong, 400 when
 * `subscribers` is not a non-empty array. Entries without an id, or without
 * at least one Groq or Ollama key, are skipped. Every accepted entry replaces
 * that subscriber's whole pool.
 */
app.post('/gss-internal/sync-keys', (req, res) => {
  if (!bridgeSecretMatches(req.headers['x-gss-bridge-secret'], process.env.GSS_MASTER_SECRET)) {
    return res.status(403).json({ error: 'Forbidden' });
  }

  // req.body may be absent when the request carried no JSON payload.
  const subscribers = req.body?.subscribers;
  if (!Array.isArray(subscribers) || !subscribers.length) {
    return res.status(400).json({ error: 'subscribers[] required' });
  }

  for (const sub of subscribers) {
    if (!sub || !sub.id) continue;
    // Non-array values are treated as "no keys" instead of crashing on .map().
    const groqKeys = Array.isArray(sub.groq_keys) ? sub.groq_keys : [];
    const ollamaKeys = Array.isArray(sub.ollama_keys) ? sub.ollama_keys : [];
    if (!groqKeys.length && !ollamaKeys.length) continue;
    const knowledge = Array.isArray(sub.knowledge) ? sub.knowledge : [];
    loadSubscriberKeys(String(sub.id), groqKeys, ollamaKeys, knowledge, sub.profile || null);
  }

  res.json({ success: true, subscribers: Object.keys(subscriberPools).length, total_keys: totalKeys() });
});
// ── Main chat proxy ───────────────────────────────────────────────────────────

/**
 * Flatten a chat message `content` value into plain text.
 *
 * @param {*} content - A string, or an array of parts that are strings or
 *   objects with a string `text` field.
 * @returns {string} The text; '' for anything else.
 */
function extractMessageText(content) {
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return '';
  return content
    .map((part) => {
      if (typeof part === 'string') return part;
      return typeof part?.text === 'string' ? part.text : '';
    })
    .join(' ');
}

/**
 * Extract the search keywords of a text: lower-cased runs of [a-z0-9] longer
 * than three characters.
 *
 * @param {string} text
 * @returns {Set<string>}
 */
function extractKeywords(text) {
  const tokens = text
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, ' ')
    .split(/\s+/)
    .filter((w) => w.length > 3);
  return new Set(tokens);
}

/**
 * Score knowledge rows by how many keywords their content contains and return
 * the best ones. Non-object rows are ignored.
 *
 * @param {Array<Object>} rows - Rows with `label` and `content`.
 * @param {Set<string>} keywords
 * @param {number} [limit=3] - Maximum number of rows returned.
 * @returns {Array<Object>} Copies of the top rows with an added `score`.
 */
function rankKnowledgeRows(rows, keywords, limit = 3) {
  const terms = [...keywords];
  return rows
    .filter((row) => row && typeof row === 'object')
    .map((row) => {
      const haystack = String(row.content || '').toLowerCase();
      return { ...row, score: terms.filter((term) => haystack.includes(term)).length };
    })
    .sort((a, b) => b.score - a.score)
    .slice(0, limit);
}

/**
 * Fetch up to three knowledge rows from a subscriber's Turso database.
 *
 * First tries a LIKE query built from the first three keywords; when that
 * fails or matches nothing, reads the whole table and ranks it in memory.
 * The client is always closed afterwards.
 *
 * @param {string} tursoUrl
 * @param {string} tursoToken
 * @param {Set<string>} keywords
 * @returns {Promise<Array<Object>>}
 */
async function fetchTursoKnowledge(tursoUrl, tursoToken, keywords) {
  const db = createClient({ url: tursoUrl, authToken: tursoToken });
  try {
    let rows = [];
    try {
      // Keywords contain only [a-z0-9], so they cannot inject LIKE wildcards.
      const pattern = `%${[...keywords].slice(0, 3).join('%')}%`;
      const matched = await db.execute({
        sql: 'SELECT label, content FROM gss_business_knowledge WHERE content LIKE ? LIMIT 3',
        args: [pattern],
      });
      rows = matched.rows;
    } catch (err) {
      console.warn('[GSS] Turso LIKE lookup failed, falling back to full scan:', err.message);
    }
    if (!rows.length) {
      const all = await db.execute('SELECT label, content FROM gss_business_knowledge ORDER BY label ASC');
      rows = rankKnowledgeRows(all.rows, keywords);
    }
    return rows;
  } finally {
    db.close();
  }
}

/**
 * Build the system prompt text from the business profile and knowledge snippets.
 *
 * @param {?Object} profile - Business profile, or null.
 * @param {Array<Object>} snippets - Knowledge rows (`label`, `content`).
 * @returns {string}
 */
function buildSystemPrompt(profile, snippets) {
  let systemContent = '';

  if (profile) {
    const name = profile.ai_name || 'Assistant';
    systemContent += `You are ${name}. Your name is ${name} and ONLY ${name}. Never use any other name. Never say your name is anything other than ${name}.\n`;

    // One "Label: value" line per business fact that is present.
    const factLines = [
      ['biz_name', 'You work for'],
      ['biz_type', 'Industry'],
      ['biz_mission', 'Mission'],
      ['biz_location', 'Location'],
      ['biz_phone', 'Phone/WhatsApp'],
      ['biz_website', 'Website'],
      ['biz_whatsapp', 'WhatsApp'],
      ['biz_facebook', 'Facebook'],
      ['biz_instagram', 'Instagram'],
    ];
    for (const [field, label] of factLines) {
      if (profile[field]) systemContent += `${label}: ${profile[field]}\n`;
    }

    if (profile.ai_role) systemContent += `Your role: ${profile.ai_role}.`;
    if (profile.ai_tone) systemContent += ` Tone: ${profile.ai_tone}.`;
    if (profile.ai_language) systemContent += ` Language: ${profile.ai_language}.`;
    if (profile.ai_goal) systemContent += ` Goal: ${profile.ai_goal}.`;
    if (profile.ai_rules) systemContent += `\nRules you MUST follow: ${profile.ai_rules}`;
    systemContent += '\n\n';
  }

  if (snippets.length) {
    // Each snippet is capped at 2000 characters to bound the prompt size.
    const knowledgeBlock = snippets
      .map((row) => `[${row.label}]\n${String(row.content || '').slice(0, 2000)}`)
      .join('\n\n---\n\n');
    systemContent += `Answer using ONLY this knowledge:\n\n${knowledgeBlock}\n\nIf unsure, say: "I don't have that information. Please contact us directly."`;
  }

  return systemContent;
}

/**
 * Put `systemContent` in front of the conversation: prepended to the first
 * system message when that message has string content, otherwise inserted as
 * a new leading system message. Mutates `messages`.
 *
 * @param {Array<Object>} messages
 * @param {string} systemContent
 */
function injectSystemPrompt(messages, systemContent) {
  const existing = messages.find((m) => m?.role === 'system');
  if (existing && typeof existing.content === 'string') {
    existing.content = `${systemContent}\n\n${existing.content}`;
  } else {
    messages.unshift({ role: 'system', content: systemContent });
  }
}

/**
 * Send the conversation to the Ollama native `/chat` endpoint as a fallback
 * and convert a successful reply to the OpenAI response shape.
 *
 * @param {{key: string}} ollamaEntry - Key entry used for the Bearer token.
 * @param {Array<Object>} messages
 * @returns {Promise<?Object>} OpenAI-style body, or null when the call failed.
 */
async function requestOllamaCloudFallback(ollamaEntry, messages) {
  const ollamaUrl = process.env.OLLAMA_BASE_URL || 'https://ollama.com/api';
  const ollamaBody = { model: 'gpt-oss:120b', messages, stream: false };
  try {
    const upstream = await fetch(`${ollamaUrl}/chat`, {
      method: 'POST',
      headers: { 'Authorization': `Bearer ${ollamaEntry.key}`, 'Content-Type': 'application/json' },
      body: JSON.stringify(ollamaBody),
    });
    if (!upstream.ok) {
      const errText = await upstream.text().catch(() => '');
      console.warn(`[GSS] Ollama cloud fallback failed (${upstream.status}): ${errText.slice(0, 200)}`);
      return null;
    }
    const ollamaData = await upstream.json();
    const content = ollamaData.message?.content || ollamaData.response || '';
    return {
      choices: [{ message: { role: 'assistant', content }, finish_reason: 'stop' }],
      model: ollamaBody.model,
    };
  } catch (err) {
    console.warn('[GSS] Ollama cloud fallback error:', err.message);
    return null;
  }
}

/**
 * Core of POST /v1/chat/completions (expects `req.user` set by gssAuth).
 *
 * Steps:
 *  1. Validate the body and choose provider/model.
 *  2. Select business knowledge (Turso when the JWT carries credentials,
 *     otherwise RAM or JWT-embedded rows) and inject it with the profile as a
 *     system prompt.
 *  3. Try the subscriber's keys round-robin; Groq requests may also use the
 *     'env' pool and, once, the subscriber's Ollama keys as a fallback.
 *
 * Status codes: 400 invalid body, 503 no keys loaded / all attempts failed,
 * 429 every key is on cooldown; otherwise the upstream status is relayed.
 */
async function handleChatCompletion(req, res) {
  const body = req.body;
  if (!body || typeof body !== 'object' || Array.isArray(body)) {
    return res.status(400).json({ error: 'GSS-TEC: JSON request body required.' });
  }
  if (body.messages != null && !Array.isArray(body.messages)) {
    return res.status(400).json({ error: 'GSS-TEC: messages must be an array.' });
  }

  const subscriberId = String(req.user.sub);
  // `provider` is a proxy-only field and must not be forwarded upstream.
  const { provider: requestedProvider, ...payload } = body;
  const useOllama = requestedProvider === 'ollama';
  const provider = useOllama ? 'ollama' : 'groq';
  if (!payload.model) {
    payload.model = useOllama ? 'minimax-m2:cloud' : 'llama-3.1-8b-instant';
  }
  // The proxy reads upstream replies with .json(), so a streamed reply (SSE, or
  // Ollama's default NDJSON stream) cannot be relayed. Always ask for a single
  // JSON response.
  payload.stream = false;

  // ── Step 4: Inject business knowledge ──
  const tursoUrl = req.user.turso_url;
  const tursoToken = req.user.turso_token;
  const ramPool = subscriberPools[subscriberId];
  // A profile pushed into RAM wins over the one embedded in the JWT.
  const activeProfile = ramPool?.profile || req.user.profile || null;

  const messages = payload.messages ?? [];
  const lastUserMessage = [...messages].reverse().find((m) => m?.role === 'user');
  const keywords = extractKeywords(extractMessageText(lastUserMessage?.content));

  let snippets = [];
  if (tursoUrl && tursoToken) {
    // Turso path — large knowledge vaults (>5MB)
    try {
      snippets = await fetchTursoKnowledge(tursoUrl, tursoToken, keywords);
    } catch (err) {
      console.warn(`[GSS] Turso failed for ${subscriberId}:`, err.message);
    }
  } else {
    // RAM/JWT path — use knowledge pushed by Cloudflare (no outbound fetch needed)
    const ramKnowledge = Array.isArray(ramPool?.knowledge) ? ramPool.knowledge : [];
    const jwtKnowledge = Array.isArray(req.user.knowledge) ? req.user.knowledge : [];
    snippets = rankKnowledgeRows(ramKnowledge.length ? ramKnowledge : jwtKnowledge, keywords);
  }

  if (snippets.length > 0 || activeProfile) {
    injectSystemPrompt(messages, buildSystemPrompt(activeProfile, snippets));
    payload.messages = messages;
    console.log(`[GSS] Injected ${snippets.length} snippets + profile for sub ${subscriberId}`);
  }

  // ── Key selection ──
  const ownKeys = ramPool?.[provider] || [];
  // The shared 'env' pool holds Groq keys only; it must never be used for (and
  // thereby sent to) the Ollama endpoint.
  const envKeys = useOllama ? [] : (subscriberPools['env']?.groq || []);
  if (!ownKeys.length && !envKeys.length) {
    return res.status(503).json({ error: 'No AI keys loaded for your account. Please check your subscription.' });
  }

  const baseUrl = useOllama
    ? (process.env.OLLAMA_BASE_URL || 'https://ollama.com/api')
    : (process.env.GROQ_BASE_URL || 'https://api.groq.com/openai/v1');
  // For Ollama cloud, use native /chat endpoint with different model format
  const isOllamaCloud = useOllama && baseUrl.includes('ollama.com');
  const chatEndpoint = isOllamaCloud ? `${baseUrl}/chat` : `${baseUrl}/chat/completions`;
  if (isOllamaCloud && (typeof payload.model !== 'string' || !payload.model.includes(':'))) {
    payload.model = 'gpt-oss:120b';
  }

  // Every failed attempt puts its key on cooldown, so no key is tried twice.
  const maxRetries = Math.min(ownKeys.length + envKeys.length, 10);

  // The Ollama fallback runs at most once per request and never for requests
  // that already target Ollama.
  let ollamaFallbackAttempted = false;
  const tryOllamaFallback = async (reason) => {
    if (useOllama || ollamaFallbackAttempted) return null;
    const ollamaEntry = pickKeyForSubscriber(subscriberId, 'ollama');
    if (!ollamaEntry) return null;
    ollamaFallbackAttempted = true;
    console.warn(`[GSS] ${reason} for sub ${subscriberId}, switching to Ollama cloud`);
    return requestOllamaCloudFallback(ollamaEntry, payload.messages || []);
  };

  for (let attempt = 0; attempt < maxRetries; attempt++) {
    const entry = pickKeyForSubscriber(subscriberId, provider)
      || (useOllama ? null : pickKeyForSubscriber('env', 'groq')); // env fallback (Groq only)

    if (!entry) {
      // No usable key left — try the Ollama fallback before giving up.
      const fallback = await tryOllamaFallback('Groq exhausted');
      if (fallback) return res.status(200).json(fallback);
      return res.status(429).json({ error: 'All your API keys are rate-limited. Try again in 60s.' });
    }

    try {
      const upstream = await fetch(chatEndpoint, {
        method: 'POST',
        headers: { 'Authorization': `Bearer ${entry.key}`, 'Content-Type': 'application/json' },
        body: JSON.stringify(payload),
      });

      // 401/403 = invalid/expired key — cooldown and try next
      if (upstream.status === 401 || upstream.status === 403) {
        cooldownKey(entry);
        console.warn(`[GSS] Key #${entry.index} invalid/expired for sub ${subscriberId} (${upstream.status})`);
        const now = Date.now();
        const remaining = [...ownKeys, ...envKeys].filter((e) => now >= e.coolUntil);
        if (remaining.length === 0) {
          const fallback = await tryOllamaFallback('All Groq keys invalid');
          if (fallback) return res.status(200).json(fallback);
        }
        continue;
      }

      // Rate limit or upstream failure — cooldown and try next
      if (upstream.status === 429 || upstream.status >= 500) {
        cooldownKey(entry);
        continue;
      }

      const data = await upstream.json();
      // Ollama cloud answers in its native shape; convert to OpenAI format.
      if (isOllamaCloud && data.message) {
        return res.status(upstream.status).json({
          choices: [{ message: { role: 'assistant', content: data.message.content || '' }, finish_reason: 'stop' }],
          model: payload.model,
        });
      }
      return res.status(upstream.status).json(data);
    } catch (err) {
      cooldownKey(entry);
      console.error(`[GSS] Network error on key #${entry.index} for sub ${subscriberId}:`, err.message);
    }
  }

  return res.status(503).json({ error: 'All your API keys exhausted. Try again shortly.' });
}

// Route registration. The wrapper turns any unexpected error into a 500
// response; without it a throw inside the async handler would surface as an
// unhandled promise rejection and the request would never be answered.
app.post('/v1/chat/completions', gssAuth, async (req, res) => {
  try {
    await handleChatCompletion(req, res);
  } catch (err) {
    console.error('[GSS] Unhandled chat proxy error:', err);
    if (!res.headersSent) {
      res.status(500).json({ error: 'GSS-TEC: Internal proxy error.' });
    }
  }
});
// ── Start ─────────────────────────────────────────────────────────────────────
// Listen on the PORT env var when set, otherwise on 7860.
const PORT = process.env.PORT || 7860;
app.listen(PORT, () => {
  console.log(`[GSS] Sovereign Node live on :${PORT}`);
});