Bc-AI committed on
Commit
8777866
·
verified ·
1 Parent(s): 186936c

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. Dockerfile +23 -0
  2. README.md +12 -10
  3. app.py +691 -0
  4. requirements.txt +12 -0
  5. space-config.yaml +12 -0
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FROM python:3.10-slim

# Unbuffered stdout/stderr so container logs appear immediately (the app
# logs via print()); skip .pyc generation to keep layers lean.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

WORKDIR /app

# Install system dependencies (compilers for packages that build from source)
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    g++ \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first to leverage Docker cache
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY app.py .
COPY shared ./shared

# Expose port for the API (matches uvicorn.run port in app.py)
EXPOSE 7860

# Start the application
CMD ["python", "app.py"]
README.md CHANGED
@@ -1,10 +1,12 @@
1
- ---
2
- title: Head
3
- emoji: 🏆
4
- colorFrom: yellow
5
- colorTo: pink
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
1
+ # SACCP Head Node
2
+ This is a head node in the SACCP (Scalable Accelerated Compute Protocol) distributed computing network.
3
+
4
+ ## Node Type: HEAD
5
+ - Processes tasks according to SACCP protocol
6
+ - Contributes computational resources to the network
7
+ - Earns cloud credits for resource contribution
8
+
9
+ ## Architecture
10
+ - Built with FastAPI (model inference is delegated to worker nodes)
11
+ - Implements fault-tolerant operations
12
+ - Integrated with SACCP credit system
app.py ADDED
@@ -0,0 +1,691 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import json
4
+ import requests
5
+ import asyncio
6
+ import random
7
+ from datetime import datetime
8
+ from typing import Dict, List, Optional
9
+ from fastapi import FastAPI, HTTPException, BackgroundTasks
10
+ from fastapi.responses import StreamingResponse
11
+ import uvicorn
12
+ from pydantic import BaseModel
13
+ from shared.models import ChatRequest, ChatResponse, ChatMessage, WorkerStatus, NodeType
14
+ from shared.node_types import NodeRegistrationRequest, NodeRegistrationResponse, NodeListResponse, NodeStatus, ServiceOffering, ServiceRequest
15
+ from shared.approval_system import smilyai_approval_system, ApprovalType
16
+ from shared.credits_system import credits_system, CreditReason, TransactionType
17
+ from shared.fault_tolerance import fault_tolerance_manager, FailureType, RecoveryStrategy
18
+ from shared.load_balancer import load_balancer, Task, TaskPriority
19
+ from shared.chat_history import save_detailed_chat_log, initialize_chat_file
20
+
app = FastAPI(
    title="Multi-Node Hugging Face API Gateway",
    description="API Gateway that routes requests to specialized worker nodes",
    version="1.0.0"
)

# Initialize chat history file
initialize_chat_file()

# Configuration - in production, these would come from environment variables
# Maps model name -> base URL of the worker serving that model.  Each entry
# can be overridden via the corresponding *_WORKER_URL environment variable.
WORKER_NODES = {
    "sam-x-nano": os.getenv("NANO_WORKER_URL", "http://nano-worker:8000"),
    "sam-x-mini": os.getenv("MINI_WORKER_URL", "http://mini-worker:8000"),
    "sam-x-fast": os.getenv("FAST_WORKER_URL", "http://fast-worker:8000"),
    "sam-x-large": os.getenv("LARGE_WORKER_URL", "http://large-worker:8000"),
    "sam-large-2": os.getenv("SAM2_WORKER_URL", "http://sam2-worker:8000"),  # Added Sam 2 support
    "universal": os.getenv("UNIVERSAL_WORKER_URL", "http://universal-worker:8000"),  # Universal worker that supports all models
}

# In-memory worker status tracking (in production, use Redis or database)
# model name -> {"active": bool, "last_check": epoch seconds, "load": float}
worker_status = {}
42
+
@app.on_event('startup')
def startup_event():
    """Mark every configured worker as active when the gateway boots.

    Fix: the original iterated `WORKER_NODES.items()` but never used the URL
    value, and called time.time() per iteration; iterate keys and timestamp once.
    """
    print("Starting Multi-Node Hugging Face API Gateway...")
    # Initialize worker status
    now = time.time()
    for model in WORKER_NODES:
        worker_status[model] = {"active": True, "last_check": now, "load": 0.0}
49
+
50
+
def route_to_worker(chat_request: ChatRequest) -> Dict:
    """Forward a chat request to the worker node serving its model.

    Returns the worker's JSON payload for non-streaming requests, or a
    {"streaming": True, "response": <requests.Response>} wrapper when the
    caller asked for streaming.  Raises HTTPException 400 for an unknown
    model, 503 when the worker is unreachable, 500 otherwise.
    """
    model = chat_request.model.lower()

    if model not in WORKER_NODES:
        # Fuzzy fallback: first configured model whose name overlaps the request.
        candidate = next((m for m in WORKER_NODES if model in m or m in model), None)
        if candidate is None:
            raise HTTPException(status_code=400, detail=f"Model {chat_request.model} not available")
        model = candidate

    worker_url = WORKER_NODES[model]

    try:
        worker_resp = requests.post(
            f"{worker_url}/chat/completions",
            json=chat_request.dict(),
            timeout=300,  # 5 minute timeout for long inference
            stream=chat_request.stream,  # pass the stream flag through
        )
        worker_resp.raise_for_status()
        if chat_request.stream:
            # Hand the live response object back for the streaming handler.
            return {"streaming": True, "response": worker_resp}
        return worker_resp.json()
    except requests.exceptions.RequestException as e:
        print(f"Error contacting worker {worker_url}: {str(e)}")
        # Mark the worker as down so /health and /models reflect it.
        worker_status[model] = {"active": False, "last_check": time.time(), "load": 0.0}
        raise HTTPException(status_code=503, detail=f"Worker for model {model} is not available")
    except Exception as e:
        print(f"Unexpected error contacting worker {worker_url}: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")
90
+
91
+
def route_streaming_request(chat_request: ChatRequest):
    """Proxy a streaming chat request, relaying the worker's event stream.

    Fixes over the original:
    - removed the redundant function-local ``import requests`` (already
      imported at module level);
    - added ``raise_for_status`` and connection-error handling so a dead
      worker yields a clean 503 instead of an unhandled exception.
    """
    model = chat_request.model.lower()

    # Check if model is supported
    if model not in WORKER_NODES:
        # Find closest matching model
        available_models = [m for m in WORKER_NODES.keys() if model in m or m in model]
        if available_models:
            model = available_models[0]  # Use first available match
        else:
            raise HTTPException(status_code=400, detail=f"Model {chat_request.model} not available")

    worker_url = WORKER_NODES[model]

    try:
        # Stream request to worker
        worker_response = requests.post(
            f"{worker_url}/chat/completions",
            json=chat_request.dict(),
            timeout=300,  # 5 minute timeout for long inference
            stream=True
        )
        worker_response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error contacting worker {worker_url}: {str(e)}")
        raise HTTPException(status_code=503, detail=f"Worker for model {model} is not available")

    # Forward the stream line by line as server-sent events
    def generate():
        for chunk in worker_response.iter_lines():
            if chunk:
                yield chunk.decode('utf-8') + "\n"

    return StreamingResponse(generate(), media_type="text/event-stream")
126
+
127
+
@app.post("/chat/completions", response_model=ChatResponse)
async def chat_completions(request: ChatRequest, background_tasks: BackgroundTasks):
    """Main chat endpoint: dispatch to the right worker and log the exchange.

    NOTE(review): for streaming requests this returns a StreamingResponse
    even though the route declares response_model=ChatResponse — confirm
    FastAPI tolerates this in the deployed version.
    """
    start_time = time.time()

    try:
        if request.stream:
            # Streamed replies are proxied straight from the worker.
            return route_streaming_request(request)

        # Non-streaming: synchronous round trip to the worker.
        worker_response = route_to_worker(request)
        elapsed = time.time() - start_time

        # Pull the assistant text out of the OpenAI-style payload, if present.
        response_content = ""
        if "choices" in worker_response and len(worker_response["choices"]) > 0:
            first_choice = worker_response["choices"][0]
            response_content = first_choice.get("message", {}).get("content", "")

        # Persist the conversation without blocking the response.
        background_tasks.add_task(
            save_detailed_chat_log,
            request.dict(),
            response_content,
            request.model,
            elapsed,
        )

        return worker_response

    except HTTPException:
        # Let FastAPI serialize routing errors (400/503/500) untouched.
        raise
    except Exception as e:
        print(f"Error in chat_completions: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")
169
+
170
+
@app.get("/models")
async def list_models():
    """OpenAI-style model listing, filtered to workers currently marked active."""
    catalog = []
    for model in WORKER_NODES:
        # Unknown status defaults to active (same policy as /health's inverse).
        if worker_status.get(model, {}).get("active", True):
            catalog.append({
                "id": model,
                "object": "model",
                "created": int(time.time()),
                "owned_by": "multinode-hf-api",
            })
    return {"object": "list", "data": catalog}
191
+
192
+
@app.get("/health")
async def health_check():
    """Gateway liveness: healthy iff at least one worker is marked active."""
    alive = [name for name, state in worker_status.items() if state.get("active", False)]
    return {
        "status": "healthy" if alive else "no_active_workers",
        "active_workers": alive,
        "total_workers": len(WORKER_NODES),
    }
206
+
207
+
@app.get("/worker-status")
async def get_worker_status():
    """
    Get detailed status of all workers
    """
    # Returns the raw in-memory map: model name -> {"active", "last_check", "load"}.
    return worker_status
214
+
215
+
@app.post("/chat")
async def simple_chat(message: str, model: str = "sam-x-nano", max_tokens: int = 512):
    """Convenience endpoint: one user message in, plain assistant text out."""
    wrapped = ChatRequest(
        messages=[ChatMessage(role="user", content=message)],
        model=model,
        max_tokens=max_tokens,
    )

    result = route_to_worker(wrapped)

    if "choices" in result and len(result["choices"]) > 0:
        return {"response": result["choices"][0]["message"]["content"]}
    raise HTTPException(status_code=500, detail="No response from worker")
233
+
234
+
# Available services in the marketplace
# Static catalog returned by /marketplace/services and charged against node
# credit balances in /marketplace/purchase.
marketplace_services = [
    ServiceOffering(
        service_id="storage_1",
        service_name="SACCP Cloud Storage",
        description="Distributed storage on SACCP network",
        price_per_unit=0.01,  # 0.01 credits per GB/month
        unit_type="gb_month"
    ),
    ServiceOffering(
        service_id="compute_1",
        service_name="SACCP Compute Power",
        description="Distributed computing on SACCP network",
        price_per_unit=0.10,  # 0.10 credits per compute hour
        unit_type="compute_hour"
    ),
    ServiceOffering(
        service_id="ai_model_hosting_1",
        service_name="AI Model Hosting",
        description="Host and serve AI models on SACCP network",
        price_per_unit=0.05,  # 0.05 credits per model-hour
        unit_type="model_hour"
    )
]

# Smilyai approved head nodes (for security)
# Node IDs of HEAD nodes that have passed the smilyai approval flow.
approved_head_nodes = set()
262
+
@app.post("/saccp/register-worker", response_model=NodeRegistrationResponse)
async def register_worker(registration_request: NodeRegistrationRequest):
    """
    Register a worker node with the SACCP network.

    HEAD nodes must be approved by the smilyai approval system before they are
    admitted; other node types register immediately.  Every admitted node is
    registered with BOTH the fault-tolerance manager and the load balancer.

    Fix: the "approved while the request was pending" path previously returned
    before registering the node with the load balancer, unlike every other
    success path; the shared bookkeeping is now in one helper.
    """
    node_id = registration_request.node_id
    node_type = registration_request.capabilities.node_type

    def _admit(nid: str) -> None:
        # Shared bookkeeping for every accepted node: fault tolerance + load balancer.
        fault_tolerance_manager.register_node(
            nid,
            registration_request.capabilities.node_type,
            registration_request.capabilities.dict()
        )
        load_balancer.register_node(
            nid,
            registration_request.capabilities.node_type,
            registration_request.capabilities.dict()
        )

    if node_type == NodeType.HEAD:
        if not smilyai_approval_system.is_approved(node_id, ApprovalType.HEAD_NODE):
            # Queue an approval request for this HEAD node.
            request_id = smilyai_approval_system.request_approval(
                node_id=node_id,
                endpoint=registration_request.endpoint,
                request_type=ApprovalType.HEAD_NODE,
                request_data=registration_request.dict(),
                reason="HEAD node registration",
                requested_by="system"
            )

            # In a real system, you might want to allow some limited access while pending.
            pending_requests = smilyai_approval_system.get_pending_requests()
            if any(req.request_id == request_id for req in pending_requests):
                return NodeRegistrationResponse(
                    success=False,
                    node_id=node_id,
                    message="HEAD node registration requires approval, submitted for review",
                    approval_status="pending"
                )

            # Not pending any more: it was either approved or rejected meanwhile.
            if smilyai_approval_system.is_approved(node_id, ApprovalType.HEAD_NODE):
                approved_head_nodes.add(node_id)
                _admit(node_id)  # fix: now also registers with the load balancer
                return NodeRegistrationResponse(
                    success=True,
                    node_id=node_id,
                    message=f"Successfully registered {node_type} node",
                    approval_status="approved"
                )
            return NodeRegistrationResponse(
                success=False,
                node_id=node_id,
                message="HEAD node registration denied",
                approval_status="rejected"
            )

        # Already-approved HEAD node.
        approved_head_nodes.add(node_id)
        _admit(node_id)
    else:
        # Non-HEAD nodes need no approval.
        _admit(node_id)

    # In a real system, the worker info would be persisted in a database.
    return NodeRegistrationResponse(
        success=True,
        node_id=node_id,
        message=f"Successfully registered {node_type} node",
        approval_status="approved"  # In a real system, this might be "pending" initially
    )
359
+
360
+
@app.post("/saccp/heartbeat")
async def heartbeat(worker_id: str):
    """Record a worker heartbeat; 'alive' if either subsystem recognizes the node."""
    ft_ok = fault_tolerance_manager.heartbeat(worker_id)
    lb_ok = load_balancer.heartbeat_node(worker_id)

    # Either subsystem acknowledging the node counts as alive (same policy
    # as the original's two-branch version, collapsed into one expression).
    status = "alive" if (ft_ok or lb_ok) else "unknown_node"
    return {"status": status, "timestamp": int(time.time())}
377
+
378
+
@app.get("/saccp/next-task")
async def get_next_task(worker_id: str):
    """
    Get the next task for a worker
    """
    # Placeholder implementation: `worker_id` is currently unused.
    # In a real system, check the task queue for available tasks for this worker
    # based on the worker's capabilities
    # For now, return empty dict meaning no tasks available
    # In the real implementation, this would be handled by the load balancer
    return {}  # Empty dict means no tasks available
389
+
390
+
@app.post("/saccp/task-result")
async def report_task_result(worker_id: str, task_id: str, result: Dict):
    """Record a finished task and award the worker credits for it."""
    completed = fault_tolerance_manager.record_task_completion(task_id, worker_id)

    task_type = result.get('task_type', 'compute')

    # Credit schedule per task type; unrecognized types earn the default 0.5.
    rate_table = {
        'inference': 0.1,  # small amount for inference tasks
        'training': 1.0,   # larger amount for training tasks
    }
    credits_awarded = rate_table.get(task_type, 0.5)

    # Pay the worker, tagging the transaction with the task details.
    credits_system.add_credits(worker_id, credits_awarded, CreditReason.TASK_COMPLETION,
                               metadata={"task_id": task_id, "task_type": task_type})

    return {
        "status": "received",
        "credits_awarded": credits_awarded,
        "task_completed": completed,
        "new_balance": credits_system.get_balance(worker_id).balance
    }
421
+
422
+
@app.post("/saccp/task-error")
async def report_task_error(worker_id: str, task_id: str, error: str):
    """Record a task failure and report the recovery strategy chosen for it."""
    strategy = fault_tolerance_manager.record_task_failure(
        task_id, worker_id, FailureType.TASK_TIMEOUT, error
    )

    strategy_name = strategy.value if strategy else "none"
    return {
        "status": "error_received",
        "recovery_strategy": strategy_name,
    }
437
+
438
+
@app.get("/saccp/stats")
async def get_network_stats():
    """
    Get network statistics
    """
    # Get statistics from fault tolerance system; the response shape is
    # whatever fault_tolerance_manager.get_network_health() returns.
    health_stats = fault_tolerance_manager.get_network_health()

    return health_stats
448
+
449
+
@app.get("/saccp/health-detailed")
async def get_detailed_health():
    """Network health summary plus the current list of failed nodes."""
    return {
        "network_health": fault_tolerance_manager.get_network_health(),
        "failed_nodes": fault_tolerance_manager.get_failed_nodes(),
        "timestamp": int(time.time()),
    }
463
+
464
+
@app.get("/saccp/nodes")
async def get_nodes():
    """List all known nodes with total/online counts from the load balancer."""
    nodes = load_balancer.get_node_status()
    online = sum(1 for node in nodes if node["is_available"])

    return NodeListResponse(
        nodes=nodes,
        total_nodes=len(nodes),
        online_nodes=online,
    )
478
+
479
+
@app.post("/saccp/submit-task")
async def submit_task_for_distribution(task_data: Dict):
    """Wrap raw task data into a Task and hand it to the load balancer."""
    # Generated fallback id: timestamp plus a random suffix.
    task_id = task_data.get("task_id", f"task_{int(time.time())}_{random.randint(1000, 9999)}")

    # Map the textual priority onto the TaskPriority enum (default: NORMAL).
    priority_lookup = {
        "low": TaskPriority.LOW,
        "normal": TaskPriority.NORMAL,
        "high": TaskPriority.HIGH,
        "critical": TaskPriority.CRITICAL,
    }
    priority = priority_lookup.get(task_data.get("priority", "normal"), TaskPriority.NORMAL)

    task = Task(
        task_id=task_id,
        task_type=task_data.get("task_type", "compute"),
        priority=priority,
        resource_requirements=task_data.get("resource_requirements", {}),
        estimated_duration=task_data.get("estimated_duration", 30.0),  # seconds
        created_at=time.time(),
    )

    assigned_node = load_balancer.submit_task(task)

    return {
        "task_id": task_id,
        "status": "submitted",
        "assigned_node": assigned_node,
        "timestamp": int(time.time()),
    }
520
+
521
+
@app.get("/saccp/load-balancer-status")
async def get_load_balancer_status():
    """Snapshot of the node pool and the pending task queue."""
    return {
        "node_status": load_balancer.get_node_status(),
        "task_queue": load_balancer.get_task_queue_status(),
        "timestamp": int(time.time()),
    }
535
+
536
+
@app.get("/credits/balance/{node_id}")
async def get_credit_balance(node_id: str):
    """
    Get credit balance for a node
    """
    # The balance object comes straight from the credits subsystem; elsewhere
    # in this file its numeric amount is read via `.balance`.
    balance = credits_system.get_balance(node_id)
    return balance
544
+
545
+
@app.get("/credits/earn/{node_id}/{amount}")
async def earn_credits(node_id: str, amount: float, reason: str = "task_completion"):
    """
    Endpoint for nodes to earn credits by contributing resources.

    Fix: the original validated `reason` against CreditReason member *names*
    (`__members__`) but then constructed the enum by *value*
    (`CreditReason(reason)`) — a valid value failed the name check and a bare
    name crashed construction.  Now we try value lookup, then name lookup,
    then fall back to RESOURCE_CONTRIBUTION.
    """
    try:
        try:
            credit_reason = CreditReason(reason)
        except ValueError:
            credit_reason = CreditReason.__members__.get(
                reason, CreditReason.RESOURCE_CONTRIBUTION
            )
        success = credits_system.add_credits(node_id, amount, credit_reason)

        if success:
            balance = credits_system.get_balance(node_id)
            return {"status": "success", "new_balance": balance.balance}
        return {"status": "failed", "message": "Failed to add credits"}
    except Exception as e:
        # Best-effort endpoint: surface the error as data, not a 500.
        return {"status": "error", "message": str(e)}
562
+
563
+
@app.get("/marketplace/services")
async def get_marketplace_services():
    """
    Get list of available services in the marketplace
    """
    # Returns the static module-level ServiceOffering catalog.
    return marketplace_services
570
+
571
+
@app.post("/marketplace/purchase")
async def purchase_service(service_request: ServiceRequest):
    """Spend a node's credits on a marketplace service.

    Raises 404 for an unknown service, 400 when unavailable or when the
    node's balance cannot cover the cost.
    """
    # Locate the requested offering in the static catalog.
    service = next(
        (s for s in marketplace_services if s.service_id == service_request.service_id),
        None,
    )
    if service is None:
        raise HTTPException(status_code=404, detail="Service not found")
    if not service.availability:
        raise HTTPException(status_code=400, detail="Service not available")

    total_cost = service.price_per_unit * service_request.quantity

    # Debit the purchaser; spend_credits returns False on insufficient funds.
    paid = credits_system.spend_credits(
        service_request.node_id,
        total_cost,
        CreditReason.SERVICE_PURCHASE,
        service.service_name,
        metadata=service_request.parameters,
    )
    if not paid:
        raise HTTPException(status_code=400, detail="Insufficient credits")

    balance = credits_system.get_balance(service_request.node_id)
    return {
        "status": "success",
        "service_id": service.service_id,
        "service_name": service.service_name,
        "cost": total_cost,
        "remaining_balance": balance.balance,
    }
615
+
616
+
# Additional endpoints for credit earning based on node type and contributions

@app.post("/credits/earn-resource-contribution")
async def earn_credits_for_resource_contribution(node_id: str, node_type: NodeType, duration_hours: float,
                                                 resource_amount: float = 1.0):
    """Award credits for contributed resources, scaled by node type, time, and amount."""
    # Hourly credit rate per resource unit for each node class.
    base_rates = {
        NodeType.RAM: 0.5,
        NodeType.DISK: 0.3,
        NodeType.COMPUTE: 0.4,
        NodeType.GPU: 1.0,
        NodeType.TPU: 1.5,
        NodeType.NPU: 1.2,
        NodeType.HEAD: 0.8,
    }

    # Unknown node types earn the compute rate by default.
    credits_to_earn = base_rates.get(node_type, 0.4) * duration_hours * resource_amount

    awarded = credits_system.add_credits(
        node_id,
        credits_to_earn,
        CreditReason.RESOURCE_CONTRIBUTION,
        metadata={
            "node_type": node_type,
            "duration_hours": duration_hours,
            "resource_amount": resource_amount,
        },
    )

    if not awarded:
        return {"status": "failed", "message": "Failed to award credits"}

    balance = credits_system.get_balance(node_id)
    return {
        "status": "success",
        "credits_earned": credits_to_earn,
        "new_balance": balance.balance,
    }
660
+
661
+
@app.get("/credits/top-contributors")
async def get_top_contributors(limit: int = 10):
    """Rank nodes by credit balance (top `limit`)."""
    leaders = credits_system.get_top_nodes_by_balance(limit)
    return {
        "top_contributors": leaders,
        # NOTE(review): counts only the returned leaders, not the whole
        # network — a full node list would be used in a real implementation.
        "total_nodes_in_network": len(leaders),
    }
672
+
673
+
@app.get("/saccp/node-stats/{node_id}")
async def get_node_stats(node_id: str):
    """Per-node stats: credit balance plus the ten most recent transactions."""
    return {
        "node_id": node_id,
        "credit_balance": credits_system.get_balance(node_id),
        "recent_transactions": credits_system.get_transaction_history(node_id, limit=10),
        "status": "active",  # placeholder; a real implementation would probe the node
    }
688
+
689
+
if __name__ == "__main__":
    # Port 7860 matches the Dockerfile EXPOSE and the Hugging Face Spaces default.
    uvicorn.run(app, host="0.0.0.0", port=7860)
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Requirements for Head Node
2
+ fastapi==0.104.1
3
+ uvicorn==0.24.0
4
+ requests==2.31.0
5
+ pydantic==2.5.0
6
+ python-multipart==0.0.6
7
+ huggingface_hub==0.20.1
8
+ tokenizers==0.15.0
9
+ transformers==4.35.2
10
+ numpy==1.24.3
11
+ pytz==2023.3.post1
12
+ aiohttp==3.9.0
space-config.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SACCP Node Space Configuration
2
+ runtime:
3
+ cpu: "medium"
4
+ memory: "16x"
5
+ accelerator: "cpu" # Will be configured based on node type
6
+ env:
7
+ NODE_TYPE: "head"
8
+ NANO_WORKER_URL: "https://Bc-AI-worker-nano.hf.space"
9
+ MINI_WORKER_URL: "https://Bc-AI-worker-mini.hf.space"
10
+ FAST_WORKER_URL: "https://Bc-AI-worker-fast.hf.space"
11
+ LARGE_WORKER_URL: "https://Bc-AI-worker-large.hf.space"
12
+ # NOTE(review): app.py also reads SAM2_WORKER_URL, which is not set here — confirm and add the sam2 worker URL
+ UNIVERSAL_WORKER_URL: "https://Bc-AI-worker-universal.hf.space"