Spaces:
Paused
Paused
| import sys | |
| import json | |
| import torch | |
| import os | |
| from sentence_transformers import SentenceTransformer | |
def log(msg):
    """Emit a tagged diagnostic line on stderr, flushed immediately.

    stderr is used so diagnostics never mix with the JSON protocol
    responses the script writes on stdout.
    """
    print(f"[GPU-Bridge] {msg}", file=sys.stderr, flush=True)
def main():
    """Run a line-delimited JSON embedding service over stdin/stdout.

    Protocol (one JSON object per input line, one JSON reply per line):
      {"text": str}          -> {"embedding": [float, ...]}
      {"texts": [str, ...]}  -> {"embeddings": [[float, ...], ...]}
      {"ping": ...}          -> {"pong": true}
      anything else          -> {"error": "..."}

    Diagnostics go to stderr via log(); replies go to stdout and are
    flushed after every request. Exits with status 1 on startup failure.
    """
    try:
        # Prefer GPU when available; fall back to CPU.
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        log(f"Initializing on device: {device}")

        # Model is configurable via env; the default is small enough for a T4 GPU.
        model_name = os.environ.get('EMBEDDING_MODEL', 'sentence-transformers/all-MiniLM-L6-v2')
        log(f"Loading model: {model_name}...")
        model = SentenceTransformer(model_name, device=device)
        log("✅ Model loaded successfully.")

        # Signal readiness to the parent process before entering the loop.
        print(json.dumps({"status": "ready", "device": device}))
        sys.stdout.flush()

        # Request/response loop. Invariant: every non-empty input line gets
        # exactly one JSON reply line, so the client can never block forever.
        for line in sys.stdin:
            try:
                if not line.strip():
                    continue
                payload = json.loads(line)
                if 'text' in payload:
                    # Single embedding
                    embedding = model.encode(payload['text'], convert_to_numpy=True).tolist()
                    print(json.dumps({"embedding": embedding}))
                elif 'texts' in payload:
                    # Batch embedding
                    embeddings = model.encode(payload['texts'], convert_to_numpy=True).tolist()
                    print(json.dumps({"embeddings": embeddings}))
                elif 'ping' in payload:
                    print(json.dumps({"pong": True}))
                else:
                    # BUG FIX: an unrecognized request previously produced no
                    # reply at all, leaving the client waiting indefinitely.
                    print(json.dumps({"error": "unknown request"}))
                sys.stdout.flush()
            except Exception as e:
                # Per-request failures (bad JSON, encode errors) are reported
                # back to the client instead of killing the service.
                log(f"Error processing request: {str(e)}")
                print(json.dumps({"error": str(e)}))
                sys.stdout.flush()
    except Exception as e:
        # Startup failure (no model, bad env, CUDA issues): log and exit nonzero
        # so the supervising process can detect the bridge never came up.
        log(f"Fatal startup error: {str(e)}")
        sys.exit(1)
# Script entry point: run the bridge only when executed directly,
# not when imported.
if __name__ == "__main__":
    main()