import sys
import json
import os

import torch
from sentence_transformers import SentenceTransformer


def log(msg):
    """Write a diagnostic line to stderr.

    stdout is reserved for the JSON response protocol, so all human-readable
    logging must go to stderr and be flushed immediately.
    """
    sys.stderr.write(f"[GPU-Bridge] {msg}\n")
    sys.stderr.flush()


def main():
    """Run the embedding bridge process.

    Loads a SentenceTransformer model (on CUDA when available, else CPU),
    prints a single JSON "ready" line, then answers newline-delimited JSON
    requests read from stdin with one JSON response line each on stdout.

    Request forms:
        {"text": "..."}     -> {"embedding": [...]}
        {"texts": ["..."]}  -> {"embeddings": [[...], ...]}
        {"ping": ...}       -> {"pong": true}
    Any other payload now receives {"error": ...} instead of silence.

    Exits with status 1 on any startup failure (model load, CUDA probe).
    """
    try:
        # Select compute device; model inference runs on GPU when present.
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        log(f"Initializing on device: {device}")

        # Model name is overridable via environment; default is a small,
        # fast general-purpose embedding model.
        model_name = os.environ.get('EMBEDDING_MODEL', 'sentence-transformers/all-MiniLM-L6-v2')
        log(f"Loading model: {model_name}...")
        model = SentenceTransformer(model_name, device=device)
        log("✅ Model loaded successfully.")

        # Signal readiness to the parent process (first protocol line).
        print(json.dumps({"status": "ready", "device": device}))
        sys.stdout.flush()

        # Request/response loop: one JSON object per input line.
        for line in sys.stdin:
            try:
                if not line.strip():
                    continue
                payload = json.loads(line)
                if 'text' in payload:
                    # Single embedding
                    embedding = model.encode(payload['text'], convert_to_numpy=True).tolist()
                    print(json.dumps({"embedding": embedding}))
                elif 'texts' in payload:
                    # Batch embedding
                    embeddings = model.encode(payload['texts'], convert_to_numpy=True).tolist()
                    print(json.dumps({"embeddings": embeddings}))
                elif 'ping' in payload:
                    print(json.dumps({"pong": True}))
                else:
                    # BUG FIX: an unrecognized payload previously produced no
                    # output line, which deadlocks any caller awaiting a reply.
                    # Always answer every request.
                    print(json.dumps({"error": "unrecognized request payload"}))
                sys.stdout.flush()
            except Exception as e:
                # Per-request failures (bad JSON, encode errors) are reported
                # to the caller and logged, but never kill the loop.
                log(f"Error processing request: {str(e)}")
                print(json.dumps({"error": str(e)}))
                sys.stdout.flush()
    except Exception as e:
        # Startup failures are fatal: log and exit nonzero so the parent
        # process can detect the bridge never became ready.
        log(f"Fatal startup error: {str(e)}")
        sys.exit(1)


if __name__ == "__main__":
    main()