Update app.py
app.py CHANGED
@@ -1,38 +1,51 @@
-from fastapi import FastAPI, HTTPException
+from fastapi import FastAPI, HTTPException, Request
 from onnxruntime import InferenceSession
 from transformers import AutoTokenizer
 import numpy as np
 import os
+import uvicorn
 
 app = FastAPI()
 
-# Initialize tokenizer
+# Initialize tokenizer
 tokenizer = AutoTokenizer.from_pretrained(
     "Xenova/multi-qa-mpnet-base-dot-v1",
-    use_fast=True,
+    use_fast=True,
     legacy=False
 )
 
 # Load ONNX model
-session = InferenceSession("model.onnx")
+try:
+    session = InferenceSession("model.onnx")
+    print("Model loaded successfully")
+except Exception as e:
+    print(f"Failed to load model: {str(e)}")
+    raise
 
 @app.get("/")
-def
-    return {"status": "
+def health_check():
+    return {"status": "OK", "model": "ONNX"}
 
 @app.post("/api/predict")
-async def predict(text: str):
+async def predict(request: Request):
     try:
-        #
+        # Get JSON input
+        data = await request.json()
+        text = data.get("text", "")
+
+        if not text:
+            raise HTTPException(status_code=400, detail="No text provided")
+
+        # Tokenize input
         inputs = tokenizer(
             text,
-            return_tensors="np",
-            padding=
+            return_tensors="np",
+            padding="max_length",
             truncation=True,
-            max_length=32
+            max_length=32
         )
 
-        # Prepare ONNX inputs
+        # Prepare ONNX inputs with correct shapes
         onnx_inputs = {
             "input_ids": inputs["input_ids"].astype(np.int64),
             "attention_mask": inputs["attention_mask"].astype(np.int64)
@@ -41,21 +54,21 @@ async def predict(text: str):
         # Run inference
         outputs = session.run(None, onnx_inputs)
 
-        # Convert to
+        # Convert outputs to list and handle numpy types
+        embedding = outputs[0][0].astype(float).tolist()  # First output, first batch
+
         return {
-            "embedding":
+            "embedding": embedding,
             "tokens": tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
         }
 
     except Exception as e:
-        raise HTTPException(status_code=
+        raise HTTPException(status_code=500, detail=str(e))
 
 if __name__ == "__main__":
     uvicorn.run(
-        app,
+        "app:app",
         host="0.0.0.0",
         port=7860,
-
-        proxy_headers=True,
-        forwarded_allow_ips="*"
+        reload=False
     )
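
For anyone exercising the new contract: /api/predict now reads a JSON body instead of a text query parameter, so a minimal smoke test could look like the sketch below (the local URL and sample text are assumptions, not part of this commit). One caveat: because HTTPException is itself an Exception, the blanket except re-wraps the intended 400 for missing text as a 500; catching and re-raising HTTPException before the generic handler would preserve the status code.

import requests  # hypothetical client-side smoke test, not part of app.py

# Assumes the Space is running locally on the port configured above.
resp = requests.post(
    "http://localhost:7860/api/predict",
    json={"text": "What does ONNX Runtime do?"},
)
resp.raise_for_status()
body = resp.json()
print(body["tokens"][:8])      # tokens, padded out to max_length=32
print(len(body["embedding"]))  # length of outputs[0][0] along its first axis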
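Separately, since session.run(None, onnx_inputs) depends on the input names baked into model.onnx, a quick introspection script (a sketch under the same assumptions, not part of the commit) can confirm the graph really expects input_ids / attention_mask, and show what outputs[0][0] actually is. If the first output is a last_hidden_state of shape (1, 32, hidden), the returned "embedding" is a per-token matrix rather than a single pooled vector.

import numpy as np
from onnxruntime import InferenceSession
from transformers import AutoTokenizer

session = InferenceSession("model.onnx")
tokenizer = AutoTokenizer.from_pretrained("Xenova/multi-qa-mpnet-base-dot-v1")

# The graph's declared inputs must match the keys of onnx_inputs in predict().
for i in session.get_inputs():
    print("input: ", i.name, i.shape, i.type)
for o in session.get_outputs():
    print("output:", o.name, o.shape, o.type)

# Run one example through and check what predict() would slice out.
enc = tokenizer("hello world", return_tensors="np",
                padding="max_length", truncation=True, max_length=32)
out = session.run(None, {
    "input_ids": enc["input_ids"].astype(np.int64),
    "attention_mask": enc["attention_mask"].astype(np.int64),
})
print(out[0].shape)  # e.g. (1, 32, 768) would mean outputs[0][0] is per-token

If the output is indeed token-level, producing a sentence vector would need an explicit pooling step (the upstream sentence-transformers card for this model describes CLS pooling, i.e. out[0][:, 0, :]), but that is a behavioral change beyond this commit.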