ityndall committed on
Commit
27abab4
·
1 Parent(s): ad4d4f6

Update API with FastAPI implementation, Docker support, and improved documentation

Browse files
Files changed (4) hide show
  1. Dockerfile +11 -0
  2. README.md +66 -9
  3. app.py +132 -0
  4. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Minimal image for serving the FastAPI classifier on HF Spaces (port 7860).
FROM python:3.9

WORKDIR /code

# Install dependencies first so Docker layer caching skips the reinstall
# when only application code changes.
COPY ./requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . /code

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,14 +1,71 @@
1
  ---
2
- title: James River Api
3
- emoji: 🔥
4
- colorFrom: pink
5
- colorTo: pink
6
- sdk: gradio
7
- sdk_version: 5.32.0
8
- app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
- short_description: text classifier
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: James River API
3
+ emoji: 🏗️
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: docker
7
+ app_port: 7860
 
8
  pinned: false
9
  license: apache-2.0
10
+ short_description: James River Survey Classification API
11
  ---
12
 
13
+ # James River Survey Classification API
14
+
15
+ This is a FastAPI-based text classification API that categorizes survey-related messages into different job types for James River surveying services.
16
+
17
+ ## Model
18
+
19
+ The API uses the `ityndall/james-river-classifier` model, which is a BERT-based classifier trained to categorize survey requests into:
20
+
21
+ - Boundary Survey
22
+ - Construction Survey
23
+ - Fence Staking
24
+ - Other/General
25
+ - Real Estate Survey
26
+ - Subdivision Survey
27
+
28
+ ## API Usage
29
+
30
+ ### Endpoint: POST /predict
31
+
32
+ Send a JSON payload with a "message" field:
33
+
34
+ ```json
35
+ {
36
+ "message": "I need a boundary survey for my property"
37
+ }
38
+ ```
39
+
40
+ Response:
41
+ ```json
42
+ {
43
+ "label": "Boundary Survey",
44
+ "confidence": 0.85
45
+ }
46
+ ```
47
+
48
+ ### Example using curl:
49
+
50
+ ```bash
51
+ curl -X POST "https://ityndall-james-river-api.hf.space/predict" \
52
+ -H "Content-Type: application/json" \
53
+ -d '{"message": "I need a boundary survey for my property"}'
54
+ ```
55
+
56
+ ### Example using Python:
57
+
58
+ ```python
59
+ import requests
60
+
61
+ url = "https://ityndall-james-river-api.hf.space/predict"
62
+ data = {"message": "I need a boundary survey for my property"}
63
+ response = requests.post(url, json=data)
64
+ print(response.json())
65
+ ```
66
+
67
+ ## Local Development
68
+
69
+ ```bash
70
+ pip install -r requirements.txt
71
+ uvicorn app:app --host 0.0.0.0 --port 7860
+ ```
app.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging

from fastapi import FastAPI, Request, HTTPException
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests

# Module-level logger for startup and prediction tracing.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# FastAPI application exposing the classification endpoints defined below.
app = FastAPI(
    title="James River Survey Classification API",
    description="API for classifying survey-related text messages into job types",
    version="1.0.0",
)
17
+
18
# Request model
class PredictionRequest(BaseModel):
    """Incoming payload for /predict: the raw text to classify."""

    message: str
21
+
22
# Response model
class PredictionResponse(BaseModel):
    """Classification result: the winning label and its softmax confidence."""

    label: str
    confidence: float
26
+
27
# Model artifacts populated once by the startup hook; all None until loaded.
model = tokenizer = label_mapping = None
31
+
32
@app.on_event("startup")
async def load_model():
    """Load the classifier, tokenizer and label mapping once at startup.

    Populates the module-level ``model``, ``tokenizer`` and ``label_mapping``
    globals used by the request handlers. Re-raises on any failure so the
    app does not start serving with a half-loaded model.
    """
    global model, tokenizer, label_mapping

    try:
        model_name = "ityndall/james-river-classifier"
        logger.info(f"Loading model: {model_name}")

        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name)

        # Fetch the id->label mapping published alongside the model weights.
        # A timeout plus status check stop a hung or failed download from
        # surfacing later as an opaque JSON decode error.
        label_mapping_url = f"https://huggingface.co/{model_name}/resolve/main/label_mapping.json"
        response = requests.get(label_mapping_url, timeout=30)
        response.raise_for_status()
        label_mapping = response.json()

        logger.info("Model loaded successfully")
        logger.info(f"Available labels: {list(label_mapping['id2label'].values())}")

    except Exception as e:
        logger.error(f"Error loading model: {str(e)}")
        # Bare raise preserves the original traceback.
        raise
55
+
56
@app.get("/")
async def root():
    """Describe the service: model in use, known labels, and available routes."""
    labels = list(label_mapping["id2label"].values()) if label_mapping else []
    return {
        "message": "James River Survey Classification API",
        "version": "1.0.0",
        "model": "ityndall/james-river-classifier",
        "available_labels": labels,
        "endpoints": {
            "predict": "/predict - POST endpoint for text classification",
            "health": "/health - GET endpoint for health check",
        },
    }
69
+
70
@app.get("/health")
async def health_check():
    """Report 200 when all model artifacts are loaded, 503 otherwise."""
    ready = all(x is not None for x in (model, tokenizer, label_mapping))
    if not ready:
        raise HTTPException(status_code=503, detail="Model not loaded")
    return {"status": "healthy", "model_loaded": True}
76
+
77
@app.post("/predict", response_model=PredictionResponse)
async def predict(request: PredictionRequest):
    """Classify ``request.message`` and return the top label with its confidence.

    Raises 503 if the model is not loaded yet, 400 on an empty message, and
    500 on unexpected inference failures.
    """
    if model is None or tokenizer is None or label_mapping is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    # Validate OUTSIDE the try block: previously this 400 was raised inside
    # it, caught by `except Exception`, and re-wrapped as a 500.
    text = request.message.strip()
    if not text:
        raise HTTPException(status_code=400, detail="Message cannot be empty")

    try:
        # Tokenize and predict
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)

        with torch.no_grad():
            logits = model(**inputs).logits
            probs = torch.nn.functional.softmax(logits, dim=-1)
            predicted_class_id = probs.argmax().item()
            confidence = probs[0][predicted_class_id].item()

        # The published mapping keys class ids as strings.
        label = label_mapping["id2label"][str(predicted_class_id)]

        logger.info(f"Prediction: '{text}' -> {label} (confidence: {confidence:.3f})")

        return PredictionResponse(label=label, confidence=confidence)

    except Exception as e:
        logger.error(f"Error during prediction: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Prediction error: {str(e)}") from e
108
+
109
# Legacy endpoint for backward compatibility
@app.post("/predict_legacy")
async def predict_legacy(request: Request):
    """Legacy endpoint that accepts raw JSON (for backward compatibility).

    Delegates to :func:`predict` so both routes share one inference path,
    and returns the same ``{"label", "confidence"}`` shape.
    """
    try:
        data = await request.json()
        message = data.get("message", "")

        if not message:
            raise HTTPException(status_code=400, detail="Message field is required")

        # Use the main predict function
        prediction_request = PredictionRequest(message=message)
        result = await predict(prediction_request)

        return {"label": result.label, "confidence": result.confidence}

    except HTTPException:
        # Preserve deliberate status codes (400/503 from validation or the
        # delegated handler) instead of flattening them to 500 below.
        raise
    except Exception as e:
        logger.error(f"Error in legacy endpoint: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e
129
+
130
if __name__ == "__main__":
    # Local development entry point; in Docker, uvicorn is launched via CMD.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ transformers
4
+ torch
5
+ requests
6
+ pydantic