import argparse
import csv
import io
import json
import os
from typing import List, Dict, Any

import numpy as np
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder
def load_artifacts(prefix: str):
    """Load the trained model, fitted label encoder, and feature names.

    Expects three files sharing *prefix*: ``<prefix>.json`` (XGBoost model),
    ``<prefix>.labels.npy`` (class names), ``<prefix>.features.txt``
    (one feature name per line).

    Returns:
        (model, label_encoder, feature_names) tuple.

    Raises:
        FileNotFoundError: naming exactly the artifact files that are absent.
    """
    model_path = f"{prefix}.json"
    labels_path = f"{prefix}.labels.npy"
    features_path = f"{prefix}.features.txt"
    # Report only the files that are actually missing, rather than always
    # listing all three — makes the error actionable.
    missing = [p for p in (model_path, labels_path, features_path) if not os.path.exists(p)]
    if missing:
        raise FileNotFoundError(f"Missing artifacts. Expected: {', '.join(missing)}")
    model = xgb.XGBClassifier()
    model.load_model(model_path)
    label_encoder = LabelEncoder()
    # Restore the class labels saved at training time; allow_pickle is
    # required because the array holds Python strings.
    label_encoder.classes_ = np.load(labels_path, allow_pickle=True)
    with open(features_path, "r", encoding="utf-8") as f:
        feature_names = [line.strip() for line in f if line.strip()]
    return model, label_encoder, feature_names
def build_feature_vector(symptom_names: List[str], selected_symptoms: List[str]) -> np.ndarray:
    """Encode *selected_symptoms* as a one-hot row over *symptom_names*.

    Matching is case-insensitive and ignores surrounding whitespace;
    symptoms not present in *symptom_names* are silently skipped.
    Returns a float array of shape (1, len(symptom_names)).
    """
    index_of = {name.lower().strip(): pos for pos, name in enumerate(symptom_names)}
    row = np.zeros(len(symptom_names), dtype=float)
    for raw in selected_symptoms:
        pos = index_of.get(raw.lower().strip())
        if pos is not None:
            row[pos] = 1.0
    return row.reshape(1, -1)
def predict_symptoms_json(symptoms: List[str], model, label_encoder, feature_names: List[str]) -> Dict[str, Any]:
    """Run the model on *symptoms* and return a JSON-serializable result dict.

    The dict carries the echoed input, the top prediction, the top-3 ranked
    predictions, and a coarse high/medium/low confidence band.
    """
    if not symptoms:
        return {"error": "No symptoms provided"}

    vector = build_feature_vector(feature_names, symptoms)
    probabilities = model.predict_proba(vector)[0]
    # Class indices sorted by descending probability, top three kept.
    ranked = np.argsort(probabilities)[::-1][:3]

    predictions = []
    for position, class_idx in enumerate(ranked, start=1):
        name = label_encoder.inverse_transform([class_idx])[0]
        score = float(probabilities[class_idx])
        predictions.append({
            "rank": position,
            "disease": name,
            "confidence": score,
            "confidence_percent": round(score * 100, 2),
        })

    top_score = predictions[0]["confidence"]
    if top_score > 0.7:
        band = "high"
    elif top_score > 0.4:
        band = "medium"
    else:
        band = "low"

    return {
        "input_symptoms": symptoms,
        "primary_diagnosis": predictions[0],
        "top_predictions": predictions,
        "model_confidence": band,
    }
def predict_symptoms_csv(symptoms: List[str], model, label_encoder, feature_names: List[str]) -> str:
    """Return the top-3 predictions as CSV text (header + one row per rank).

    Uses csv.writer so that disease names containing commas or quotes are
    escaped correctly — the previous f-string join produced malformed rows
    for such names. Output is byte-identical for names without special
    characters.
    """
    if not symptoms:
        return "error,No symptoms provided"
    x = build_feature_vector(feature_names, symptoms)
    proba = model.predict_proba(x)[0]
    # Indices of the three highest-probability classes, best first.
    top3_idx = np.argsort(proba)[-3:][::-1]
    buf = io.StringIO()
    writer = csv.writer(buf, lineterminator="\n")
    writer.writerow(["rank", "disease", "confidence", "confidence_percent"])
    for rank, idx in enumerate(top3_idx, 1):
        disease_name = label_encoder.inverse_transform([idx])[0]
        confidence = float(proba[idx])
        writer.writerow([rank, disease_name, f"{confidence:.4f}", f"{confidence * 100:.2f}"])
    # Strip the trailing newline so the result matches the previous
    # "\n".join(...) formatting exactly.
    return buf.getvalue().rstrip("\n")
def predict_symptoms_simple(symptoms: List[str], model, label_encoder, feature_names: List[str]) -> str:
    """Return a one-line human-readable diagnosis for *symptoms*."""
    if not symptoms:
        return "Error: No symptoms provided"
    vector = build_feature_vector(feature_names, symptoms)
    probabilities = model.predict_proba(vector)[0]
    # Single best class only — no ranking needed for the simple format.
    best = int(np.argmax(probabilities))
    label = label_encoder.inverse_transform([best])[0]
    return f"Diagnosis: {label} (Confidence: {probabilities[best] * 100:.1f}%)"
def main():
    """CLI entry point: parse arguments, load artifacts, print predictions.

    On any failure after argument parsing, prints an error (JSON-shaped
    when --format=json) instead of raising.
    """
    parser = argparse.ArgumentParser(description="API-style symptom checker using saved model")
    parser.add_argument("--symptoms", nargs="+", required=True, help="List of symptoms")
    parser.add_argument("--format", choices=["json", "csv", "simple"], default="json", help="Output format")
    parser.add_argument("--artifacts-prefix", default="symptom_checker/symptom_model", help="Path to model artifacts")
    args = parser.parse_args()

    # All formatters share the same (symptoms, model, encoder, features) signature.
    formatters = {
        "json": predict_symptoms_json,
        "csv": predict_symptoms_csv,
        "simple": predict_symptoms_simple,
    }

    try:
        model, label_encoder, feature_names = load_artifacts(args.artifacts_prefix)
        result = formatters[args.format](args.symptoms, model, label_encoder, feature_names)
        if args.format == "json":
            print(json.dumps(result, indent=2))
        else:
            print(result)
    except Exception as e:
        # Top-level boundary: surface the failure in the requested format.
        error_result = {"error": str(e), "input_symptoms": args.symptoms}
        if args.format == "json":
            print(json.dumps(error_result, indent=2))
        else:
            print(f"Error: {e}")


if __name__ == "__main__":
    main()