Tantawi commited on
Commit
f2a4578
·
verified ·
1 Parent(s): f8bf310

Upload 13 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ symptom_model.json filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.9
2
+
3
+ # Set working directory
4
+ WORKDIR /code
5
+
6
+ # Copy requirements first for better caching
7
+ COPY ./requirements.txt /code/requirements.txt
8
+
9
+ # Install dependencies
10
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
11
+
12
+ # Copy application code
13
+ COPY . /code
14
+
15
+ # Expose port 7860 (required by Hugging Face Spaces)
16
+ EXPOSE 7860
17
+
18
+ # Set environment variables
19
+ ENV PYTHONPATH=/code
20
+
21
+ # Command to run the application
22
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,37 @@
1
- ---
2
- title: Text Classification
3
- emoji: 📊
4
- colorFrom: green
5
- colorTo: gray
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: GP-Tea Symptom Checker
3
+ emoji: 🩺
4
+ colorFrom: green
5
+ colorTo: blue
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: false
9
+ license: apache-2.0
10
+ ---
11
+
12
+ # GP-Tea Symptom Checker Service
13
+
14
+ A FastAPI-based AI service for medical symptom analysis using XGBoost machine learning. This service analyzes user-selected symptoms and provides disease predictions with confidence scores.
15
+
16
+ ## Features
17
+
18
+ - 🩺 **Symptom Analysis**: Select from 297 medical symptoms for analysis
19
+ - 🤖 **AI-Powered Predictions**: XGBoost model trained on medical data
20
+ - 📊 **Confidence Scoring**: Get top 3 disease predictions with confidence percentages
21
+ - 🚀 **Fast API**: RESTful API with automatic documentation
22
+ - 🌐 **CORS Enabled**: Ready for web application integration
23
+ - 📋 **Health Monitoring**: Built-in health check endpoint
24
+
25
+ ## Local Development
26
+
27
+ ```bash
28
+ # Install dependencies
29
+ pip install -r requirements.txt
30
+
31
+ # Run the service
32
+ uvicorn main:app --host 0.0.0.0 --port 7860 --reload
33
+ ```
34
+
35
+ # Make sure you're in the Text_classification directory
36
+
37
+ uvicorn main:app --host 0.0.0.0 --port 8002 --reload
api_symptom_checker.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import json
3
+ import os
4
+ from typing import List, Dict, Any
5
+
6
+ import numpy as np
7
+ import xgboost as xgb
8
+ from sklearn.preprocessing import LabelEncoder
9
+
10
+
11
def load_artifacts(prefix: str):
    """Load the trained model artifacts saved under *prefix*.

    Expects three sibling files:
      - ``{prefix}.json``         XGBoost model in native JSON format
      - ``{prefix}.labels.npy``   LabelEncoder classes (object/string array)
      - ``{prefix}.features.txt`` one feature name per line

    Returns:
        Tuple of (model, label_encoder, feature_names).

    Raises:
        FileNotFoundError: if any of the three artifact files is missing.
    """
    model_path = f"{prefix}.json"
    labels_path = f"{prefix}.labels.npy"
    features_path = f"{prefix}.features.txt"

    if not (os.path.exists(model_path) and os.path.exists(labels_path) and os.path.exists(features_path)):
        raise FileNotFoundError(f"Missing artifacts. Expected: {model_path}, {labels_path}, {features_path}")

    model = xgb.XGBClassifier()
    model.load_model(model_path)

    # Rebuild the LabelEncoder from its saved classes_ array; allow_pickle is
    # required because the classes are Python strings (object dtype).
    label_encoder = LabelEncoder()
    classes = np.load(labels_path, allow_pickle=True)
    label_encoder.classes_ = classes

    # Blank lines in the features file are skipped.
    with open(features_path, "r", encoding="utf-8") as f:
        feature_names = [line.strip() for line in f if line.strip()]

    return model, label_encoder, feature_names
31
+
32
+
33
def build_feature_vector(symptom_names: List[str], selected_symptoms: List[str]) -> np.ndarray:
    """Encode the selected symptoms as a single-row binary feature matrix.

    Matching is case-insensitive and ignores surrounding whitespace; unknown
    symptom names are silently skipped.
    """
    index_by_name = {name.lower().strip(): pos for pos, name in enumerate(symptom_names)}
    vector = np.zeros(len(symptom_names), dtype=float)

    for raw in selected_symptoms:
        pos = index_by_name.get(raw.lower().strip())
        if pos is not None:
            vector[pos] = 1.0

    # Model expects a 2-D (1, n_features) array.
    return vector.reshape(1, -1)
44
+
45
+
46
def predict_symptoms_json(symptoms: List[str], model, label_encoder, feature_names: List[str]) -> Dict[str, Any]:
    """Return the top-3 disease predictions as a JSON-serializable dict."""
    if not symptoms:
        return {"error": "No symptoms provided"}

    # Encode symptoms and score all classes.
    vector = build_feature_vector(feature_names, symptoms)
    probabilities = model.predict_proba(vector)[0]

    # Indices of the three highest-probability classes, best first.
    ranked = np.argsort(probabilities)[-3:][::-1]

    predictions = [
        {
            "rank": position,
            "disease": label_encoder.inverse_transform([cls])[0],
            "confidence": float(probabilities[cls]),
            "confidence_percent": round(float(probabilities[cls]) * 100, 2),
        }
        for position, cls in enumerate(ranked, 1)
    ]

    # Coarse confidence bucket derived from the best class probability.
    best = predictions[0]["confidence"]
    if best > 0.7:
        level = "high"
    elif best > 0.4:
        level = "medium"
    else:
        level = "low"

    return {
        "input_symptoms": symptoms,
        "primary_diagnosis": predictions[0],
        "top_predictions": predictions,
        "model_confidence": level,
    }
76
+
77
+
78
def predict_symptoms_csv(symptoms: List[str], model, label_encoder, feature_names: List[str]) -> str:
    """Return the top-3 predictions as CSV text (header plus one row per rank)."""
    if not symptoms:
        return "error,No symptoms provided"

    vector = build_feature_vector(feature_names, symptoms)
    probabilities = model.predict_proba(vector)[0]
    ranked = np.argsort(probabilities)[-3:][::-1]

    rows = ["rank,disease,confidence,confidence_percent"]
    for position, cls in enumerate(ranked, 1):
        disease = label_encoder.inverse_transform([cls])[0]
        p = probabilities[cls]
        rows.append(f"{position},{disease},{p:.4f},{p*100:.2f}")

    return "\n".join(rows)
94
+
95
+
96
def predict_symptoms_simple(symptoms: List[str], model, label_encoder, feature_names: List[str]) -> str:
    """Return a one-line human-readable diagnosis for the single best class."""
    if not symptoms:
        return "Error: No symptoms provided"

    vector = build_feature_vector(feature_names, symptoms)
    probabilities = model.predict_proba(vector)[0]

    best = int(np.argmax(probabilities))
    disease = label_encoder.inverse_transform([best])[0]

    return f"Diagnosis: {disease} (Confidence: {probabilities[best]*100:.1f}%)"
109
+
110
+
111
def main():
    """CLI entry point: predict diseases for --symptoms in the chosen --format."""
    parser = argparse.ArgumentParser(description="API-style symptom checker using saved model")
    parser.add_argument("--symptoms", nargs="+", required=True, help="List of symptoms")
    parser.add_argument("--format", choices=["json", "csv", "simple"], default="json", help="Output format")
    parser.add_argument("--artifacts-prefix", default="symptom_checker/symptom_model", help="Path to model artifacts")
    args = parser.parse_args()

    try:
        # Load the trained model
        model, label_encoder, feature_names = load_artifacts(args.artifacts_prefix)

        # Get predictions in requested format
        if args.format == "json":
            result = predict_symptoms_json(args.symptoms, model, label_encoder, feature_names)
            print(json.dumps(result, indent=2))
        elif args.format == "csv":
            result = predict_symptoms_csv(args.symptoms, model, label_encoder, feature_names)
            print(result)
        elif args.format == "simple":
            result = predict_symptoms_simple(args.symptoms, model, label_encoder, feature_names)
            print(result)

    except Exception as e:
        # Report failures in the same format the caller asked for so scripted
        # consumers still get parseable output.
        error_result = {"error": str(e), "input_symptoms": args.symptoms}
        if args.format == "json":
            print(json.dumps(error_result, indent=2))
        else:
            print(f"Error: {e}")


if __name__ == "__main__":
    main()
app.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
import os
import uvicorn
from main import app

if __name__ == "__main__":
    # Thin launcher for the FastAPI service defined in main.py.
    # Hugging Face Spaces expects the app to listen on port 7860; the PORT
    # environment variable can override it for local runs.
    port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=port)
evaluate_symptom_checker.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ from typing import Tuple
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+ import xgboost as xgb
8
+ from sklearn.model_selection import train_test_split
9
+ from sklearn.preprocessing import LabelEncoder
10
+ from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
11
+
12
+
13
def load_data(csv_path: str) -> pd.DataFrame:
    """Read the training CSV, validating existence and shape (target + >=1 feature)."""
    if not os.path.exists(csv_path):
        raise FileNotFoundError(f"CSV not found: {csv_path}")

    frame = pd.read_csv(csv_path)

    if frame.shape[1] < 2:
        raise ValueError("CSV must have at least 2 columns (target + features)")
    return frame
20
+
21
+
22
def split_encode(df: pd.DataFrame, test_size: float, seed: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, LabelEncoder, list]:
    """Stratified train/test split with integer-encoded labels.

    The first column of *df* is the target; all other columns are features.

    Returns:
        (X_train, X_test, y_train_enc, y_test_enc, label_encoder, feature_names)
        with the X arrays as raw numpy values.
    """
    target = df.columns[0]
    X = df.iloc[:, 1:]
    y = df[target]

    # Stratify so each class keeps the same proportion in train and test.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=seed, stratify=y
    )

    # Fit on training labels only; transform() would raise for test labels not
    # seen in training, which stratification prevents here.
    label_encoder = LabelEncoder()
    y_train_enc = label_encoder.fit_transform(y_train)
    y_test_enc = label_encoder.transform(y_test)

    return X_train.values, X_test.values, y_train_enc, y_test_enc, label_encoder, X.columns.tolist()
36
+
37
+
38
def build_model(num_classes: int):
    """Construct an XGBoost multi-class classifier, preferring GPU when possible.

    Fallback chain across xgboost versions:
      1. device="cuda" (xgboost >= 2.0 style),
      2. tree_method="gpu_hist" (legacy GPU API),
      3. plain CPU "hist".
    """
    common_kwargs = dict(
        objective="multi:softprob",
        num_class=num_classes,
        eval_metric="mlogloss",
        tree_method="hist",
        n_estimators=300,
        max_depth=6,
        learning_rate=0.05,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
    )
    try:
        model = xgb.XGBClassifier(device="cuda", **common_kwargs)
    except TypeError:
        # Older xgboost: no 'device' parameter; try the legacy GPU tree method,
        # dropping the conflicting tree_method="hist" entry.
        try:
            model = xgb.XGBClassifier(tree_method="gpu_hist", **{k: v for k, v in common_kwargs.items() if k != "tree_method"})
        except Exception:
            model = xgb.XGBClassifier(**common_kwargs)
    return model
59
+
60
+
61
def main():
    """Train a fresh model on --csv and print accuracy, report, and confusion matrix."""
    parser = argparse.ArgumentParser(description="Evaluate XGBoost Symptom Checker accuracy")
    parser.add_argument("--csv", required=True, help="Path to cleaned CSV (target + binary features)")
    parser.add_argument("--test-size", type=float, default=0.2, help="Test set fraction (default 0.2)")
    parser.add_argument("--seed", type=int, default=42, help="Random seed (default 42)")
    args = parser.parse_args()

    print("Loading data...")
    df = load_data(args.csv)
    print(f"Shape: {df.shape}")

    print("Splitting and encoding labels...")
    X_train, X_test, y_train, y_test, label_enc, feature_names = split_encode(df, args.test_size, args.seed)
    num_classes = len(np.unique(y_train))
    print(f"Classes: {num_classes}; Features: {len(feature_names)}")

    print("Training model...")
    model = build_model(num_classes)
    try:
        # Newer xgboost versions moved early stopping out of fit(); retry without it.
        model.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=50, early_stopping_rounds=30)
    except TypeError:
        model.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=50)

    print("Evaluating...")
    y_proba = model.predict_proba(X_test)
    y_pred = np.argmax(y_proba, axis=1)

    acc = accuracy_score(y_test, y_pred)
    print(f"\nAccuracy: {acc:.4f} ({acc*100:.2f}%)")

    print("\nClassification report:")
    # Map encoded class ids back to disease names for readable report rows.
    target_names = label_enc.inverse_transform(np.arange(num_classes))
    print(classification_report(y_test, y_pred, target_names=target_names, zero_division=0))

    print("Confusion matrix (rows=true, cols=pred):")
    cm = confusion_matrix(y_test, y_pred)
    print(cm)


if __name__ == "__main__":
    main()
102
+
103
+
fix_numpy_labels.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Fix NumPy compatibility issues with symptom_model.labels.npy
4
+ """
5
+
6
+ import numpy as np
7
+ import json
8
+ import os
9
+
10
def fix_labels_file():
    """Regenerate symptom_model.labels.npy so it loads under the current NumPy.

    Strategy:
      1. If the model JSON (and ideally the features file) is present, write a
         hand-curated list of common diseases as the new labels array.
      2. Otherwise fall back to a minimal placeholder list.

    NOTE(review): both lists are fabricated — they are NOT guaranteed to match
    the classes the XGBoost model was actually trained on, so decoded disease
    names may be wrong until labels are regenerated from the training data.

    Returns:
        True if a labels file was written, False otherwise.
    """
    # Check if the JSON model file exists (it contains the label information)
    json_file = "symptom_model.json"
    labels_file = "symptom_model.labels.npy"
    features_file = "symptom_model.features.txt"

    if not os.path.exists(json_file):
        print(f"❌ {json_file} not found!")
        return False

    try:
        # Method 1: Try to extract labels from the JSON model file
        print("🔍 Checking model JSON file for label information...")
        with open(json_file, 'r') as f:
            model_data = json.load(f)

        # XGBoost models sometimes store class names in the JSON
        if 'learner' in model_data and 'objective' in model_data['learner']:
            print("📋 Found XGBoost model structure")

        # For now, let's create a simple fix by loading features and creating dummy labels
        if os.path.exists(features_file):
            with open(features_file, 'r', encoding='utf-8') as f:
                features = [line.strip() for line in f if line.strip()]
            print(f"📝 Found {len(features)} features")

            # Create a comprehensive list of common diseases for symptom prediction
            common_diseases = [
                "Common Cold", "Flu", "Headache", "Migraine", "Fever",
                "Cough", "Sore Throat", "Bronchitis", "Pneumonia", "Asthma",
                "Allergies", "Sinusitis", "Gastritis", "Indigestion", "Nausea",
                "Diarrhea", "Constipation", "UTI", "Kidney Stones", "Hypertension",
                "Diabetes", "Arthritis", "Back Pain", "Muscle Strain", "Anxiety",
                "Depression", "Insomnia", "Fatigue", "Dizziness", "Anemia",
                "Dehydration", "Food Poisoning", "Viral Infection", "Bacterial Infection",
                "Skin Rash", "Eczema", "Acne", "Sunburn", "Cuts and Bruises"
            ]

            # Convert to numpy array and save (object dtype -> needs allow_pickle)
            labels_array = np.array(common_diseases, dtype=object)
            np.save(labels_file, labels_array, allow_pickle=True)

            print(f"✅ Successfully created {labels_file} with {len(common_diseases)} diseases")
            return True

    except Exception as e:
        print(f"❌ Method 1 failed: {e}")

    # Method 2: Create a minimal working labels file
    try:
        print("🔧 Creating minimal labels file...")
        minimal_labels = [
            "Unknown Condition", "Common Cold", "Flu", "Headache", "Fever",
            "Cough", "Fatigue", "Nausea", "Pain", "Infection"
        ]

        labels_array = np.array(minimal_labels, dtype=object)
        np.save(labels_file, labels_array, allow_pickle=True)

        print(f"✅ Created minimal {labels_file} with {len(minimal_labels)} conditions")
        return True

    except Exception as e:
        print(f"❌ Method 2 failed: {e}")
        return False
77
+
78
if __name__ == "__main__":
    # One-off maintenance script: rebuild the labels file, then tell the
    # operator whether the FastAPI server can be restarted.
    print("🔧 Fixing NumPy compatibility for symptom_model.labels.npy...")

    if fix_labels_file():
        print("\n🎉 Labels file fixed successfully!")
        print("You can now restart the FastAPI server.")
    else:
        print("\n❌ Failed to fix labels file.")
        print("You may need to retrain the model or get the original training data.")
main.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from pydantic import BaseModel
4
+ from typing import List
5
+ import json
6
+ import os
7
+ import logging
8
+
9
+ # Import the existing symptom checker logic
10
+ from api_symptom_checker import load_artifacts, predict_symptoms_json
11
+ import numpy as np
12
+
13
def safe_predict_symptoms_json(symptoms, model, label_encoder, feature_names):
    """Safe prediction that only uses diseases the label encoder knows about.

    Args:
        symptoms: display-style symptom names (e.g. "Muscle Pain"), matched
            against feature names title-cased with underscores replaced.
        model: fitted classifier exposing predict_proba.
        label_encoder: encoder whose classes_ define the decodable classes.
        feature_names: underscore-style feature names in model column order.

    Returns:
        Dict with input_symptoms, primary_diagnosis, top_predictions and a
        coarse model_confidence bucket, or an {"error": ...} dict.
    """
    if not symptoms:
        return {"error": "No symptoms provided"}

    # Map display names ("Anxiety And Nervousness") to their column index up
    # front, so each symptom matches in O(1) instead of the previous
    # list.index() scan per symptom (O(n) each, O(n*m) overall).
    index_by_display = {
        name.replace("_", " ").title(): idx for idx, name in enumerate(feature_names)
    }

    x = np.zeros(len(feature_names))
    matched_symptoms = []
    for symptom in symptoms:
        idx = index_by_display.get(symptom)
        if idx is not None:
            x[idx] = 1.0
            matched_symptoms.append(symptom)

    if not matched_symptoms:
        return {"error": "No valid symptoms found"}

    x = x.reshape(1, -1)

    proba = model.predict_proba(x)[0]

    # SAFETY: the saved labels file may list fewer classes than the model
    # outputs; only rank the probabilities the encoder can actually decode.
    max_valid_class = len(label_encoder.classes_)
    valid_proba = proba[:max_valid_class]

    # Top-3 valid classes, best first.
    top3_idx = np.argsort(valid_proba)[-3:][::-1]

    predictions = []
    for rank, idx in enumerate(top3_idx, 1):
        confidence = float(valid_proba[idx])
        predictions.append({
            "rank": rank,
            "disease": label_encoder.inverse_transform([idx])[0],
            "confidence": confidence,
            "confidence_percent": round(confidence * 100, 2)
        })

    best = predictions[0]["confidence"]
    confidence_level = "high" if best > 0.7 else "medium" if best > 0.4 else "low"

    return {
        "input_symptoms": matched_symptoms,
        "primary_diagnosis": predictions[0],
        "top_predictions": predictions,
        "model_confidence": confidence_level
    }
63
+
64
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize FastAPI app
app = FastAPI(
    title="Symptom Checker API",
    description="AI-powered symptom analysis service",
    version="1.0.0"
)

# Add CORS middleware so browser-based clients can call this API cross-origin.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Configure this properly for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global variables for model artifacts; populated once by the startup hook
# and read by every endpoint below.
model = None
label_encoder = None
feature_names = None
88
+
89
# Pydantic models for request/response
class SymptomRequest(BaseModel):
    """Request body for POST /api/check-symptoms."""
    symptoms: List[str]  # display-style names as served by /api/symptoms

class PredictionItem(BaseModel):
    """One ranked disease prediction."""
    rank: int  # 1 = most likely
    disease: str
    confidence: float  # raw probability in [0, 1]
    confidence_percent: float

class SymptomResponse(BaseModel):
    """Full prediction payload (mirrors safe_predict_symptoms_json output)."""
    input_symptoms: List[str]
    primary_diagnosis: PredictionItem
    top_predictions: List[PredictionItem]
    model_confidence: str  # "high" / "medium" / "low"

class AvailableSymptomsResponse(BaseModel):
    """Response for GET /api/symptoms."""
    success: bool = True
    symptoms: List[str]
    total_symptoms: int
109
+
110
@app.on_event("startup")
async def startup_event():
    """Load model artifacts on startup.

    Populates the module-level model/label_encoder/feature_names globals from
    the "symptom_model.*" artifact files, and re-raises on failure so the app
    fails fast instead of serving 503s indefinitely.
    """
    global model, label_encoder, feature_names
    try:
        logger.info("Loading symptom checker model artifacts...")
        model, label_encoder, feature_names = load_artifacts("symptom_model")
        logger.info(f"Model loaded successfully with {len(feature_names)} features")
    except Exception as e:
        logger.error(f"Failed to load model artifacts: {e}")
        raise e
121
+
122
@app.get("/")
async def root():
    """Root endpoint: service banner plus the list of available routes."""
    return {
        "message": "Symptom Checker API",
        "version": "1.0.0",
        "endpoints": ["/health", "/api/symptoms", "/api/check-symptoms"]
    }
130
+
131
@app.get("/health")
async def health_check():
    """Health check endpoint.

    Returns 503 until the startup hook has loaded the model; otherwise a small
    status payload suitable for orchestrator/uptime probes.
    """
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    return {
        "status": "healthy",
        "service": "symptom-checker",
        "model_loaded": model is not None,
        "features_count": len(feature_names) if feature_names else 0
    }
143
+
144
@app.get("/api/symptoms", response_model=AvailableSymptomsResponse)
async def get_available_symptoms():
    """Get list of all available symptoms that the model can recognize"""
    if feature_names is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    # Present feature names in human-readable form ("muscle_pain" -> "Muscle Pain").
    display_names = [name.replace('_', ' ').title() for name in feature_names]

    return AvailableSymptomsResponse(
        success=True,
        symptoms=sorted(display_names),
        total_symptoms=len(display_names)
    )
162
+
163
@app.post("/api/check-symptoms")
async def check_symptoms(request: SymptomRequest):
    """Analyze symptoms and return disease predictions.

    Expects display-style symptom names (as served by /api/symptoms); the
    prediction helper maps them back onto the model's feature columns.

    Raises:
        HTTPException 503: model artifacts not loaded yet.
        HTTPException 400: empty or entirely unrecognized symptom list.
        HTTPException 500: unexpected prediction failure.
    """
    if model is None or label_encoder is None or feature_names is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    if not request.symptoms:
        raise HTTPException(status_code=400, detail="No symptoms provided")

    try:
        # Delegate to the safe helper, which handles the display-name mapping
        # and the label-encoder/model class-count mismatch.
        result = safe_predict_symptoms_json(request.symptoms, model, label_encoder, feature_names)

        if "error" in result:
            raise HTTPException(status_code=400, detail=result["error"])

        # Return format that matches Flutter's SymptomCheckResponse expectations.
        return {
            "success": True,
            "predictions": [
                {
                    "rank": pred["rank"],
                    "disease": pred["disease"],
                    "confidence": pred["confidence"],
                    "confidence_percent": f"{pred['confidence_percent']:.2f}%"
                }
                for pred in result["top_predictions"]
            ],
            "input_symptoms": request.symptoms,
            "primary_diagnosis": result["primary_diagnosis"]["disease"],
            "model_confidence": result["model_confidence"]
        }
    except HTTPException:
        # Bug fix: the deliberate 400 above used to be swallowed by the broad
        # handler below and re-raised as a 500. Let it propagate unchanged.
        raise
    except Exception as e:
        logger.error(f"Error during symptom prediction: {e}")
        raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}")
218
+
219
if __name__ == "__main__":
    import uvicorn
    import os
    # Listen on PORT if set, otherwise default to 7860 — the port Hugging Face
    # Spaces expects. (The original comment claimed an 8002 fallback; the code
    # defaults to 7860.)
    port = int(os.getenv("PORT", 7860))
    uvicorn.run("main:app", host="0.0.0.0", port=port, reload=False)
preprocess_data.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ import sys
4
+ import pandas as pd
5
+ import numpy as np
6
+
7
+
8
def standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy with normalized column names: stripped, lowercased, spaces -> underscores."""
    normalized = df.copy()
    normalized.columns = [name.strip().lower().replace(" ", "_") for name in normalized.columns]
    return normalized
13
+
14
+
15
def drop_invalid_rows(df: pd.DataFrame) -> pd.DataFrame:
    """Drop rows with a missing target (first column) and rows whose features are all empty or zero."""
    cleaned = df.copy()
    target_col = cleaned.columns[0]

    # A row without a label is unusable for supervised training.
    cleaned = cleaned[cleaned[target_col].notna()]

    features = cleaned.iloc[:, 1:]
    all_missing = features.isna().all(axis=1)
    all_zero = features.sum(axis=1) == 0
    return cleaned.loc[~(all_missing | all_zero)]
24
+
25
+
26
def remove_constant_and_sparse_features(df: pd.DataFrame, min_positive_frac: float = 0.0005):
    """Drop feature columns that are constant or whose positive-value fraction is below *min_positive_frac*."""
    target = df.columns[0]
    features = df.iloc[:, 1:]

    def _worth_keeping(series) -> bool:
        # Single-valued (constant) columns carry no signal.
        if series.nunique(dropna=True) <= 1:
            return False
        # If binary-like, compute the positive ratio; treat non-comparable
        # (e.g. non-numeric) columns as dense rather than failing.
        try:
            positive_ratio = (series.fillna(0) > 0).mean()
        except Exception:
            positive_ratio = 1.0
        return positive_ratio >= min_positive_frac

    kept = [col for col in features.columns if _worth_keeping(features[col])]
    return pd.concat([df[[target]], features[kept]], axis=1)
45
+
46
+
47
def impute_missing(df: pd.DataFrame) -> pd.DataFrame:
    """Fill missing feature values with 0; the target column is left untouched."""
    target = df.columns[0]
    filled_features = df.iloc[:, 1:].fillna(0)
    return pd.concat([df[[target]], filled_features], axis=1)
52
+
53
+
54
def limit_classes(df: pd.DataFrame, min_samples: int = 5) -> pd.DataFrame:
    """Keep only rows whose target class occurs at least *min_samples* times."""
    target = df.columns[0]
    counts = df[target].value_counts()
    frequent = counts.index[counts >= min_samples]
    return df[df[target].isin(frequent)]
60
+
61
+
62
def main():
    """CLI: load a raw disease/symptom CSV, run the cleanup pipeline, write the result."""
    parser = argparse.ArgumentParser(description="Preprocess disease-symptom CSV for training.")
    parser.add_argument("--input", required=True, help="Path to raw CSV")
    parser.add_argument("--output", default="cleaned_dataset.csv", help="Path to save cleaned CSV")
    args = parser.parse_args()

    if not os.path.exists(args.input):
        print(f"❌ Input CSV not found: {args.input}")
        sys.exit(1)

    print("Loading CSV...")
    df = pd.read_csv(args.input)
    print(f"Raw shape: {df.shape}")

    print("Standardizing column names...")
    df = standardize_columns(df)

    print("Dropping invalid/empty rows...")
    df = drop_invalid_rows(df)
    print(f"After row cleanup: {df.shape}")

    print("Removing constant and sparse features...")
    df = remove_constant_and_sparse_features(df)
    print(f"After feature cleanup: {df.shape}")

    print("Imputing missing values (0 for symptoms)...")
    df = impute_missing(df)

    print("Limiting classes with very few samples...")
    df = limit_classes(df, min_samples=5)
    print(f"After class filtering: {df.shape}")

    print(f"Saving cleaned CSV to: {args.output}")
    df.to_csv(args.output, index=False)
    print("Done.")


if __name__ == "__main__":
    main()
101
+
102
+
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.104.1
2
+ uvicorn[standard]==0.24.0
3
+ pydantic==2.5.0
4
+ python-multipart==0.0.6
5
+ xgboost==2.0.3
6
+ pandas==2.2.0
7
+ numpy==1.26.0
8
+ scikit-learn==1.4.0
symptom_checker.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ from typing import List, Tuple
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+ import xgboost as xgb
8
+ from sklearn.model_selection import train_test_split
9
+ from sklearn.preprocessing import LabelEncoder
10
+
11
+
12
def load_dataset(csv_path: str) -> pd.DataFrame:
    """Read the training CSV, validating that it exists and has a target plus features."""
    if not os.path.exists(csv_path):
        raise FileNotFoundError(
            f"CSV not found at '{csv_path}'. Provide a valid path with --csv <path>."
        )

    frame = pd.read_csv(csv_path)

    if frame.shape[1] < 2:
        raise ValueError("Dataset must have at least 2 columns: target then feature columns.")
    return frame
21
+
22
+
23
def train_model(data: pd.DataFrame):
    """Train an XGBoost multi-class classifier on a (target, features...) frame.

    The first column is the target; remaining columns are features. Classes
    with a single sample are dropped first so the stratified split cannot fail.

    Returns:
        (model, label_encoder, feature_names) tuple.
    """
    y = data.iloc[:, 0]

    # Remove diseases with only 1 record — stratify requires >= 2 per class.
    value_counts = y.value_counts()
    rare_diseases = value_counts[value_counts < 2].index
    data_filtered = data[~data.iloc[:, 0].isin(rare_diseases)]

    X = data_filtered.iloc[:, 1:]
    y = data_filtered.iloc[:, 0]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Integer-encode string labels for xgboost.
    label_encoder = LabelEncoder()
    y_train_encoded = label_encoder.fit_transform(y_train)
    y_test_encoded = label_encoder.transform(y_test)

    # Prefer GPU if available, but fall back to CPU if not supported
    common_kwargs = dict(
        objective="multi:softprob",
        num_class=len(np.unique(y_train_encoded)),
        eval_metric="mlogloss",
        tree_method="hist",
        n_estimators=400,
        max_depth=6,
        learning_rate=0.05,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
    )

    try:
        model = xgb.XGBClassifier(device="cuda", **common_kwargs)
    except TypeError:
        # Older xgboost: no 'device' param. Try GPU via tree_method if supported, else CPU.
        try:
            model = xgb.XGBClassifier(tree_method="gpu_hist", **{k: v for k, v in common_kwargs.items() if k != "tree_method"})
        except Exception:
            model = xgb.XGBClassifier(**common_kwargs)

    try:
        model.fit(
            X_train,
            y_train_encoded,
            eval_set=[(X_test, y_test_encoded)],
            verbose=50,
            early_stopping_rounds=50,
        )
    except TypeError:
        # Older xgboost versions do not support early_stopping_rounds in sklearn API
        model.fit(
            X_train,
            y_train_encoded,
            eval_set=[(X_test, y_test_encoded)],
            verbose=50,
        )

    return model, label_encoder, X.columns.tolist()
83
+
84
+
85
def save_artifacts(model: xgb.XGBClassifier, label_encoder: LabelEncoder, feature_names: List[str], prefix: str) -> Tuple[str, str, str]:
    """Persist model, encoder classes, and feature list under *prefix*.

    Writes {prefix}.json, {prefix}.labels.npy and {prefix}.features.txt,
    creating the parent directory if needed, and returns the three paths.
    """
    os.makedirs(os.path.dirname(prefix) or ".", exist_ok=True)
    model_path = f"{prefix}.json"
    labels_path = f"{prefix}.labels.npy"
    features_path = f"{prefix}.features.txt"

    try:
        model.save_model(model_path)
    except Exception:
        # Fall back to the raw booster if the sklearn wrapper can't save.
        model.get_booster().save_model(model_path)

    # Save label encoder classes with allow_pickle=True since they contain strings
    np.save(labels_path, label_encoder.classes_, allow_pickle=True)

    with open(features_path, "w", encoding="utf-8") as f:
        for name in feature_names:
            f.write(f"{name}\n")

    return model_path, labels_path, features_path
104
+
105
+
106
def load_artifacts(prefix: str) -> Tuple[xgb.XGBClassifier, LabelEncoder, List[str]]:
    """Load the model/labels/features trio previously written by save_artifacts.

    Returns:
        (model, label_encoder, feature_names) tuple.

    Raises:
        FileNotFoundError: if any of the three artifact files is missing.
    """
    model_path = f"{prefix}.json"
    labels_path = f"{prefix}.labels.npy"
    features_path = f"{prefix}.features.txt"

    if not (os.path.exists(model_path) and os.path.exists(labels_path) and os.path.exists(features_path)):
        raise FileNotFoundError(
            f"Missing artifacts. Expected: '{model_path}', '{labels_path}', '{features_path}'."
        )

    model = xgb.XGBClassifier()
    model.load_model(model_path)

    label_encoder = LabelEncoder()
    # Load label encoder classes with allow_pickle=True since they contain strings
    classes = np.load(labels_path, allow_pickle=True)
    label_encoder.classes_ = classes

    with open(features_path, "r", encoding="utf-8") as f:
        feature_names = [line.strip() for line in f if line.strip()]

    return model, label_encoder, feature_names
128
+
129
+
130
def build_feature_vector(symptom_names: List[str], selected: List[str]) -> np.ndarray:
    """Encode user-selected symptoms as a one-hot row vector.

    Matching against *symptom_names* is case-insensitive and ignores
    surrounding whitespace; unrecognized entries are silently skipped.
    Returns a float array of shape ``(1, len(symptom_names))``.
    """
    index_of = {name.strip().lower(): i for i, name in enumerate(symptom_names)}
    row = np.zeros((1, len(symptom_names)), dtype=float)
    for raw in selected:
        idx = index_of.get(raw.strip().lower())
        if idx is not None:
            row[0, idx] = 1.0
    return row
138
+
139
+
140
def interactive_loop(model, label_encoder, symptom_names: List[str]):
    """Run a REPL that reads comma-separated symptoms and prints predictions.

    Commands: 'list' shows every known symptom; 'quit'/'exit'/'q' or an
    empty line ends the session. Any other input is treated as a symptom
    list and scored with the model's top-3 class probabilities.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("🩺 Symptom Checker (XGBoost)")
    print(banner)
    print("Enter symptoms separated by commas. Example: fever, cough, headache")
    print("Type 'list' to see all available symptoms, or 'quit' to exit.")
    print(banner)

    while True:
        try:
            command = input("\n💬 Symptoms: ").strip()
            lowered = command.lower()
            if lowered in {"quit", "exit", "q", ""}:
                print("👋 Goodbye!")
                break
            if lowered == "list":
                print("\nAvailable symptoms (features):")
                print(", ".join(symptom_names))
                continue

            tokens = [t for t in command.split(",") if t.strip()]
            if not tokens:
                print("⚠️ Please enter at least one symptom.")
                continue

            probs = model.predict_proba(build_feature_vector(symptom_names, tokens))[0]
            ranked = np.argsort(probs)[-3:][::-1]
            best = ranked[0]
            best_label = label_encoder.inverse_transform([best])[0]
            best_conf = probs[best]

            print("\n📊 Prediction Results")
            print("-" * 60)
            print(f"🏥 Primary Diagnosis: {best_label}")
            print(f"📈 Confidence: {best_conf:.4f} ({best_conf*100:.2f}%)")
            print("\n🏆 Top 3 Possible Conditions:")
            for rank, idx in enumerate(ranked, start=1):
                label = label_encoder.inverse_transform([idx])[0]
                print(f" {rank}. {label}: {probs[idx]:.4f} ({probs[idx]*100:.2f}%)")

        except KeyboardInterrupt:
            print("\n👋 Interrupted. Goodbye!")
            break
        except Exception as e:
            print(f"❌ Error: {e}")
186
+
187
+
188
def main():
    """CLI entry point: train a model, evaluate saved artifacts, or run the REPL."""
    parser = argparse.ArgumentParser(description="Symptom checker using an XGBoost classifier.")
    # (flags, kwargs) specs keep the option table compact and uniform.
    option_specs = [
        (
            "--csv",
            dict(
                type=str,
                required=False,
                help="Path to CSV dataset. First column must be target (disease), remaining columns symptoms.",
            ),
        ),
        (
            "--save-prefix",
            dict(
                type=str,
                default=None,
                help="Prefix to save artifacts (creates .json/.labels.npy/.features.txt)",
            ),
        ),
        (
            "--eval-only",
            dict(
                action="store_true",
                help="Evaluate previously saved artifacts on --csv and exit (no training).",
            ),
        ),
        (
            "--artifacts-prefix",
            dict(
                type=str,
                default="symptom_checker/symptom_model",
                help="Prefix path to load artifacts (default: symptom_checker/symptom_model)",
            ),
        ),
        (
            "--interactive-only",
            dict(
                action="store_true",
                help="Start interactive mode using saved artifacts only (no training).",
            ),
        ),
    ]
    for flag, kwargs in option_specs:
        parser.add_argument(flag, **kwargs)
    args = parser.parse_args()

    # Mode 1: interactive REPL against previously saved artifacts.
    if args.interactive_only:
        try:
            model, label_encoder, feature_names = load_artifacts(args.artifacts_prefix)
        except FileNotFoundError as err:
            print(str(err))
            print("Train and save first, e.g.:\n python symptom_checker/symtom_checker.py --csv cleaned_dataset.csv --save-prefix symptom_checker/symptom_model")
            return
        interactive_loop(model, label_encoder, feature_names)
        return

    # Mode 2: accuracy evaluation of saved artifacts on a labeled CSV.
    if args.eval_only:
        if not args.csv:
            print("Provide CSV for evaluation. Example:\n python symptom_checker/symtom_checker.py --eval-only --csv cleaned_dataset.csv --artifacts-prefix symptom_checker/symptom_model")
            return
        df = load_dataset(args.csv)
        try:
            model, label_encoder, feature_names = load_artifacts(args.artifacts_prefix)
        except FileNotFoundError as err:
            print(str(err))
            return
        target_col = df.columns[0]
        absent = [col for col in feature_names if col not in df.columns]
        if absent:
            print(f"CSV missing {len(absent)} feature columns from training. Example missing: {absent[:10]}")
            return
        X = df[feature_names].fillna(0).values
        y_true = label_encoder.transform(df[target_col].values)
        predictions = np.argmax(model.predict_proba(X), axis=1)
        acc = (predictions == y_true).mean()
        print(f"Accuracy on provided CSV: {acc:.4f} ({acc*100:.2f}%)")
        return

    # Mode 3 (default): train from CSV, optionally save, then go interactive.
    if not args.csv:
        print("❗ No CSV provided. Run: python symptom_checker/symtom_checker.py --csv path/to/dataset.csv")
        return

    df = load_dataset(args.csv)
    print("Shape of dataset:", df.shape)
    model, label_encoder, symptom_names = train_model(df)

    if args.save_prefix:
        print("Saving artifacts...")
        for artifact_path in save_artifacts(model, label_encoder, symptom_names, args.save_prefix):
            print(f" - {artifact_path}")

    interactive_loop(model, label_encoder, symptom_names)
269
+
270
+
271
# Run the CLI only when executed as a script (not on import).
if __name__ == "__main__":
    main()
273
+
symptom_model.features.txt ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ anxiety_and_nervousness
2
+ depression
3
+ shortness_of_breath
4
+ depressive_or_psychotic_symptoms
5
+ sharp_chest_pain
6
+ dizziness
7
+ insomnia
8
+ abnormal_involuntary_movements
9
+ chest_tightness
10
+ palpitations
11
+ irregular_heartbeat
12
+ breathing_fast
13
+ hoarse_voice
14
+ sore_throat
15
+ difficulty_speaking
16
+ cough
17
+ nasal_congestion
18
+ throat_swelling
19
+ diminished_hearing
20
+ lump_in_throat
21
+ throat_feels_tight
22
+ difficulty_in_swallowing
23
+ skin_swelling
24
+ retention_of_urine
25
+ groin_mass
26
+ leg_pain
27
+ hip_pain
28
+ suprapubic_pain
29
+ blood_in_stool
30
+ lack_of_growth
31
+ emotional_symptoms
32
+ elbow_weakness
33
+ back_weakness
34
+ symptoms_of_the_scrotum_and_testes
35
+ swelling_of_scrotum
36
+ pain_in_testicles
37
+ flatulence
38
+ pus_draining_from_ear
39
+ jaundice
40
+ mass_in_scrotum
41
+ white_discharge_from_eye
42
+ irritable_infant
43
+ abusing_alcohol
44
+ fainting
45
+ hostile_behavior
46
+ drug_abuse
47
+ sharp_abdominal_pain
48
+ feeling_ill
49
+ vomiting
50
+ headache
51
+ nausea
52
+ diarrhea
53
+ vaginal_itching
54
+ vaginal_dryness
55
+ painful_urination
56
+ involuntary_urination
57
+ pain_during_intercourse
58
+ frequent_urination
59
+ lower_abdominal_pain
60
+ vaginal_discharge
61
+ blood_in_urine
62
+ hot_flashes
63
+ intermenstrual_bleeding
64
+ hand_or_finger_pain
65
+ wrist_pain
66
+ hand_or_finger_swelling
67
+ arm_pain
68
+ wrist_swelling
69
+ arm_stiffness_or_tightness
70
+ arm_swelling
71
+ hand_or_finger_stiffness_or_tightness
72
+ wrist_stiffness_or_tightness
73
+ lip_swelling
74
+ toothache
75
+ abnormal_appearing_skin
76
+ skin_lesion
77
+ acne_or_pimples
78
+ dry_lips
79
+ facial_pain
80
+ mouth_ulcer
81
+ skin_growth
82
+ eye_deviation
83
+ diminished_vision
84
+ double_vision
85
+ cross-eyed
86
+ symptoms_of_eye
87
+ pain_in_eye
88
+ eye_moves_abnormally
89
+ abnormal_movement_of_eyelid
90
+ foreign_body_sensation_in_eye
91
+ irregular_appearing_scalp
92
+ swollen_lymph_nodes
93
+ back_pain
94
+ neck_pain
95
+ low_back_pain
96
+ pain_of_the_anus
97
+ pain_during_pregnancy
98
+ pelvic_pain
99
+ impotence
100
+ vomiting_blood
101
+ regurgitation
102
+ burning_abdominal_pain
103
+ restlessness
104
+ symptoms_of_infants
105
+ wheezing
106
+ peripheral_edema
107
+ neck_mass
108
+ ear_pain
109
+ jaw_swelling
110
+ mouth_dryness
111
+ neck_swelling
112
+ knee_pain
113
+ foot_or_toe_pain
114
+ ankle_pain
115
+ bones_are_painful
116
+ knee_weakness
117
+ elbow_pain
118
+ knee_swelling
119
+ skin_moles
120
+ knee_lump_or_mass
121
+ weight_gain
122
+ problems_with_movement
123
+ knee_stiffness_or_tightness
124
+ leg_swelling
125
+ foot_or_toe_swelling
126
+ heartburn
127
+ smoking_problems
128
+ muscle_pain
129
+ infant_feeding_problem
130
+ recent_weight_loss
131
+ difficulty_eating
132
+ vaginal_pain
133
+ vaginal_redness
134
+ vulvar_irritation
135
+ weakness
136
+ decreased_heart_rate
137
+ increased_heart_rate
138
+ bleeding_or_discharge_from_nipple
139
+ ringing_in_ear
140
+ plugged_feeling_in_ear
141
+ itchy_ear(s)
142
+ frontal_headache
143
+ fluid_in_ear
144
+ neck_stiffness_or_tightness
145
+ spots_or_clouds_in_vision
146
+ eye_redness
147
+ lacrimation
148
+ itchiness_of_eye
149
+ blindness
150
+ eye_burns_or_stings
151
+ itchy_eyelid
152
+ decreased_appetite
153
+ excessive_appetite
154
+ excessive_anger
155
+ loss_of_sensation
156
+ focal_weakness
157
+ slurring_words
158
+ symptoms_of_the_face
159
+ disturbance_of_memory
160
+ paresthesia
161
+ side_pain
162
+ fever
163
+ shoulder_pain
164
+ shoulder_stiffness_or_tightness
165
+ shoulder_weakness
166
+ shoulder_swelling
167
+ tongue_lesions
168
+ leg_cramps_or_spasms
169
+ ache_all_over
170
+ lower_body_pain
171
+ problems_during_pregnancy
172
+ spotting_or_bleeding_during_pregnancy
173
+ cramps_and_spasms
174
+ upper_abdominal_pain
175
+ stomach_bloating
176
+ changes_in_stool_appearance
177
+ unusual_color_or_odor_to_urine
178
+ kidney_mass
179
+ swollen_abdomen
180
+ symptoms_of_prostate
181
+ leg_stiffness_or_tightness
182
+ difficulty_breathing
183
+ rib_pain
184
+ joint_pain
185
+ muscle_stiffness_or_tightness
186
+ hand_or_finger_lump_or_mass
187
+ chills
188
+ groin_pain
189
+ fatigue
190
+ abdominal_distention
191
+ regurgitation.1
192
+ symptoms_of_the_kidneys
193
+ melena
194
+ coughing_up_sputum
195
+ seizures
196
+ delusions_or_hallucinations
197
+ pain_or_soreness_of_breast
198
+ excessive_urination_at_night
199
+ bleeding_from_eye
200
+ rectal_bleeding
201
+ constipation
202
+ temper_problems
203
+ coryza
204
+ hemoptysis
205
+ lymphedema
206
+ skin_on_leg_or_foot_looks_infected
207
+ allergic_reaction
208
+ congestion_in_chest
209
+ muscle_swelling
210
+ sleepiness
211
+ apnea
212
+ abnormal_breathing_sounds
213
+ blood_clots_during_menstrual_periods
214
+ absence_of_menstruation
215
+ pulling_at_ears
216
+ gum_pain
217
+ redness_in_ear
218
+ fluid_retention
219
+ flu-like_syndrome
220
+ sinus_congestion
221
+ painful_sinuses
222
+ fears_and_phobias
223
+ recent_pregnancy
224
+ uterine_contractions
225
+ burning_chest_pain
226
+ back_cramps_or_spasms
227
+ stiffness_all_over
228
+ muscle_cramps,_contractures,_or_spasms
229
+ back_mass_or_lump
230
+ nosebleed
231
+ long_menstrual_periods
232
+ heavy_menstrual_flow
233
+ unpredictable_menstruation
234
+ painful_menstruation
235
+ infertility
236
+ frequent_menstruation
237
+ sweating
238
+ mass_on_eyelid
239
+ swollen_eye
240
+ eyelid_swelling
241
+ eyelid_lesion_or_rash
242
+ symptoms_of_bladder
243
+ irregular_appearing_nails
244
+ itching_of_skin
245
+ hurts_to_breath
246
+ skin_dryness,_peeling,_scaliness,_or_roughness
247
+ skin_on_arm_or_hand_looks_infected
248
+ skin_irritation
249
+ itchy_scalp
250
+ warts
251
+ bumps_on_penis
252
+ too_little_hair
253
+ skin_rash
254
+ mass_or_swelling_around_the_anus
255
+ ankle_swelling
256
+ dry_or_flaky_scalp
257
+ foot_or_toe_stiffness_or_tightness
258
+ elbow_swelling
259
+ early_or_late_onset_of_menopause
260
+ bleeding_from_ear
261
+ hand_or_finger_weakness
262
+ low_self-esteem
263
+ itching_of_the_anus
264
+ swollen_or_red_tonsils
265
+ irregular_belly_button
266
+ hip_stiffness_or_tightness
267
+ mouth_pain
268
+ arm_weakness
269
+ penis_pain
270
+ loss_of_sex_drive
271
+ obsessions_and_compulsions
272
+ antisocial_behavior
273
+ neck_cramps_or_spasms
274
+ sneezing
275
+ leg_weakness
276
+ penis_redness
277
+ penile_discharge
278
+ shoulder_lump_or_mass
279
+ cloudy_eye
280
+ hysterical_behavior
281
+ arm_lump_or_mass
282
+ nightmares
283
+ bleeding_gums
284
+ pain_in_gums
285
+ bedwetting
286
+ diaper_rash
287
+ lump_or_mass_of_breast
288
+ postpartum_problems_of_the_breast
289
+ hesitancy
290
+ throat_redness
291
+ joint_swelling
292
+ redness_in_or_around_nose
293
+ wrinkles_on_skin
294
+ back_stiffness_or_tightness
295
+ wrist_lump_or_mass
296
+ low_urine_output
297
+ sore_in_nose
symptom_model.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae1e0d191d55db28ae79157690febcd9121cf911e6bacc5dedba1cd30dbdc572
3
+ size 615592529
symptom_model.labels.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da84a97a05c084170813701d487044da5f40a019a81fb4896094042489657ac0
3
+ size 910