Spaces:
Sleeping
Sleeping
File size: 2,627 Bytes
d65f7e4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
import re
import logging
from difflib import get_close_matches
# Setup logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
def extract_disease_name(prediction):
"""
Extract just the disease name from a prediction that might include plant name
Args:
prediction: Full prediction string (e.g., "Tomato_Late_blight")
Returns:
Extracted disease name
"""
try:
# Handle common formats like "Plant_Disease" or "Plant Disease"
if '_' in prediction:
parts = prediction.split('_')
else:
parts = prediction.split(' ')
if len(parts) <= 1:
return prediction # Return as is if can't split
# Skip the plant name (first part) and join the rest
disease_parts = parts[1:]
disease_name = ' '.join(disease_parts)
# Clean up the disease name
disease_name = disease_name.replace('_', ' ').strip()
return disease_name
except Exception as e:
logger.error(f"Error extracting disease name: {str(e)}")
return prediction # Return original on error
def normalize_disease_name(disease_name):
"""
Normalize disease names for better matching
Args:
disease_name: Disease name to normalize
Returns:
Normalized disease name
"""
# Convert to lowercase
normalized = disease_name.lower()
# Remove special characters
normalized = re.sub(r'[^a-z0-9\s]', '', normalized)
# Replace multiple spaces with single space
normalized = re.sub(r'\s+', ' ', normalized).strip()
return normalized
def find_similar_disease(disease_name, known_diseases):
"""
Find the most similar disease name in a list of known diseases
Args:
disease_name: Query disease name
known_diseases: List of known disease names
Returns:
Most similar disease name, or the original if no good match
"""
# Normalize the query
norm_query = normalize_disease_name(disease_name)
# Normalize all known diseases
norm_known = [normalize_disease_name(d) for d in known_diseases]
# Find close matches
matches = get_close_matches(norm_query, norm_known, n=1, cutoff=0.6)
if matches:
# Get the index of the match in the normalized list
match_idx = norm_known.index(matches[0])
# Return the original form of the matched disease
return known_diseases[match_idx]
return disease_name # Return original if no good matches |