sunbal7 committed on
Commit
e23fe35
·
verified ·
1 Parent(s): f6bd117

Upload 2 files

Browse files
Files changed (2) hide show
  1. chatbot.py +125 -0
  2. ocr_processor.py +118 -0
chatbot.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from datetime import datetime
3
+
4
class HealthcareChatbot:
    """Rule-based bilingual (English/Urdu) healthcare tips assistant.

    Replies are selected by keyword lookup against a small in-memory
    knowledge base built in ``__init__``.
    """

    # English greetings are matched as whole words so that "high", "this"
    # or "they" are not mistaken for "hi" / "hey".
    _ENGLISH_GREETINGS = frozenset({'hello', 'hi', 'hey'})
    # Urdu greetings keep substring matching so that prefixed forms such as
    # "السلام" still count as a greeting.
    _URDU_GREETINGS = ('سلام', 'ہیلو')

    # (keywords, knowledge-base key, Urdu-response key, English heading),
    # checked in order; the first rule with a matching keyword wins.
    _TOPIC_RULES = (
        (('heart', 'cardiac', 'chest', 'دل', 'سینہ'),
         'heart_health', 'heart_health', "Heart Health Tips:\n"),
        (('diabetes', 'sugar', 'glucose', 'شوگر', 'ذیابیطس'),
         'diabetes_management', 'diabetes', "Diabetes Management Tips:\n"),
        (('blood pressure', 'hypertension', 'bp', 'بلڈ پریشر'),
         'hypertension', 'hypertension', "Hypertension Management Tips:\n"),
    )

    # Number of general tips returned when no topic is recognised.
    _GENERAL_TIP_COUNT = 3

    def __init__(self):
        """Build the English knowledge base and the canned Urdu replies."""
        self.health_knowledge_base = {
            'heart_health': {
                'tips': [
                    "Maintain a heart-healthy diet rich in fruits, vegetables, and whole grains",
                    "Aim for at least 150 minutes of moderate exercise per week",
                    "Monitor your blood pressure regularly",
                    "Avoid smoking and limit alcohol consumption",
                    "Manage stress through relaxation techniques"
                ],
                'symptoms': {
                    'chest_pain': "Chest pain can be a sign of heart issues. Consult a doctor immediately.",
                    'shortness_breath': "Shortness of breath may indicate cardiac problems. Seek medical attention."
                }
            },
            'diabetes_management': {
                'tips': [
                    "Monitor blood sugar levels regularly",
                    "Follow a balanced diet with controlled carbohydrates",
                    "Take medications as prescribed by your doctor",
                    "Stay physically active with regular exercise",
                    "Get regular eye and foot examinations"
                ]
            },
            'hypertension': {
                'tips': [
                    "Reduce sodium intake in your diet",
                    "Maintain a healthy body weight",
                    "Limit alcohol and caffeine consumption",
                    "Practice stress management techniques",
                    "Take prescribed medications regularly"
                ]
            },
            'general_health': {
                'tips': [
                    "Get 7-9 hours of quality sleep each night",
                    "Stay hydrated by drinking plenty of water",
                    "Practice good hygiene and hand washing",
                    "Get regular health check-ups",
                    "Maintain a balanced diet and healthy lifestyle"
                ]
            }
        }

        self.urdu_responses = {
            'greeting': "السلام علیکم! میں آپ کی صحت کے معاملات میں مدد کر سکتا ہوں۔",
            'heart_health': "دل کی صحت کے لیے مشورے:\n• پھل، سبزیاں اور سارے اناج سے بھرپور غذا کھائیں\n• ہفتے میں کم از کم 150 منٹ ورزش کریں\n• اپنا بلڈ پریشر باقاعدگی سے چیک کریں",
            'diabetes': "ذیابیطس کے انتظام کے لیے:\n• خون میں شکر کی سطح باقاعدگی سے چیک کریں\n• متوازن غذا کھائیں\n• دوائیں ڈاکٹر کے مشورے سے لیں",
            'hypertension': "ہائی بلڈ پریشر کے لیے:\n• نمک کا استعمال کم کریں\n• صحت مند وزن برقرار رکھیں\n• ورزش کو اپنی عادت بنائیں"
        }

    @classmethod
    def _is_greeting(cls, text):
        """Return True if lower-cased ``text`` contains a greeting."""
        # Replace punctuation with spaces so "hi!" / "hey," tokenise cleanly.
        words = ''.join(ch if ch.isalnum() else ' ' for ch in text).split()
        if not cls._ENGLISH_GREETINGS.isdisjoint(words):
            return True
        return any(greeting in text for greeting in cls._URDU_GREETINGS)

    @staticmethod
    def _normalize_symptom(value):
        """Lower-case ``value`` and join its words with underscores.

        Hyphens and underscores are treated as word separators and the
        filler word "of" is dropped, so "Severe shortness of breath"
        becomes ``severe_shortness_breath``.
        """
        words = str(value).lower().replace('-', ' ').replace('_', ' ').split()
        return '_'.join(word for word in words if word != 'of')

    def get_response(self, user_input, language='English'):
        """Return a reply for ``user_input``.

        Args:
            user_input: Free-text message; ``None`` is treated as empty.
            language: ``'Urdu'`` selects the Urdu replies; any other value
                selects English.

        Returns:
            A greeting when the message contains one, otherwise the result
            of ``generate_topic_response``.
        """
        text = (user_input or '').lower()

        if self._is_greeting(text):
            if language == 'Urdu':
                return self.urdu_responses['greeting']
            return "Hello! I'm your healthcare assistant. How can I help you today?"

        return self.generate_topic_response(text, language)

    def generate_topic_response(self, user_input, language):
        """Return tips for the first health topic whose keyword occurs in the text.

        Keywords are matched as substrings of the lower-cased input. When no
        topic matches, up to three randomly chosen general tips are returned.
        """
        text = (user_input or '').lower()

        for keywords, kb_key, urdu_key, heading in self._TOPIC_RULES:
            if any(keyword in text for keyword in keywords):
                if language == 'Urdu':
                    return self.urdu_responses[urdu_key]
                tips = self.health_knowledge_base[kb_key]['tips']
                return heading + "\n".join([f"• {tip}" for tip in tips])

        tips = self.health_knowledge_base['general_health']['tips']
        # Guard the sample size so a shortened tip list cannot raise ValueError.
        chosen = random.sample(tips, min(self._GENERAL_TIP_COUNT, len(tips)))
        # NOTE(review): the Urdu branch pairs an Urdu heading with the English
        # tip text because no Urdu general tips are defined in this class.
        heading = "عام صحت کے نکات:\n" if language == 'Urdu' else "General Health Tips:\n"
        return heading + "\n".join([f"• {tip}" for tip in chosen])

    def get_emergency_advice(self, symptoms, language='English'):
        """Return emergency advice for the first recognised symptom.

        Args:
            symptoms: A string or an iterable of symptom names. Matching
                ignores case and treats spaces, hyphens and underscores
                alike, so ``"Chest pain"`` matches ``chest_pain``.
            language: ``'Urdu'`` selects the Urdu text; any other value
                selects English.

        Returns:
            The advice string, or ``None`` when nothing matches or
            ``symptoms`` is empty/``None``.
        """
        emergency_symptoms = {
            'chest_pain': {
                'english': "Chest pain can be serious. Seek immediate medical attention.",
                'urdu': "سینے میں درد سنگین ہو سکتا ہے۔ فوری طبی امداد حاصل کریں۔"
            },
            'severe_shortness_breath': {
                'english': "Severe shortness of breath requires emergency care.",
                'urdu': "سانس لینے میں شدید دشواری ہنگامی علاج کی ضرورت ہے۔"
            },
            'fainting': {
                'english': "Fainting spells need immediate medical evaluation.",
                'urdu': "بیہوشی کے دورے فوری طبی تشخیص کی ضرورت ہے۔"
            }
        }

        if not symptoms:
            return None

        reported = [symptoms] if isinstance(symptoms, str) else list(symptoms)
        normalized = [self._normalize_symptom(item) for item in reported]

        for symptom, advice in emergency_symptoms.items():
            if any(symptom in item for item in normalized):
                return advice['urdu'] if language == 'Urdu' else advice['english']

        return None
ocr_processor.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import pytesseract
4
+ from PIL import Image
5
+ import re
6
+
7
class PrescriptionOCR:
    """Extract medication details from prescription images using Tesseract OCR."""

    # Amount followed by a unit. The optional decimal part keeps "0.5 mg"
    # from being read as "5 mg"; "capsule" is listed before "cap" so the
    # full word is captured when present.
    _DOSAGE_RE = re.compile(
        r'(\d+(?:\.\d+)?\s*(?:mg|ml|tablet|capsule|cap)s?)',
        re.IGNORECASE,
    )
    # "<count> <period>" phrases such as "twice daily" or "3 times a week".
    _FREQUENCY_RE = re.compile(
        r'(?:once|twice|thrice|\d+\s*times)\s*'
        r'(?:daily|weekly|monthly|(?:a|per)\s+(?:day|week|month))',
        re.IGNORECASE,
    )

    def __init__(self):
        """Set the keywords that mark an OCR line as medication-related."""
        self.medication_keywords = [
            'tablet', 'capsule', 'mg', 'ml', 'injection', 'dose',
            'twice', 'thrice', 'daily', 'weekly', 'monthly'
        ]

    def preprocess_image(self, image):
        """Return a binarised, denoised version of ``image`` for OCR.

        Steps: grayscale conversion, 3x3 median blur, Otsu threshold, and a
        2x2 morphological close. On any failure the error is printed and the
        unprocessed image array is returned instead.

        Args:
            image: Anything ``np.array`` accepts, e.g. a PIL image.
                Assumes RGB channel order for colour input.
        """
        try:
            img_array = np.array(image)

            if len(img_array.shape) == 3:
                channels = img_array.shape[2]
                if channels == 4:
                    # Images with an alpha channel need the RGBA conversion code.
                    gray = cv2.cvtColor(img_array, cv2.COLOR_RGBA2GRAY)
                elif channels == 1:
                    # Already single-channel; just drop the trailing axis.
                    gray = np.ascontiguousarray(img_array[:, :, 0])
                else:
                    gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
            else:
                gray = img_array

            denoised = cv2.medianBlur(gray, 3)
            _, thresh = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            kernel = np.ones((2, 2), np.uint8)
            return cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

        except Exception as e:
            # Best effort: OCR can still be attempted on the raw image.
            print(f"Image preprocessing error: {e}")
            return np.array(image)

    def extract_medication_info(self, text):
        """Collect medication entries from OCR ``text``, one per matching line.

        A line is kept when it contains any medication keyword as a
        substring (case-insensitive).

        Returns:
            A list of dicts with keys ``text`` (the stripped line),
            ``dosages`` (list of matched dosage strings), ``frequency``
            (first frequency phrase, or ``'Unknown'``) and ``confidence``
            (``'High'`` when a dosage was found, else ``'Medium'``).
        """
        medications = []

        for line in text.split('\n'):
            line_clean = line.strip()
            lowered = line_clean.lower()
            if not any(keyword in lowered for keyword in self.medication_keywords):
                continue

            dosages = self._DOSAGE_RE.findall(line_clean)
            frequency = self._FREQUENCY_RE.findall(line_clean)

            medications.append({
                'text': line_clean,
                'dosages': dosages,
                'frequency': frequency[0] if frequency else 'Unknown',
                'confidence': 'High' if dosages else 'Medium'
            })

        return medications

    def process_prescription(self, image):
        """Run preprocessing, OCR and medication extraction on ``image``.

        Returns:
            A dict with ``raw_text``, ``medications``, ``confidence_score``
            and ``medication_count``. If any step raises, the same keys are
            returned with empty/zero values plus an ``error`` message.
        """
        try:
            processed_img = self.preprocess_image(image)

            # OEM 3 / PSM 6, English language data.
            custom_config = r'--oem 3 --psm 6 -l eng'
            extracted_text = pytesseract.image_to_string(processed_img, config=custom_config)

            medications = self.extract_medication_info(extracted_text)
            confidence = self.calculate_confidence(extracted_text, medications)

            return {
                'raw_text': extracted_text,
                'medications': medications,
                'confidence_score': confidence,
                'medication_count': len(medications)
            }

        except Exception as e:
            # Keep the result shape identical to the success case so callers
            # can read every key unconditionally.
            return {
                'raw_text': '',
                'medications': [],
                'confidence_score': 0,
                'medication_count': 0,
                'error': str(e)
            }

    def calculate_confidence(self, text, medications):
        """Return a heuristic 0-100 confidence score for an OCR result.

        The score is ``len(text) / 10`` (capped at 100) plus 15 per detected
        medication, minus 20 when the text is shorter than 20 characters and
        minus 15 when more than 30% of it is unusual punctuation. Blank text
        scores 0.
        """
        if not text.strip():
            return 0

        base_score = min(100, len(text) / 10)
        medication_bonus = len(medications) * 15

        error_penalty = 0
        if len(text) < 20:
            error_penalty += 20
        # Many symbols other than word characters, whitespace, '.' and ','
        # usually indicate OCR noise.
        if len(re.findall(r'[^\w\s.,]', text)) > len(text) * 0.3:
            error_penalty += 15

        final_score = base_score + medication_bonus - error_penalty
        return max(0, min(100, final_score))