gkc55 commited on
Commit
3fe126f
·
0 Parent(s):

Add Flask-based Word Sense Disambiguation Tool with Enhanced Lesk Algorithm

Browse files

- Implemented a web application using Flask for word sense disambiguation.
- Added Enhanced Lesk algorithm with BERT integration for improved disambiguation accuracy.
- Created templates for input, results, error handling, and explanation of the Lesk algorithm.
- Included user feedback mechanism to adapt and improve disambiguation over time.
- Added example sentences for common ambiguous words to assist users.
- Established a feedback system to record user corrections and enhance future performance.
- Included necessary dependencies in requirements.txt for Flask, NLTK, Transformers, and PyTorch.

app.py ADDED
@@ -0,0 +1,495 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, request, redirect, url_for, jsonify, session
2
+ import nltk
3
+ from nltk.corpus import wordnet as wn
4
+ from nltk.tokenize import word_tokenize, sent_tokenize
5
+ from nltk.tag import pos_tag
6
+ from nltk.stem import WordNetLemmatizer
7
+ from collections import Counter
8
+ import re
9
+ import os
10
+ import json
11
+ import random
12
+
13
# Download required NLTK resources
# NOTE(review): these run on every import of this module; consider
# nltk.download(..., quiet=True) or a one-time setup step.
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('stopwords')

app = Flask(__name__)
# Secret key for signing session cookies.
# NOTE(review): hard-coded secret — move to an environment variable before
# deploying anywhere real.
app.secret_key = 'wsd_secret_key_2023'

# Path for storing feedback data (JSON, relative to the working directory)
FEEDBACK_FILE = 'feedback_data.json'
24
+
25
class EnhancedLesk:
    """Word-sense disambiguator combining the Lesk overlap algorithm with
    hand-coded rules, collocation matching, optional BERT similarity, and
    persisted user feedback."""

    def __init__(self):
        # Persisted user feedback: per-(word, context-hash) sense-score boosts.
        self.feedback = self.load_feedback()
        self.lemmatizer = WordNetLemmatizer()
        self.stopwords = set(nltk.corpus.stopwords.words('english'))

        # Try to load BERT models if available. BERT scoring is optional:
        # when transformers/torch are missing (or the model download fails),
        # the tool degrades gracefully to rule- and overlap-based scoring.
        try:
            from transformers import AutoTokenizer, AutoModel
            import torch

            # Load pre-trained model and tokenizer
            print("Loading BERT models...")
            self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
            self.bert_model = AutoModel.from_pretrained('bert-base-uncased')
            self.bert_available = True
            print("BERT models loaded successfully")
        except Exception as e:
            # Broad catch is deliberate: any import/download failure means
            # "no BERT", not a fatal error.
            print(f"BERT models not available: {e}")
            print("Continuing without BERT embeddings")
            self.bert_available = False
46
+
47
+ def load_feedback(self):
48
+ if os.path.exists(FEEDBACK_FILE):
49
+ with open(FEEDBACK_FILE) as f:
50
+ return json.load(f)
51
+ return {}
52
+
53
    def save_feedback(self):
        """Persist the in-memory feedback dict to FEEDBACK_FILE as JSON."""
        with open(FEEDBACK_FILE, 'w') as f:
            json.dump(self.feedback, f)
56
+
57
+ def get_wordnet_pos(self, treebank_tag):
58
+ """Convert POS tag to WordNet POS format"""
59
+ if treebank_tag.startswith('J'):
60
+ return wn.ADJ
61
+ elif treebank_tag.startswith('V'):
62
+ return wn.VERB
63
+ elif treebank_tag.startswith('N'):
64
+ return wn.NOUN
65
+ elif treebank_tag.startswith('R'):
66
+ return wn.ADV
67
+ else:
68
+ return None
69
+
70
+ def process_context(self, sentence, target_word):
71
+ """Process context words with positional weighting"""
72
+ words = word_tokenize(sentence.lower())
73
+
74
+ # Find target word position
75
+ target_pos = -1
76
+ for i, word in enumerate(words):
77
+ if word.lower() == target_word.lower():
78
+ target_pos = i
79
+ break
80
+
81
+ # Process context words with proximity weighting
82
+ context_words = []
83
+ for i, word in enumerate(words):
84
+ if word.isalpha() and word not in self.stopwords:
85
+ lemma = self.lemmatizer.lemmatize(word)
86
+
87
+ # Weight by proximity to target word (closer = more important)
88
+ if target_pos >= 0:
89
+ distance = abs(i - target_pos)
90
+ # Add word multiple times based on proximity (max 5 times for adjacent words)
91
+ weight = max(1, 6 - distance) if distance <= 5 else 1
92
+ context_words.extend([lemma] * weight)
93
+ else:
94
+ context_words.append(lemma)
95
+
96
+ return context_words
97
+
98
+ def calculate_overlap_score(self, sense, context):
99
+ """Calculate overlap between sense signature and context with improved weighting"""
100
+ # Create rich signature from sense
101
+ signature = []
102
+
103
+ # Add definition words (higher weight)
104
+ def_words = [w.lower() for w in word_tokenize(sense.definition())
105
+ if w.isalpha() and w not in self.stopwords]
106
+ signature.extend(def_words * 2) # Double weight for definition
107
+
108
+ # Add example words
109
+ for example in sense.examples():
110
+ ex_words = [w.lower() for w in word_tokenize(example)
111
+ if w.isalpha() and w not in self.stopwords]
112
+ signature.extend(ex_words)
113
+
114
+ # Add hypernyms, hyponyms, meronyms and holonyms
115
+ for hypernym in sense.hypernyms():
116
+ hyper_words = [w.lower() for w in word_tokenize(hypernym.definition())
117
+ if w.isalpha() and w not in self.stopwords]
118
+ signature.extend(hyper_words)
119
+
120
+ for hyponym in sense.hyponyms():
121
+ hypo_words = [w.lower() for w in word_tokenize(hyponym.definition())
122
+ if w.isalpha() and w not in self.stopwords]
123
+ signature.extend(hypo_words)
124
+
125
+ # Add meronyms and holonyms
126
+ for meronym in sense.part_meronyms() + sense.substance_meronyms():
127
+ meronym_words = [w.lower() for w in word_tokenize(meronym.definition())
128
+ if w.isalpha() and w not in self.stopwords]
129
+ signature.extend(meronym_words)
130
+
131
+ for holonym in sense.part_holonyms() + sense.substance_holonyms():
132
+ holonym_words = [w.lower() for w in word_tokenize(holonym.definition())
133
+ if w.isalpha() and w not in self.stopwords]
134
+ signature.extend(holonym_words)
135
+
136
+ # Calculate overlap using Counter for better frequency matching
137
+ context_counter = Counter(context)
138
+ signature_counter = Counter(signature)
139
+
140
+ # Calculate weighted overlap
141
+ overlap_score = 0
142
+ for word, count in context_counter.items():
143
+ if word in signature_counter:
144
+ # Score is product of frequencies
145
+ overlap_score += count * min(signature_counter[word], 5)
146
+
147
+ return overlap_score
148
+
149
+ def bert_similarity(self, sense, context_sentence, target_word):
150
+ """Calculate semantic similarity using BERT embeddings"""
151
+ if not hasattr(self, 'bert_available') or not self.bert_available:
152
+ return 0
153
+
154
+ try:
155
+ import torch
156
+
157
+ # Create context-gloss pair as in GlossBERT
158
+ gloss = sense.definition()
159
+
160
+ # Tokenize
161
+ inputs = self.tokenizer(context_sentence, gloss, return_tensors="pt",
162
+ padding=True, truncation=True, max_length=512)
163
+
164
+ # Get embeddings
165
+ with torch.no_grad():
166
+ outputs = self.bert_model(**inputs)
167
+
168
+ # Use CLS token embedding for similarity
169
+ similarity = torch.cosine_similarity(
170
+ outputs.last_hidden_state[0, 0],
171
+ outputs.last_hidden_state[0, inputs.input_ids[0].tolist().index(self.tokenizer.sep_token_id) + 1]
172
+ ).item()
173
+
174
+ return similarity * 10 # Scale up to be comparable with other scores
175
+ except Exception as e:
176
+ print(f"Error in BERT similarity calculation: {e}")
177
+ return 0
178
+
179
+ def check_collocations(self, sentence, target_word):
180
+ """Check for common collocations that indicate specific senses"""
181
+ collocations = {
182
+ "bat": {
183
+ "noun.animal": ["flying bat", "bat flying", "bat wings", "vampire bat", "fruit bat", "bat in the dark", "bat at night"],
184
+ "noun.artifact": ["baseball bat", "cricket bat", "swing the bat", "wooden bat", "hit with bat"]
185
+ },
186
+ "bank": {
187
+ "noun.artifact": ["bank account", "bank manager", "bank loan", "bank robbery", "money in bank"],
188
+ "noun.object": ["river bank", "bank of the river", "west bank", "bank erosion", "along the bank"]
189
+ },
190
+ "bass": {
191
+ "noun.animal": ["bass fish", "catch bass", "fishing bass", "largemouth bass"],
192
+ "noun.attribute": ["bass sound", "bass guitar", "bass player", "bass note", "bass drum"]
193
+ },
194
+ "spring": {
195
+ "noun.time": ["spring season", "this spring", "last spring", "spring weather", "spring flowers"],
196
+ "noun.artifact": ["metal spring", "spring coil", "spring mechanism"],
197
+ "noun.object": ["water spring", "hot spring", "spring water"]
198
+ },
199
+ "crane": {
200
+ "noun.animal": ["crane bird", "crane flew", "crane nest", "crane species"],
201
+ "noun.artifact": ["construction crane", "crane operator", "crane lifted"]
202
+ }
203
+ }
204
+
205
+ if target_word not in collocations:
206
+ return None, 0
207
+
208
+ # Check for collocations in sentence
209
+ sentence_lower = sentence.lower()
210
+ for domain, phrases in collocations[target_word].items():
211
+ for phrase in phrases:
212
+ if phrase.lower() in sentence_lower:
213
+ # Find matching sense
214
+ for sense in wn.synsets(target_word):
215
+ if sense.lexname() == domain:
216
+ return sense, 15 # Very high confidence for collocations
217
+
218
+ return None, 0
219
+
220
    def apply_rules(self, word, context, senses):
        """Apply hand-coded rules for common ambiguous words.

        Scans *context* (iterable of lemmatized context words) for indicator
        terms; on a hit, returns a confidence boost plus the first sense in
        *senses* matching the rule's criterion. Rule order matters: within a
        word, earlier rules win.

        Returns:
            (score, sense): score > 0 with a synset when a rule fired,
            otherwise (0, None).

        NOTE(review): the tuple order here is (score, sense) — the reverse of
        check_collocations' (sense, score). Easy to mix up at call sites.
        """
        word = word.lower()
        context_words = set(context)

        # Rules for "bat"
        if word == "bat":
            # Animal sense rules: these words point at the flying mammal.
            animal_indicators = {"fly", "flying", "flew", "wing", "wings", "night",
                                 "dark", "cave", "nocturnal", "mammal", "animal", "leather", "leathery"}
            if any(indicator in context_words for indicator in animal_indicators):
                # Find animal sense
                for sense in senses:
                    if sense.lexname() == "noun.animal":
                        return 10, sense  # High confidence boost

            # Sports equipment rules (slightly weaker boost than the animal rule)
            sports_indicators = {"hit", "swing", "ball", "baseball", "cricket",
                                 "player", "game", "sport", "team", "wooden"}
            if any(indicator in context_words for indicator in sports_indicators):
                # Find artifact sense
                for sense in senses:
                    if sense.lexname() == "noun.artifact":
                        return 8, sense  # High confidence boost

        # Rules for "bank"
        elif word == "bank":
            # Financial institution rules (matched via definition text)
            finance_indicators = {"money", "account", "deposit", "withdraw", "loan",
                                  "credit", "debit", "financial", "cash", "check"}
            if any(indicator in context_words for indicator in finance_indicators):
                for sense in senses:
                    if "financial" in sense.definition() or "money" in sense.definition():
                        return 10, sense

            # River bank rules (matched via definition text)
            river_indicators = {"river", "stream", "water", "flow", "shore", "beach"}
            if any(indicator in context_words for indicator in river_indicators):
                for sense in senses:
                    if "river" in sense.definition() or "stream" in sense.definition():
                        return 10, sense

        # Rules for "bass"
        elif word == "bass":
            # Fish sense rules
            fish_indicators = {"fish", "fishing", "catch", "caught", "water", "lake", "river"}
            if any(indicator in context_words for indicator in fish_indicators):
                for sense in senses:
                    if sense.lexname() == "noun.animal":
                        return 10, sense

            # Sound/music sense rules
            music_indicators = {"music", "sound", "guitar", "player", "band", "note", "tone", "instrument", "concert", "loud"}
            if any(indicator in context_words for indicator in music_indicators):
                for sense in senses:
                    if sense.lexname() == "noun.attribute" or "music" in sense.definition():
                        return 10, sense

        # No rule matched with high confidence
        return 0, None
280
+
281
+ def safe_compare_synsets(self, synset1, synset2):
282
+ """Safely compare two synsets, handling None values."""
283
+ if synset1 is None or synset2 is None:
284
+ return synset1 is synset2 # True only if both are None
285
+
286
+ # Use the built-in equality check for synsets
287
+ try:
288
+ return synset1 == synset2
289
+ except AttributeError:
290
+ return False # If comparison fails, they're not equal
291
+
292
+ def disambiguate(self, sentence, word):
293
+ """Disambiguate a word in a given sentence context"""
294
+ word = word.lower()
295
+
296
+ # Get POS tag for the target word
297
+ word_tokens = word_tokenize(sentence)
298
+ pos_tags = pos_tag(word_tokens)
299
+ word_pos = None
300
+
301
+ for token, pos in pos_tags:
302
+ if token.lower() == word:
303
+ word_pos = self.get_wordnet_pos(pos)
304
+ break
305
+
306
+ # Get senses filtered by POS if available
307
+ if word_pos:
308
+ senses = [s for s in wn.synsets(word) if s.pos() == word_pos]
309
+ if not senses:
310
+ senses = wn.synsets(word)
311
+ else:
312
+ senses = wn.synsets(word)
313
+
314
+ if not senses:
315
+ return None, []
316
+
317
+ # Process context with positional weighting
318
+ context = self.process_context(sentence, word)
319
+
320
+ # 1. Check for collocations first (highest priority)
321
+ collocation_sense, collocation_score = self.check_collocations(sentence, word)
322
+ if collocation_sense and collocation_score > 0:
323
+ # Return the collocation sense and remaining senses as alternatives
324
+ top_senses = [s for s in senses if not self.safe_compare_synsets(s, collocation_sense)][:3]
325
+ return collocation_sense, top_senses
326
+
327
+ # 2. Apply rules for common ambiguous words
328
+ rule_score, rule_sense = self.apply_rules(word, context, senses)
329
+
330
+ # Score each sense
331
+ scored_senses = []
332
+ for sense in senses:
333
+ # If this sense was selected by rules, add the rule score
334
+ # FIX: Use safe comparison to prevent AttributeError
335
+ rule_boost = rule_score if (rule_sense is not None and self.safe_compare_synsets(sense, rule_sense)) else 0
336
+
337
+ # Calculate base score using overlap
338
+ overlap_score = self.calculate_overlap_score(sense, context)
339
+
340
+ # Calculate BERT similarity if available
341
+ bert_score = 0
342
+ if hasattr(self, 'bert_available') and self.bert_available:
343
+ bert_score = self.bert_similarity(sense, sentence, word)
344
+
345
+ # Apply feedback boost if available
346
+ feedback_key = f"{word}_{hash(sentence) % 10000}"
347
+ feedback_score = self.feedback.get(feedback_key, {}).get(sense.name(), 0)
348
+
349
+ # Calculate final score as weighted combination
350
+ final_score = (
351
+ overlap_score * 0.4 +
352
+ bert_score * 0.3 +
353
+ rule_boost * 0.2 +
354
+ feedback_score * 0.1
355
+ )
356
+
357
+ scored_senses.append((final_score, sense))
358
+
359
+ scored_senses.sort(reverse=True, key=lambda x: x[0])
360
+
361
+ if not scored_senses:
362
+ return None, []
363
+
364
+ best_sense = scored_senses[0][1]
365
+ top_senses = [s[1] for s in scored_senses[1:4]]
366
+ return best_sense, top_senses
367
+
368
+ def add_feedback(self, word, context, correct_sense):
369
+ """Store user feedback to improve future disambiguation"""
370
+ # Create a key based on word and hashed context
371
+ context_str = ' '.join(context[:10]) # Use first 10 context words
372
+ key = f"{word}_{hash(context_str) % 10000}"
373
+
374
+ if key not in self.feedback:
375
+ self.feedback[key] = {}
376
+
377
+ # Increase score for the correct sense
378
+ self.feedback[key][correct_sense] = self.feedback[key].get(correct_sense, 0) + 5
379
+
380
+ # Optionally decrease scores for other senses
381
+ for sense in wn.synsets(word):
382
+ if sense.name() != correct_sense and sense.name() in self.feedback[key]:
383
+ self.feedback[key][sense.name()] = max(0, self.feedback[key][sense.name()] - 1)
384
+
385
+ self.save_feedback()
386
+
387
+ # Return the updated sense information
388
+ for sense in wn.synsets(word):
389
+ if sense.name() == correct_sense:
390
+ return {
391
+ 'definition': sense.definition(),
392
+ 'examples': sense.examples()
393
+ }
394
+
395
+ return None
396
+
397
# Initialize the Lesk processor: module-level singleton shared by all
# request handlers (loads models and feedback once at import time).
lesk_processor = EnhancedLesk()
399
+
400
@app.route('/', methods=['GET', 'POST'])
def index():
    """Landing page; on POST, forward the text and optional target word to /results."""
    if request.method == 'POST':
        # FIX: use .get so a malformed POST without a 'text' field doesn't
        # raise a KeyError (HTTP 400); an empty string simply produces an
        # empty results page instead.
        text = request.form.get('text', '')
        target_word = request.form.get('target_word', '')
        return redirect(url_for('results', text=text, word=target_word))
    return render_template('index.html')
407
+
408
@app.route('/results')
def results():
    """Disambiguation results page.

    Reads `text` and optional `word` from the query string. When no target
    word is given, falls back to the first token in the text that has more
    than one WordNet sense. Disambiguates using the first sentence that
    contains the target word, highlights the word across the whole text,
    and stashes the context in the session for the /feedback endpoint.
    """
    text = request.args.get('text', '')
    target_word = request.args.get('word', '').lower()

    if not target_word:
        # Find ambiguous words (with multiple senses)
        words = word_tokenize(text.lower())
        ambiguous_words = []
        for word in words:
            if word.isalpha() and len(wn.synsets(word)) > 1:
                ambiguous_words.append(word)

        # If there are ambiguous words, use the first one
        if ambiguous_words:
            target_word = ambiguous_words[0]

    best_sense = None
    top_senses = []
    highlighted_text = text
    sentence = ""
    context_words = []

    if target_word:
        sentences = sent_tokenize(text)
        for sent in sentences:
            # Whole-word, case-insensitive match of the target in this sentence.
            if re.search(r'\b' + re.escape(target_word) + r'\b', sent, re.I):
                sentence = sent
                context_words = lesk_processor.process_context(sent, target_word)
                try:
                    best_sense, top_senses = lesk_processor.disambiguate(sent, target_word)
                except Exception as e:
                    # Surface a friendly error page instead of a 500.
                    print(f"Disambiguation error: {e}")
                    return render_template('error.html',
                                           error_message=f"Could not disambiguate the word '{target_word}'. Please try a different word or sentence.",
                                           error_details=str(e))

                # Wrap every occurrence of the target word (in the full text,
                # not just this sentence) in a highlight span.
                highlighted_text = re.sub(
                    r'\b' + re.escape(target_word) + r'\b',
                    f'<span class="highlight-word">{target_word}</span>',
                    text,
                    flags=re.IGNORECASE
                )
                break  # only the first sentence containing the word is analyzed

    # Store in session for feedback
    if best_sense:
        session['last_disambiguation'] = {
            'word': target_word,
            'context': context_words,
            'sentence': sentence
        }

    return render_template('results.html',
                           text=text,
                           highlighted_text=highlighted_text,
                           target_word=target_word,
                           best_sense=best_sense,
                           top_senses=top_senses,
                           sentence=sentence,
                           context_words=', '.join([w for w in set(context_words)][:10]))  # Show unique context words
469
+
470
@app.route('/feedback', methods=['POST'])
def feedback():
    """Accept a JSON feedback payload and record the user's sense correction.

    Expects {'word': ..., 'correct_sense': ..., 'context': [...]} and returns
    details of the confirmed sense, or a 400 on missing fields.
    """
    payload = request.get_json()
    word = payload.get('word')
    context = payload.get('context', [])
    correct_sense = payload.get('correct_sense')

    # Guard clause: both the word and its confirmed sense are required.
    if not (word and correct_sense):
        return jsonify({'error': 'Invalid feedback data'}), 400

    updated_sense = lesk_processor.add_feedback(word, context, correct_sense)
    return jsonify(updated_sense)
482
+
483
@app.route('/lesk-explained')
def lesk_explained():
    """Static page explaining how the (enhanced) Lesk algorithm works."""
    return render_template('lesk_explained.html')
486
+
487
# Add error template handler
@app.route('/error')
def error():
    """Render the generic error page from query-string parameters."""
    message = request.args.get('message', 'An unknown error occurred')
    details = request.args.get('details', '')
    return render_template('error.html', error_message=message, error_details=details)
493
+
494
if __name__ == '__main__':
    # NOTE(review): debug=True enables the Werkzeug debugger and reloader —
    # fine for local development, never for production.
    app.run(debug=True)
code.txt ADDED
@@ -0,0 +1,495 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, request, redirect, url_for, jsonify, session
2
+ import nltk
3
+ from nltk.corpus import wordnet as wn
4
+ from nltk.tokenize import word_tokenize, sent_tokenize
5
+ from nltk.tag import pos_tag
6
+ from nltk.stem import WordNetLemmatizer
7
+ from collections import Counter
8
+ import re
9
+ import os
10
+ import json
11
+ import random
12
+
13
+ # Download required NLTK resources
14
+ nltk.download('wordnet')
15
+ nltk.download('punkt')
16
+ nltk.download('averaged_perceptron_tagger')
17
+ nltk.download('stopwords')
18
+
19
+ app = Flask(__name__)
20
+ app.secret_key = 'wsd_secret_key_2023'
21
+
22
+ # Path for storing feedback data
23
+ FEEDBACK_FILE = 'feedback_data.json'
24
+
25
+ class EnhancedLesk:
26
+ def __init__(self):
27
+ self.feedback = self.load_feedback()
28
+ self.lemmatizer = WordNetLemmatizer()
29
+ self.stopwords = set(nltk.corpus.stopwords.words('english'))
30
+
31
+ # Try to load BERT models if available
32
+ try:
33
+ from transformers import AutoTokenizer, AutoModel
34
+ import torch
35
+
36
+ # Load pre-trained model and tokenizer
37
+ print("Loading BERT models...")
38
+ self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
39
+ self.bert_model = AutoModel.from_pretrained('bert-base-uncased')
40
+ self.bert_available = True
41
+ print("BERT models loaded successfully")
42
+ except Exception as e:
43
+ print(f"BERT models not available: {e}")
44
+ print("Continuing without BERT embeddings")
45
+ self.bert_available = False
46
+
47
+ def load_feedback(self):
48
+ if os.path.exists(FEEDBACK_FILE):
49
+ with open(FEEDBACK_FILE) as f:
50
+ return json.load(f)
51
+ return {}
52
+
53
+ def save_feedback(self):
54
+ with open(FEEDBACK_FILE, 'w') as f:
55
+ json.dump(self.feedback, f)
56
+
57
+ def get_wordnet_pos(self, treebank_tag):
58
+ """Convert POS tag to WordNet POS format"""
59
+ if treebank_tag.startswith('J'):
60
+ return wn.ADJ
61
+ elif treebank_tag.startswith('V'):
62
+ return wn.VERB
63
+ elif treebank_tag.startswith('N'):
64
+ return wn.NOUN
65
+ elif treebank_tag.startswith('R'):
66
+ return wn.ADV
67
+ else:
68
+ return None
69
+
70
+ def process_context(self, sentence, target_word):
71
+ """Process context words with positional weighting"""
72
+ words = word_tokenize(sentence.lower())
73
+
74
+ # Find target word position
75
+ target_pos = -1
76
+ for i, word in enumerate(words):
77
+ if word.lower() == target_word.lower():
78
+ target_pos = i
79
+ break
80
+
81
+ # Process context words with proximity weighting
82
+ context_words = []
83
+ for i, word in enumerate(words):
84
+ if word.isalpha() and word not in self.stopwords:
85
+ lemma = self.lemmatizer.lemmatize(word)
86
+
87
+ # Weight by proximity to target word (closer = more important)
88
+ if target_pos >= 0:
89
+ distance = abs(i - target_pos)
90
+ # Add word multiple times based on proximity (max 5 times for adjacent words)
91
+ weight = max(1, 6 - distance) if distance <= 5 else 1
92
+ context_words.extend([lemma] * weight)
93
+ else:
94
+ context_words.append(lemma)
95
+
96
+ return context_words
97
+
98
+ def calculate_overlap_score(self, sense, context):
99
+ """Calculate overlap between sense signature and context with improved weighting"""
100
+ # Create rich signature from sense
101
+ signature = []
102
+
103
+ # Add definition words (higher weight)
104
+ def_words = [w.lower() for w in word_tokenize(sense.definition())
105
+ if w.isalpha() and w not in self.stopwords]
106
+ signature.extend(def_words * 2) # Double weight for definition
107
+
108
+ # Add example words
109
+ for example in sense.examples():
110
+ ex_words = [w.lower() for w in word_tokenize(example)
111
+ if w.isalpha() and w not in self.stopwords]
112
+ signature.extend(ex_words)
113
+
114
+ # Add hypernyms, hyponyms, meronyms and holonyms
115
+ for hypernym in sense.hypernyms():
116
+ hyper_words = [w.lower() for w in word_tokenize(hypernym.definition())
117
+ if w.isalpha() and w not in self.stopwords]
118
+ signature.extend(hyper_words)
119
+
120
+ for hyponym in sense.hyponyms():
121
+ hypo_words = [w.lower() for w in word_tokenize(hyponym.definition())
122
+ if w.isalpha() and w not in self.stopwords]
123
+ signature.extend(hypo_words)
124
+
125
+ # Add meronyms and holonyms
126
+ for meronym in sense.part_meronyms() + sense.substance_meronyms():
127
+ meronym_words = [w.lower() for w in word_tokenize(meronym.definition())
128
+ if w.isalpha() and w not in self.stopwords]
129
+ signature.extend(meronym_words)
130
+
131
+ for holonym in sense.part_holonyms() + sense.substance_holonyms():
132
+ holonym_words = [w.lower() for w in word_tokenize(holonym.definition())
133
+ if w.isalpha() and w not in self.stopwords]
134
+ signature.extend(holonym_words)
135
+
136
+ # Calculate overlap using Counter for better frequency matching
137
+ context_counter = Counter(context)
138
+ signature_counter = Counter(signature)
139
+
140
+ # Calculate weighted overlap
141
+ overlap_score = 0
142
+ for word, count in context_counter.items():
143
+ if word in signature_counter:
144
+ # Score is product of frequencies
145
+ overlap_score += count * min(signature_counter[word], 5)
146
+
147
+ return overlap_score
148
+
149
+ def bert_similarity(self, sense, context_sentence, target_word):
150
+ """Calculate semantic similarity using BERT embeddings"""
151
+ if not hasattr(self, 'bert_available') or not self.bert_available:
152
+ return 0
153
+
154
+ try:
155
+ import torch
156
+
157
+ # Create context-gloss pair as in GlossBERT
158
+ gloss = sense.definition()
159
+
160
+ # Tokenize
161
+ inputs = self.tokenizer(context_sentence, gloss, return_tensors="pt",
162
+ padding=True, truncation=True, max_length=512)
163
+
164
+ # Get embeddings
165
+ with torch.no_grad():
166
+ outputs = self.bert_model(**inputs)
167
+
168
+ # Use CLS token embedding for similarity
169
+ similarity = torch.cosine_similarity(
170
+ outputs.last_hidden_state[0, 0],
171
+ outputs.last_hidden_state[0, inputs.input_ids[0].tolist().index(self.tokenizer.sep_token_id) + 1]
172
+ ).item()
173
+
174
+ return similarity * 10 # Scale up to be comparable with other scores
175
+ except Exception as e:
176
+ print(f"Error in BERT similarity calculation: {e}")
177
+ return 0
178
+
179
+ def check_collocations(self, sentence, target_word):
180
+ """Check for common collocations that indicate specific senses"""
181
+ collocations = {
182
+ "bat": {
183
+ "noun.animal": ["flying bat", "bat flying", "bat wings", "vampire bat", "fruit bat", "bat in the dark", "bat at night"],
184
+ "noun.artifact": ["baseball bat", "cricket bat", "swing the bat", "wooden bat", "hit with bat"]
185
+ },
186
+ "bank": {
187
+ "noun.artifact": ["bank account", "bank manager", "bank loan", "bank robbery", "money in bank"],
188
+ "noun.object": ["river bank", "bank of the river", "west bank", "bank erosion", "along the bank"]
189
+ },
190
+ "bass": {
191
+ "noun.animal": ["bass fish", "catch bass", "fishing bass", "largemouth bass"],
192
+ "noun.attribute": ["bass sound", "bass guitar", "bass player", "bass note", "bass drum"]
193
+ },
194
+ "spring": {
195
+ "noun.time": ["spring season", "this spring", "last spring", "spring weather", "spring flowers"],
196
+ "noun.artifact": ["metal spring", "spring coil", "spring mechanism"],
197
+ "noun.object": ["water spring", "hot spring", "spring water"]
198
+ },
199
+ "crane": {
200
+ "noun.animal": ["crane bird", "crane flew", "crane nest", "crane species"],
201
+ "noun.artifact": ["construction crane", "crane operator", "crane lifted"]
202
+ }
203
+ }
204
+
205
+ if target_word not in collocations:
206
+ return None, 0
207
+
208
+ # Check for collocations in sentence
209
+ sentence_lower = sentence.lower()
210
+ for domain, phrases in collocations[target_word].items():
211
+ for phrase in phrases:
212
+ if phrase.lower() in sentence_lower:
213
+ # Find matching sense
214
+ for sense in wn.synsets(target_word):
215
+ if sense.lexname() == domain:
216
+ return sense, 15 # Very high confidence for collocations
217
+
218
+ return None, 0
219
+
220
+ def apply_rules(self, word, context, senses):
221
+ """Apply hand-coded rules for common ambiguous words"""
222
+ word = word.lower()
223
+ context_words = set(context)
224
+
225
+ # Rules for "bat"
226
+ if word == "bat":
227
+ # Animal sense rules
228
+ animal_indicators = {"fly", "flying", "flew", "wing", "wings", "night",
229
+ "dark", "cave", "nocturnal", "mammal", "animal", "leather", "leathery"}
230
+ if any(indicator in context_words for indicator in animal_indicators):
231
+ # Find animal sense
232
+ for sense in senses:
233
+ if sense.lexname() == "noun.animal":
234
+ return 10, sense # High confidence boost
235
+
236
+ # Sports equipment rules
237
+ sports_indicators = {"hit", "swing", "ball", "baseball", "cricket",
238
+ "player", "game", "sport", "team", "wooden"}
239
+ if any(indicator in context_words for indicator in sports_indicators):
240
+ # Find artifact sense
241
+ for sense in senses:
242
+ if sense.lexname() == "noun.artifact":
243
+ return 8, sense # High confidence boost
244
+
245
+ # Rules for "bank"
246
+ elif word == "bank":
247
+ # Financial institution rules
248
+ finance_indicators = {"money", "account", "deposit", "withdraw", "loan",
249
+ "credit", "debit", "financial", "cash", "check"}
250
+ if any(indicator in context_words for indicator in finance_indicators):
251
+ for sense in senses:
252
+ if "financial" in sense.definition() or "money" in sense.definition():
253
+ return 10, sense
254
+
255
+ # River bank rules
256
+ river_indicators = {"river", "stream", "water", "flow", "shore", "beach"}
257
+ if any(indicator in context_words for indicator in river_indicators):
258
+ for sense in senses:
259
+ if "river" in sense.definition() or "stream" in sense.definition():
260
+ return 10, sense
261
+
262
+ # Rules for "bass"
263
+ elif word == "bass":
264
+ # Fish sense rules
265
+ fish_indicators = {"fish", "fishing", "catch", "caught", "water", "lake", "river"}
266
+ if any(indicator in context_words for indicator in fish_indicators):
267
+ for sense in senses:
268
+ if sense.lexname() == "noun.animal":
269
+ return 10, sense
270
+
271
+ # Sound/music sense rules
272
+ music_indicators = {"music", "sound", "guitar", "player", "band", "note", "tone", "instrument", "concert", "loud"}
273
+ if any(indicator in context_words for indicator in music_indicators):
274
+ for sense in senses:
275
+ if sense.lexname() == "noun.attribute" or "music" in sense.definition():
276
+ return 10, sense
277
+
278
+ # No rule matched with high confidence
279
+ return 0, None
280
+
281
+ def safe_compare_synsets(self, synset1, synset2):
282
+ """Safely compare two synsets, handling None values."""
283
+ if synset1 is None or synset2 is None:
284
+ return synset1 is synset2 # True only if both are None
285
+
286
+ # Use the built-in equality check for synsets
287
+ try:
288
+ return synset1 == synset2
289
+ except AttributeError:
290
+ return False # If comparison fails, they're not equal
291
+
292
def disambiguate(self, sentence, word):
    """Pick the best WordNet sense for `word` in `sentence`.

    Returns (best_sense, top_senses): best_sense is a Synset (or None when
    the word is not in WordNet) and top_senses holds up to three runner-up
    Synsets.
    """
    word = word.lower()

    # POS-tag the sentence and find the target word's tag so candidate
    # senses can be restricted to the matching part of speech.
    pos_tags = pos_tag(word_tokenize(sentence))
    word_pos = None
    for token, tag in pos_tags:
        if token.lower() == word:
            word_pos = self.get_wordnet_pos(tag)
            break

    # Filter senses by POS when available; fall back to all senses if the
    # filter leaves nothing (taggers often mislabel ambiguous words).
    senses = wn.synsets(word)
    if word_pos:
        pos_senses = [s for s in senses if s.pos() == word_pos]
        if pos_senses:
            senses = pos_senses

    if not senses:
        return None, []

    # Context words, weighted by proximity to the target.
    context = self.process_context(sentence, word)

    # 1. Collocations are the strongest signal: if one matches, trust it.
    collocation_sense, collocation_score = self.check_collocations(sentence, word)
    if collocation_sense and collocation_score > 0:
        top_senses = [s for s in senses
                      if not self.safe_compare_synsets(s, collocation_sense)][:3]
        return collocation_sense, top_senses

    # 2. Hand-coded rules for common ambiguous words.
    rule_score, rule_sense = self.apply_rules(word, context, senses)

    # FIX: build the feedback key exactly the way add_feedback() builds it
    # (hash of the first 10 context words). The previous code hashed the raw
    # sentence, so stored user feedback could never be looked up again.
    # NOTE(review): str hashes are salted per process (PYTHONHASHSEED), so
    # keys persisted in feedback_data.json are only stable within one run —
    # consider a stable digest (e.g. hashlib) if cross-run feedback matters.
    feedback_key = f"{word}_{hash(' '.join(context[:10])) % 10000}"
    feedback_scores = self.feedback.get(feedback_key, {})

    # 3. Score every candidate sense with a weighted combination of signals.
    scored_senses = []
    for sense in senses:
        # Rule boost applies only to the sense the rules selected.
        rule_boost = rule_score if (rule_sense is not None and
                                    self.safe_compare_synsets(sense, rule_sense)) else 0

        # Classic Lesk-style gloss/context overlap.
        overlap_score = self.calculate_overlap_score(sense, context)

        # BERT semantic similarity, when the models loaded successfully.
        bert_score = 0
        if getattr(self, 'bert_available', False):
            bert_score = self.bert_similarity(sense, sentence, word)

        # User-feedback boost recorded for this word/context.
        feedback_score = feedback_scores.get(sense.name(), 0)

        final_score = (
            overlap_score * 0.4 +
            bert_score * 0.3 +
            rule_boost * 0.2 +
            feedback_score * 0.1
        )
        scored_senses.append((final_score, sense))

    # `senses` is non-empty here, so scored_senses cannot be empty.
    scored_senses.sort(reverse=True, key=lambda x: x[0])

    best_sense = scored_senses[0][1]
    top_senses = [s[1] for s in scored_senses[1:4]]
    return best_sense, top_senses
def add_feedback(self, word, context, correct_sense):
    """Record that `correct_sense` is right for `word` in this context.

    Boosts the chosen sense's score, slightly decays competing senses,
    persists the feedback store, and returns the chosen sense's
    definition/examples (or None if the sense name is unknown).
    """
    # Key on the word plus a hash of the first ten context words.
    context_str = ' '.join(context[:10])
    key = f"{word}_{hash(context_str) % 10000}"

    scores = self.feedback.setdefault(key, {})

    # Reward the confirmed sense.
    scores[correct_sense] = scores.get(correct_sense, 0) + 5

    # Decay every other sense that already has a recorded score (floor 0).
    for sense in wn.synsets(word):
        name = sense.name()
        if name != correct_sense and name in scores:
            scores[name] = max(0, scores[name] - 1)

    self.save_feedback()

    # Echo back details of the confirmed sense for the UI.
    for sense in wn.synsets(word):
        if sense.name() == correct_sense:
            return {
                'definition': sense.definition(),
                'examples': sense.examples()
            }
    return None
# Initialize the Lesk processor
# Module-level singleton shared by all request handlers; constructing it
# loads stored feedback and (if transformers/torch import) the BERT models.
lesk_processor = EnhancedLesk()
@app.route('/', methods=['GET', 'POST'])
def index():
    """Landing page: show the input form and handle its submission."""
    if request.method == 'POST':
        # FIX: .get() avoids a 400/KeyError when 'text' is absent from the form.
        text = request.form.get('text', '')
        target_word = request.form.get('target_word', '')
        # NOTE(review): text travels through the redirect's query string, so
        # very long inputs may hit URL-length limits — confirm acceptable.
        return redirect(url_for('results', text=text, word=target_word))
    return render_template('index.html')
@app.route('/results')
def results():
    """Disambiguate the target word in the submitted text and render results.

    If no target word was given, fall back to the first word in the text
    that has more than one WordNet sense.
    """
    text = request.args.get('text', '')
    target_word = request.args.get('word', '').lower()

    if not target_word:
        # Pick the first word with multiple WordNet senses as the target.
        for token in word_tokenize(text.lower()):
            if token.isalpha() and len(wn.synsets(token)) > 1:
                target_word = token
                break

    best_sense = None
    top_senses = []
    highlighted_text = text
    sentence = ""
    context_words = []

    if target_word:
        # Use the first sentence containing the target word (whole-word,
        # case-insensitive match) as the disambiguation context.
        for sent in sent_tokenize(text):
            if re.search(r'\b' + re.escape(target_word) + r'\b', sent, re.I):
                sentence = sent
                context_words = lesk_processor.process_context(sent, target_word)
                try:
                    best_sense, top_senses = lesk_processor.disambiguate(sent, target_word)
                except Exception as e:
                    print(f"Disambiguation error: {e}")
                    return render_template(
                        'error.html',
                        error_message=(f"Could not disambiguate the word "
                                       f"'{target_word}'. Please try a "
                                       f"different word or sentence."),
                        error_details=str(e))

                # Wrap every occurrence of the target word for highlighting.
                highlighted_text = re.sub(
                    r'\b' + re.escape(target_word) + r'\b',
                    f'<span class="highlight-word">{target_word}</span>',
                    text,
                    flags=re.IGNORECASE
                )
                break

    # Remember the last disambiguation so the feedback endpoint can use it.
    if best_sense:
        session['last_disambiguation'] = {
            'word': target_word,
            'context': context_words,
            'sentence': sentence
        }

    # FIX: deduplicate context words while preserving their original order.
    # The old `set(context_words)` made the displayed list nondeterministic.
    unique_context = list(dict.fromkeys(context_words))[:10]

    return render_template('results.html',
                           text=text,
                           highlighted_text=highlighted_text,
                           target_word=target_word,
                           best_sense=best_sense,
                           top_senses=top_senses,
                           sentence=sentence,
                           context_words=', '.join(unique_context))
@app.route('/feedback', methods=['POST'])
def feedback():
    """Record a user's sense correction and return the chosen sense's info.

    Expects JSON: {"word": ..., "context": [...], "correct_sense": ...}.
    Responds 400 when word or correct_sense is missing.
    """
    # FIX: silent=True makes get_json() return None instead of raising on a
    # missing/invalid JSON body; `or {}` keeps the .get() calls safe.
    data = request.get_json(silent=True) or {}
    word = data.get('word')
    context = data.get('context', [])
    correct_sense = data.get('correct_sense')

    if word and correct_sense:
        updated_sense = lesk_processor.add_feedback(word, context, correct_sense)
        return jsonify(updated_sense)

    return jsonify({'error': 'Invalid feedback data'}), 400
@app.route('/lesk-explained')
def lesk_explained():
    """Render the static page explaining the (enhanced) Lesk algorithm."""
    return render_template('lesk_explained.html')
# Add error template handler
@app.route('/error')
def error():
    """Render the generic error page from query-string parameters."""
    return render_template(
        'error.html',
        error_message=request.args.get('message', 'An unknown error occurred'),
        error_details=request.args.get('details', ''))
if __name__ == '__main__':
    # Debug mode remains the default for local development but can now be
    # turned off with FLASK_DEBUG=0 — debug=True must never reach production
    # (the Werkzeug interactive debugger allows arbitrary code execution).
    app.run(debug=os.environ.get('FLASK_DEBUG', '1') != '0')
feedback_data.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"tripped_i_go_trip_and_tripped_due_to_imbalance": {"stumble.v.02": 5}, "bat_8076": {"bat.n.01": 5}, "saw_8076": {"see.v.19": 5}, "spring_1682": {"spring.n.01": 5}, "trunk_9387": {"proboscis.n.02": 5}, "bank_7813": {"bank.n.01": 5}}
flow.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import matplotlib.pyplot as plt

# FIX: removed the unused `matplotlib.patches` and `matplotlib.sankey.Sankey`
# imports — neither was referenced anywhere in this script.


def draw_flowchart():
    """Render the Enhanced-Lesk WSD pipeline as a top-down flowchart."""
    fig, ax = plt.subplots(figsize=(12, 10))
    ax.axis('off')

    # key -> (x, y, width, height, label); coordinates are axes fractions
    # with (x, y) at each box's lower-left corner.
    boxes = {
        'start': (0.4, 0.9, 0.2, 0.05, 'Start: Input Sentence and Target Word'),
        'pos_tag': (0.4, 0.82, 0.2, 0.05, 'POS Tagging of Target Word'),
        'get_senses': (0.4, 0.74, 0.2, 0.05, 'Get WordNet Senses (Filtered by POS)'),
        'process_context': (0.4, 0.66, 0.2, 0.05, 'Process Context with Positional Weighting'),
        'check_collocations': (0.4, 0.58, 0.2, 0.05, 'Check for Collocations'),
        'apply_rules': (0.4, 0.5, 0.2, 0.05, 'Apply Rule-Based Boosting'),
        'calculate_overlap': (0.4, 0.42, 0.2, 0.05, 'Calculate Overlap Score (Lesk)'),
        'bert_similarity': (0.4, 0.34, 0.2, 0.05, 'Calculate BERT Semantic Similarity'),
        'feedback_boost': (0.4, 0.26, 0.2, 0.05, 'Apply Feedback Boost'),
        'combine_scores': (0.4, 0.18, 0.2, 0.05, 'Combine Scores with Weights'),
        'select_best': (0.4, 0.1, 0.2, 0.05, 'Select Best Sense and Alternatives'),
        'end': (0.4, 0.02, 0.2, 0.05, 'End: Return Disambiguation Result'),
    }

    # Draw each pipeline stage as a filled rectangle with centred text.
    for x, y, w, h, text in boxes.values():
        ax.add_patch(plt.Rectangle((x, y), w, h, fill=True,
                                   edgecolor='black', facecolor='#cce5ff'))
        ax.text(x + w / 2, y + h / 2, text, ha='center', va='center',
                fontsize=10, wrap=True)

    def draw_arrow(start_key, end_key):
        # Arrow from the bottom edge of the source box to the top edge of
        # the destination box.
        sx, sy, sw, _sh, _ = boxes[start_key]
        ex, ey, ew, eh, _ = boxes[end_key]
        ax.annotate('', xy=(ex + ew / 2, ey + eh), xytext=(sx + sw / 2, sy),
                    arrowprops=dict(arrowstyle='->', lw=1.5))

    flow_sequence = [
        'start', 'pos_tag', 'get_senses', 'process_context', 'check_collocations',
        'apply_rules', 'calculate_overlap', 'bert_similarity', 'feedback_boost',
        'combine_scores', 'select_best', 'end',
    ]
    for src, dst in zip(flow_sequence, flow_sequence[1:]):
        draw_arrow(src, dst)

    plt.title('Flowchart of Enhanced Lesk-based Word Sense Disambiguation Algorithm', fontsize=14)
    plt.show()


if __name__ == '__main__':
    # FIX: guard the drawing call so importing this module no longer blocks
    # on plt.show(); running the script directly behaves exactly as before.
    draw_flowchart()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
Flask==2.3.2
nltk==3.8.1
Werkzeug==2.3.6
transformers==4.28.1
torch==2.2.0
matplotlib  # used by flow.py to render the pipeline flowchart
tempCodeRunnerFile.py ADDED
@@ -0,0 +1,495 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, request, redirect, url_for, jsonify, session
2
+ import nltk
3
+ from nltk.corpus import wordnet as wn
4
+ from nltk.tokenize import word_tokenize, sent_tokenize
5
+ from nltk.tag import pos_tag
6
+ from nltk.stem import WordNetLemmatizer
7
+ from collections import Counter
8
+ import re
9
+ import os
10
+ import json
11
+ import random
12
+
13
+ # Download required NLTK resources
14
+ nltk.download('wordnet')
15
+ nltk.download('punkt')
16
+ nltk.download('averaged_perceptron_tagger')
17
+ nltk.download('stopwords')
18
+
19
+ app = Flask(__name__)
20
+ app.secret_key = 'wsd_secret_key_2023'
21
+
22
+ # Path for storing feedback data
23
+ FEEDBACK_FILE = 'feedback_data.json'
24
+
25
+ class EnhancedLesk:
26
+ def __init__(self):
27
+ self.feedback = self.load_feedback()
28
+ self.lemmatizer = WordNetLemmatizer()
29
+ self.stopwords = set(nltk.corpus.stopwords.words('english'))
30
+
31
+ # Try to load BERT models if available
32
+ try:
33
+ from transformers import AutoTokenizer, AutoModel
34
+ import torch
35
+
36
+ # Load pre-trained model and tokenizer
37
+ print("Loading BERT models...")
38
+ self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
39
+ self.bert_model = AutoModel.from_pretrained('bert-base-uncased')
40
+ self.bert_available = True
41
+ print("BERT models loaded successfully")
42
+ except Exception as e:
43
+ print(f"BERT models not available: {e}")
44
+ print("Continuing without BERT embeddings")
45
+ self.bert_available = False
46
+
47
+ def load_feedback(self):
48
+ if os.path.exists(FEEDBACK_FILE):
49
+ with open(FEEDBACK_FILE) as f:
50
+ return json.load(f)
51
+ return {}
52
+
53
+ def save_feedback(self):
54
+ with open(FEEDBACK_FILE, 'w') as f:
55
+ json.dump(self.feedback, f)
56
+
57
+ def get_wordnet_pos(self, treebank_tag):
58
+ """Convert POS tag to WordNet POS format"""
59
+ if treebank_tag.startswith('J'):
60
+ return wn.ADJ
61
+ elif treebank_tag.startswith('V'):
62
+ return wn.VERB
63
+ elif treebank_tag.startswith('N'):
64
+ return wn.NOUN
65
+ elif treebank_tag.startswith('R'):
66
+ return wn.ADV
67
+ else:
68
+ return None
69
+
70
+ def process_context(self, sentence, target_word):
71
+ """Process context words with positional weighting"""
72
+ words = word_tokenize(sentence.lower())
73
+
74
+ # Find target word position
75
+ target_pos = -1
76
+ for i, word in enumerate(words):
77
+ if word.lower() == target_word.lower():
78
+ target_pos = i
79
+ break
80
+
81
+ # Process context words with proximity weighting
82
+ context_words = []
83
+ for i, word in enumerate(words):
84
+ if word.isalpha() and word not in self.stopwords:
85
+ lemma = self.lemmatizer.lemmatize(word)
86
+
87
+ # Weight by proximity to target word (closer = more important)
88
+ if target_pos >= 0:
89
+ distance = abs(i - target_pos)
90
+ # Add word multiple times based on proximity (max 5 times for adjacent words)
91
+ weight = max(1, 6 - distance) if distance <= 5 else 1
92
+ context_words.extend([lemma] * weight)
93
+ else:
94
+ context_words.append(lemma)
95
+
96
+ return context_words
97
+
98
+ def calculate_overlap_score(self, sense, context):
99
+ """Calculate overlap between sense signature and context with improved weighting"""
100
+ # Create rich signature from sense
101
+ signature = []
102
+
103
+ # Add definition words (higher weight)
104
+ def_words = [w.lower() for w in word_tokenize(sense.definition())
105
+ if w.isalpha() and w not in self.stopwords]
106
+ signature.extend(def_words * 2) # Double weight for definition
107
+
108
+ # Add example words
109
+ for example in sense.examples():
110
+ ex_words = [w.lower() for w in word_tokenize(example)
111
+ if w.isalpha() and w not in self.stopwords]
112
+ signature.extend(ex_words)
113
+
114
+ # Add hypernyms, hyponyms, meronyms and holonyms
115
+ for hypernym in sense.hypernyms():
116
+ hyper_words = [w.lower() for w in word_tokenize(hypernym.definition())
117
+ if w.isalpha() and w not in self.stopwords]
118
+ signature.extend(hyper_words)
119
+
120
+ for hyponym in sense.hyponyms():
121
+ hypo_words = [w.lower() for w in word_tokenize(hyponym.definition())
122
+ if w.isalpha() and w not in self.stopwords]
123
+ signature.extend(hypo_words)
124
+
125
+ # Add meronyms and holonyms
126
+ for meronym in sense.part_meronyms() + sense.substance_meronyms():
127
+ meronym_words = [w.lower() for w in word_tokenize(meronym.definition())
128
+ if w.isalpha() and w not in self.stopwords]
129
+ signature.extend(meronym_words)
130
+
131
+ for holonym in sense.part_holonyms() + sense.substance_holonyms():
132
+ holonym_words = [w.lower() for w in word_tokenize(holonym.definition())
133
+ if w.isalpha() and w not in self.stopwords]
134
+ signature.extend(holonym_words)
135
+
136
+ # Calculate overlap using Counter for better frequency matching
137
+ context_counter = Counter(context)
138
+ signature_counter = Counter(signature)
139
+
140
+ # Calculate weighted overlap
141
+ overlap_score = 0
142
+ for word, count in context_counter.items():
143
+ if word in signature_counter:
144
+ # Score is product of frequencies
145
+ overlap_score += count * min(signature_counter[word], 5)
146
+
147
+ return overlap_score
148
+
149
+ def bert_similarity(self, sense, context_sentence, target_word):
150
+ """Calculate semantic similarity using BERT embeddings"""
151
+ if not hasattr(self, 'bert_available') or not self.bert_available:
152
+ return 0
153
+
154
+ try:
155
+ import torch
156
+
157
+ # Create context-gloss pair as in GlossBERT
158
+ gloss = sense.definition()
159
+
160
+ # Tokenize
161
+ inputs = self.tokenizer(context_sentence, gloss, return_tensors="pt",
162
+ padding=True, truncation=True, max_length=512)
163
+
164
+ # Get embeddings
165
+ with torch.no_grad():
166
+ outputs = self.bert_model(**inputs)
167
+
168
+ # Use CLS token embedding for similarity
169
+ similarity = torch.cosine_similarity(
170
+ outputs.last_hidden_state[0, 0],
171
+ outputs.last_hidden_state[0, inputs.input_ids[0].tolist().index(self.tokenizer.sep_token_id) + 1]
172
+ ).item()
173
+
174
+ return similarity * 10 # Scale up to be comparable with other scores
175
+ except Exception as e:
176
+ print(f"Error in BERT similarity calculation: {e}")
177
+ return 0
178
+
179
+ def check_collocations(self, sentence, target_word):
180
+ """Check for common collocations that indicate specific senses"""
181
+ collocations = {
182
+ "bat": {
183
+ "noun.animal": ["flying bat", "bat flying", "bat wings", "vampire bat", "fruit bat", "bat in the dark", "bat at night"],
184
+ "noun.artifact": ["baseball bat", "cricket bat", "swing the bat", "wooden bat", "hit with bat"]
185
+ },
186
+ "bank": {
187
+ "noun.artifact": ["bank account", "bank manager", "bank loan", "bank robbery", "money in bank"],
188
+ "noun.object": ["river bank", "bank of the river", "west bank", "bank erosion", "along the bank"]
189
+ },
190
+ "bass": {
191
+ "noun.animal": ["bass fish", "catch bass", "fishing bass", "largemouth bass"],
192
+ "noun.attribute": ["bass sound", "bass guitar", "bass player", "bass note", "bass drum"]
193
+ },
194
+ "spring": {
195
+ "noun.time": ["spring season", "this spring", "last spring", "spring weather", "spring flowers"],
196
+ "noun.artifact": ["metal spring", "spring coil", "spring mechanism"],
197
+ "noun.object": ["water spring", "hot spring", "spring water"]
198
+ },
199
+ "crane": {
200
+ "noun.animal": ["crane bird", "crane flew", "crane nest", "crane species"],
201
+ "noun.artifact": ["construction crane", "crane operator", "crane lifted"]
202
+ }
203
+ }
204
+
205
+ if target_word not in collocations:
206
+ return None, 0
207
+
208
+ # Check for collocations in sentence
209
+ sentence_lower = sentence.lower()
210
+ for domain, phrases in collocations[target_word].items():
211
+ for phrase in phrases:
212
+ if phrase.lower() in sentence_lower:
213
+ # Find matching sense
214
+ for sense in wn.synsets(target_word):
215
+ if sense.lexname() == domain:
216
+ return sense, 15 # Very high confidence for collocations
217
+
218
+ return None, 0
219
+
220
+ def apply_rules(self, word, context, senses):
221
+ """Apply hand-coded rules for common ambiguous words"""
222
+ word = word.lower()
223
+ context_words = set(context)
224
+
225
+ # Rules for "bat"
226
+ if word == "bat":
227
+ # Animal sense rules
228
+ animal_indicators = {"fly", "flying", "flew", "wing", "wings", "night",
229
+ "dark", "cave", "nocturnal", "mammal", "animal", "leather", "leathery"}
230
+ if any(indicator in context_words for indicator in animal_indicators):
231
+ # Find animal sense
232
+ for sense in senses:
233
+ if sense.lexname() == "noun.animal":
234
+ return 10, sense # High confidence boost
235
+
236
+ # Sports equipment rules
237
+ sports_indicators = {"hit", "swing", "ball", "baseball", "cricket",
238
+ "player", "game", "sport", "team", "wooden"}
239
+ if any(indicator in context_words for indicator in sports_indicators):
240
+ # Find artifact sense
241
+ for sense in senses:
242
+ if sense.lexname() == "noun.artifact":
243
+ return 8, sense # High confidence boost
244
+
245
+ # Rules for "bank"
246
+ elif word == "bank":
247
+ # Financial institution rules
248
+ finance_indicators = {"money", "account", "deposit", "withdraw", "loan",
249
+ "credit", "debit", "financial", "cash", "check"}
250
+ if any(indicator in context_words for indicator in finance_indicators):
251
+ for sense in senses:
252
+ if "financial" in sense.definition() or "money" in sense.definition():
253
+ return 10, sense
254
+
255
+ # River bank rules
256
+ river_indicators = {"river", "stream", "water", "flow", "shore", "beach"}
257
+ if any(indicator in context_words for indicator in river_indicators):
258
+ for sense in senses:
259
+ if "river" in sense.definition() or "stream" in sense.definition():
260
+ return 10, sense
261
+
262
+ # Rules for "bass"
263
+ elif word == "bass":
264
+ # Fish sense rules
265
+ fish_indicators = {"fish", "fishing", "catch", "caught", "water", "lake", "river"}
266
+ if any(indicator in context_words for indicator in fish_indicators):
267
+ for sense in senses:
268
+ if sense.lexname() == "noun.animal":
269
+ return 10, sense
270
+
271
+ # Sound/music sense rules
272
+ music_indicators = {"music", "sound", "guitar", "player", "band", "note", "tone", "instrument", "concert", "loud"}
273
+ if any(indicator in context_words for indicator in music_indicators):
274
+ for sense in senses:
275
+ if sense.lexname() == "noun.attribute" or "music" in sense.definition():
276
+ return 10, sense
277
+
278
+ # No rule matched with high confidence
279
+ return 0, None
280
+
281
+ def safe_compare_synsets(self, synset1, synset2):
282
+ """Safely compare two synsets, handling None values."""
283
+ if synset1 is None or synset2 is None:
284
+ return synset1 is synset2 # True only if both are None
285
+
286
+ # Use the built-in equality check for synsets
287
+ try:
288
+ return synset1 == synset2
289
+ except AttributeError:
290
+ return False # If comparison fails, they're not equal
291
+
292
def disambiguate(self, sentence, word):
    """Pick the best WordNet sense for `word` in `sentence`.

    Returns (best_sense, top_senses): best_sense is a Synset (or None when
    the word is not in WordNet) and top_senses holds up to three runner-up
    Synsets.
    """
    word = word.lower()

    # POS-tag the sentence and find the target word's tag so candidate
    # senses can be restricted to the matching part of speech.
    pos_tags = pos_tag(word_tokenize(sentence))
    word_pos = None
    for token, tag in pos_tags:
        if token.lower() == word:
            word_pos = self.get_wordnet_pos(tag)
            break

    # Filter senses by POS when available; fall back to all senses if the
    # filter leaves nothing (taggers often mislabel ambiguous words).
    senses = wn.synsets(word)
    if word_pos:
        pos_senses = [s for s in senses if s.pos() == word_pos]
        if pos_senses:
            senses = pos_senses

    if not senses:
        return None, []

    # Context words, weighted by proximity to the target.
    context = self.process_context(sentence, word)

    # 1. Collocations are the strongest signal: if one matches, trust it.
    collocation_sense, collocation_score = self.check_collocations(sentence, word)
    if collocation_sense and collocation_score > 0:
        top_senses = [s for s in senses
                      if not self.safe_compare_synsets(s, collocation_sense)][:3]
        return collocation_sense, top_senses

    # 2. Hand-coded rules for common ambiguous words.
    rule_score, rule_sense = self.apply_rules(word, context, senses)

    # FIX: build the feedback key exactly the way add_feedback() builds it
    # (hash of the first 10 context words). The previous code hashed the raw
    # sentence, so stored user feedback could never be looked up again.
    # NOTE(review): str hashes are salted per process (PYTHONHASHSEED), so
    # keys persisted in feedback_data.json are only stable within one run —
    # consider a stable digest (e.g. hashlib) if cross-run feedback matters.
    feedback_key = f"{word}_{hash(' '.join(context[:10])) % 10000}"
    feedback_scores = self.feedback.get(feedback_key, {})

    # 3. Score every candidate sense with a weighted combination of signals.
    scored_senses = []
    for sense in senses:
        # Rule boost applies only to the sense the rules selected.
        rule_boost = rule_score if (rule_sense is not None and
                                    self.safe_compare_synsets(sense, rule_sense)) else 0

        # Classic Lesk-style gloss/context overlap.
        overlap_score = self.calculate_overlap_score(sense, context)

        # BERT semantic similarity, when the models loaded successfully.
        bert_score = 0
        if getattr(self, 'bert_available', False):
            bert_score = self.bert_similarity(sense, sentence, word)

        # User-feedback boost recorded for this word/context.
        feedback_score = feedback_scores.get(sense.name(), 0)

        final_score = (
            overlap_score * 0.4 +
            bert_score * 0.3 +
            rule_boost * 0.2 +
            feedback_score * 0.1
        )
        scored_senses.append((final_score, sense))

    # `senses` is non-empty here, so scored_senses cannot be empty.
    scored_senses.sort(reverse=True, key=lambda x: x[0])

    best_sense = scored_senses[0][1]
    top_senses = [s[1] for s in scored_senses[1:4]]
    return best_sense, top_senses
+ def add_feedback(self, word, context, correct_sense):
369
+ """Store user feedback to improve future disambiguation"""
370
+ # Create a key based on word and hashed context
371
+ context_str = ' '.join(context[:10]) # Use first 10 context words
372
+ key = f"{word}_{hash(context_str) % 10000}"
373
+
374
+ if key not in self.feedback:
375
+ self.feedback[key] = {}
376
+
377
+ # Increase score for the correct sense
378
+ self.feedback[key][correct_sense] = self.feedback[key].get(correct_sense, 0) + 5
379
+
380
+ # Optionally decrease scores for other senses
381
+ for sense in wn.synsets(word):
382
+ if sense.name() != correct_sense and sense.name() in self.feedback[key]:
383
+ self.feedback[key][sense.name()] = max(0, self.feedback[key][sense.name()] - 1)
384
+
385
+ self.save_feedback()
386
+
387
+ # Return the updated sense information
388
+ for sense in wn.synsets(word):
389
+ if sense.name() == correct_sense:
390
+ return {
391
+ 'definition': sense.definition(),
392
+ 'examples': sense.examples()
393
+ }
394
+
395
+ return None
396
+
397
+ # Initialize the Lesk processor
398
+ lesk_processor = EnhancedLesk()
399
+
400
+ @app.route('/', methods=['GET', 'POST'])
401
+ def index():
402
+ if request.method == 'POST':
403
+ text = request.form['text']
404
+ target_word = request.form.get('target_word', '')
405
+ return redirect(url_for('results', text=text, word=target_word))
406
+ return render_template('index.html')
407
+
408
@app.route('/results')
def results():
    """Disambiguate the target word in the submitted text and render results.

    If no target word was given, fall back to the first word in the text
    that has more than one WordNet sense.
    """
    text = request.args.get('text', '')
    target_word = request.args.get('word', '').lower()

    if not target_word:
        # Pick the first word with multiple WordNet senses as the target.
        for token in word_tokenize(text.lower()):
            if token.isalpha() and len(wn.synsets(token)) > 1:
                target_word = token
                break

    best_sense = None
    top_senses = []
    highlighted_text = text
    sentence = ""
    context_words = []

    if target_word:
        # Use the first sentence containing the target word (whole-word,
        # case-insensitive match) as the disambiguation context.
        for sent in sent_tokenize(text):
            if re.search(r'\b' + re.escape(target_word) + r'\b', sent, re.I):
                sentence = sent
                context_words = lesk_processor.process_context(sent, target_word)
                try:
                    best_sense, top_senses = lesk_processor.disambiguate(sent, target_word)
                except Exception as e:
                    print(f"Disambiguation error: {e}")
                    return render_template(
                        'error.html',
                        error_message=(f"Could not disambiguate the word "
                                       f"'{target_word}'. Please try a "
                                       f"different word or sentence."),
                        error_details=str(e))

                # Wrap every occurrence of the target word for highlighting.
                highlighted_text = re.sub(
                    r'\b' + re.escape(target_word) + r'\b',
                    f'<span class="highlight-word">{target_word}</span>',
                    text,
                    flags=re.IGNORECASE
                )
                break

    # Remember the last disambiguation so the feedback endpoint can use it.
    if best_sense:
        session['last_disambiguation'] = {
            'word': target_word,
            'context': context_words,
            'sentence': sentence
        }

    # FIX: deduplicate context words while preserving their original order.
    # The old `set(context_words)` made the displayed list nondeterministic.
    unique_context = list(dict.fromkeys(context_words))[:10]

    return render_template('results.html',
                           text=text,
                           highlighted_text=highlighted_text,
                           target_word=target_word,
                           best_sense=best_sense,
                           top_senses=top_senses,
                           sentence=sentence,
                           context_words=', '.join(unique_context))
+ @app.route('/feedback', methods=['POST'])
471
+ def feedback():
472
+ data = request.get_json()
473
+ word = data.get('word')
474
+ context = data.get('context', [])
475
+ correct_sense = data.get('correct_sense')
476
+
477
+ if word and correct_sense:
478
+ updated_sense = lesk_processor.add_feedback(word, context, correct_sense)
479
+ return jsonify(updated_sense)
480
+
481
+ return jsonify({'error': 'Invalid feedback data'}), 400
482
+
483
+ @app.route('/lesk-explained')
484
+ def lesk_explained():
485
+ return render_template('lesk_explained.html')
486
+
487
+ # Add error template handler
488
+ @app.route('/error')
489
+ def error():
490
+ error_message = request.args.get('message', 'An unknown error occurred')
491
+ error_details = request.args.get('details', '')
492
+ return render_template('error.html', error_message=error_message, error_details=error_details)
493
+
494
+ if __name__ == '__main__':
495
+ app.run(debug=True)
templates/error.html ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- templates/error.html -->
2
+ <!DOCTYPE html>
3
+ <html>
4
+ <head>
5
+ <title>Error - Word Sense Disambiguation Tool</title>
6
+ <meta name="viewport" content="width=device-width, initial-scale=1">
7
+ <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
8
+ <style>
9
+ body {
10
+ background-color: #f8f9fa;
11
+ }
12
+ .navbar {
13
+ box-shadow: 0 2px 4px rgba(0,0,0,0.07);
14
+ }
15
+ .main-container {
16
+ max-width: 800px;
17
+ margin: 0 auto;
18
+ padding: 2rem;
19
+ background-color: white;
20
+ border-radius: 8px;
21
+ box-shadow: 0 2px 10px rgba(0,0,0,0.05);
22
+ }
23
+ .error-details {
24
+ background-color: #f5f5f5;
25
+ padding: 1rem;
26
+ border-radius: 4px;
27
+ font-family: monospace;
28
+ white-space: pre-wrap;
29
+ margin-top: 1rem;
30
+ }
31
+ </style>
32
+ </head>
33
+ <body>
34
+ <!-- Navbar -->
35
+ <nav class="navbar navbar-expand-lg navbar-light bg-light mb-4">
36
+ <div class="container">
37
+ <a class="navbar-brand" href="/">WSD Tool</a>
38
+ <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarNav">
39
+ <span class="navbar-toggler-icon"></span>
40
+ </button>
41
+ <div class="collapse navbar-collapse" id="navbarNav">
42
+ <ul class="navbar-nav ms-auto">
43
+ <li class="nav-item">
44
+ <a href="{{ url_for('index') }}" class="btn btn-outline-primary">
45
+ ← Back to Input
46
+ </a>
47
+ </li>
48
+ </ul>
49
+ </div>
50
+ </div>
51
+ </nav>
52
+
53
+ <div class="container main-container">
54
+ <div class="text-center mb-4">
55
+ <h2 class="text-danger">Error</h2>
56
+ </div>
57
+
58
+ <div class="alert alert-danger">
59
+ {{ error_message }}
60
+ </div>
61
+
62
+ {% if error_details %}
63
+ <div class="error-details">
64
+ {{ error_details }}
65
+ </div>
66
+ {% endif %}
67
+
68
+ <div class="mt-4">
69
+ <p>You can try the following:</p>
70
+ <ul>
71
+ <li>Use a different word or sentence</li>
72
+ <li>Make sure the word has multiple meanings in WordNet</li>
73
+ <li>Provide more context around the ambiguous word</li>
74
+ </ul>
75
+ </div>
76
+
77
+ <div class="text-center mt-5">
78
+ <a href="{{ url_for('index') }}" class="btn btn-primary">Return to Input</a>
79
+ </div>
80
+ </div>
81
+
82
+ <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
83
+ </body>
84
+ </html>
templates/index.html ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- templates/index.html -->
2
+ <!DOCTYPE html>
3
+ <html>
4
+ <head>
5
+ <title>Word Sense Disambiguation Tool</title>
6
+ <meta name="viewport" content="width=device-width, initial-scale=1">
7
+ <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
8
+ <style>
9
+ body {
10
+ background-color: #f8f9fa;
11
+ }
12
+ .navbar {
13
+ box-shadow: 0 2px 4px rgba(0,0,0,0.07);
14
+ }
15
+ .main-container {
16
+ max-width: 800px;
17
+ margin: 0 auto;
18
+ padding: 2rem;
19
+ background-color: white;
20
+ border-radius: 8px;
21
+ box-shadow: 0 2px 10px rgba(0,0,0,0.05);
22
+ }
23
+ .form-control:focus {
24
+ border-color: #6c757d;
25
+ box-shadow: 0 0 0 0.25rem rgba(108, 117, 125, 0.25);
26
+ }
27
+ .example-btn {
28
+ margin-right: 0.5rem;
29
+ margin-bottom: 0.5rem;
30
+ }
31
+ </style>
32
+ </head>
33
+ <body>
34
+ <!-- Navbar with Lesk Algorithm Explanation Link -->
35
+ <nav class="navbar navbar-expand-lg navbar-light bg-light mb-4">
36
+ <div class="container">
37
+ <a class="navbar-brand" href="/">WSD Tool</a>
38
+ <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarNav">
39
+ <span class="navbar-toggler-icon"></span>
40
+ </button>
41
+ <div class="collapse navbar-collapse" id="navbarNav">
42
+ <ul class="navbar-nav ms-auto">
43
+ <li class="nav-item">
44
+ <a class="nav-link" href="{{ url_for('lesk_explained') }}">
45
+ Learn How the Lesk Algorithm Works
46
+ </a>
47
+ </li>
48
+ </ul>
49
+ </div>
50
+ </div>
51
+ </nav>
52
+
53
+ <div class="container main-container">
54
+ <h2 class="mb-4 text-center">Word Sense Disambiguation</h2>
55
+ <p class="lead text-center mb-4">
56
+ Enter text with ambiguous words to disambiguate their meanings
57
+ </p>
58
+
59
+ <form action="{{ url_for('index') }}" method="post">
60
+ <div class="mb-3">
61
+ <label for="text" class="form-label">Text:</label>
62
+ <textarea class="form-control" id="text" name="text" rows="5" required></textarea>
63
+ </div>
64
+ <div class="mb-3">
65
+ <label for="target_word" class="form-label">
66
+ Target Word (optional):
67
+ <small class="text-muted">If left empty, the first ambiguous word will be selected</small>
68
+ </label>
69
+ <input type="text" class="form-control" id="target_word" name="target_word">
70
+ </div>
71
+ <div class="d-grid gap-2">
72
+ <button type="submit" class="btn btn-primary">Disambiguate</button>
73
+ </div>
74
+ </form>
75
+
76
+ <div class="mt-4">
77
+ <h5>Example Sentences:</h5>
78
+ <div class="d-flex flex-wrap">
79
+ <button class="btn btn-sm btn-outline-secondary example-btn"
80
+ onclick="fillExample('She saw a bat flying in the dark.', 'bat')">
81
+ Bat (animal)
82
+ </button>
83
+ <button class="btn btn-sm btn-outline-secondary example-btn"
84
+ onclick="fillExample('The baseball player swung the bat.', 'bat')">
85
+ Bat (sports)
86
+ </button>
87
+ <button class="btn btn-sm btn-outline-secondary example-btn"
88
+ onclick="fillExample('The bat had leathery wings and flew silently.', 'bat')">
89
+ Bat (with wings)
90
+ </button>
91
+ <button class="btn btn-sm btn-outline-secondary example-btn"
92
+ onclick="fillExample('I need to go to the bank to deposit some money.', 'bank')">
93
+ Bank (financial)
94
+ </button>
95
+ <button class="btn btn-sm btn-outline-secondary example-btn"
96
+ onclick="fillExample('We sat by the river bank and had a picnic.', 'bank')">
97
+ Bank (riverside)
98
+ </button>
99
+ <button class="btn btn-sm btn-outline-secondary example-btn"
100
+ onclick="fillExample('The bass was too loud during the concert.', 'bass')">
101
+ Bass (sound)
102
+ </button>
103
+ <button class="btn btn-sm btn-outline-secondary example-btn"
104
+ onclick="fillExample('He caught a large bass while fishing.', 'bass')">
105
+ Bass (fish)
106
+ </button>
107
+ <button class="btn btn-sm btn-outline-secondary example-btn"
108
+ onclick="fillExample('Spring is my favorite season of the year.', 'spring')">
109
+ Spring (season)
110
+ </button>
111
+ <button class="btn btn-sm btn-outline-secondary example-btn"
112
+ onclick="fillExample('The spring in the mattress was broken.', 'spring')">
113
+ Spring (coil)
114
+ </button>
115
+ </div>
116
+ </div>
117
+ </div>
118
+
119
+ <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
120
+ <script>
121
// Populate the input form with a predefined example sentence and its target word.
function fillExample(text, word) {
    const textField = document.getElementById('text');
    const wordField = document.getElementById('target_word');
    textField.value = text;
    wordField.value = word;
}
125
+ </script>
126
+ </body>
127
+ </html>
templates/lesk_explained.html ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- templates/lesk_explained.html -->
2
+ <!DOCTYPE html>
3
+ <html>
4
+ <head>
5
+ <title>Lesk Algorithm Explained</title>
6
+ <meta name="viewport" content="width=device-width, initial-scale=1">
7
+ <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
8
+ <style>
9
+ body {
10
+ background-color: #f8f9fa;
11
+ }
12
+ .navbar {
13
+ box-shadow: 0 2px 4px rgba(0,0,0,0.07);
14
+ }
15
+ .main-container {
16
+ max-width: 800px;
17
+ margin: 0 auto;
18
+ padding: 2rem;
19
+ background-color: white;
20
+ border-radius: 8px;
21
+ box-shadow: 0 2px 10px rgba(0,0,0,0.05);
22
+ }
23
+ .code-block {
24
+ background-color: #f5f5f5;
25
+ padding: 1rem;
26
+ border-radius: 4px;
27
+ font-family: monospace;
28
+ white-space: pre-wrap;
29
+ }
30
+ .algorithm-step {
31
+ background-color: #e9ecef;
32
+ padding: 1rem;
33
+ border-radius: 6px;
34
+ margin-bottom: 1rem;
35
+ }
36
+ .enhancement {
37
+ background-color: #e3f2fd;
38
+ border-left: 4px solid #2196f3;
39
+ padding: 1rem;
40
+ margin-bottom: 1rem;
41
+ }
42
+ </style>
43
+ </head>
44
+ <body>
45
+ <!-- Navbar -->
46
+ <nav class="navbar navbar-expand-lg navbar-light bg-light mb-4">
47
+ <div class="container">
48
+ <a class="navbar-brand" href="/">WSD Tool</a>
49
+ <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarNav">
50
+ <span class="navbar-toggler-icon"></span>
51
+ </button>
52
+ <div class="collapse navbar-collapse" id="navbarNav">
53
+ <ul class="navbar-nav ms-auto">
54
+ <li class="nav-item">
55
+ <a href="{{ url_for('index') }}" class="btn btn-outline-primary">
56
+ ← Back to Tool
57
+ </a>
58
+ </li>
59
+ </ul>
60
+ </div>
61
+ </div>
62
+ </nav>
63
+
64
+ <div class="container main-container">
65
+ <h2 class="mb-4">The Enhanced Lesk Algorithm for Word Sense Disambiguation</h2>
66
+
67
+ <div class="mb-4">
68
+ <h4>What is Word Sense Disambiguation?</h4>
69
+ <p>
70
+ Word Sense Disambiguation (WSD) is the task of identifying which sense of a word is used in a sentence when the word has multiple meanings. For example, the word "bat" can refer to a flying mammal or a piece of sports equipment.
71
+ </p>
72
+ </div>
73
+
74
+ <div class="mb-4">
75
+ <h4>The Original Lesk Algorithm</h4>
76
+ <p>
77
+ The Lesk algorithm, introduced by Michael Lesk in 1986, is one of the earliest and most influential algorithms for WSD. It uses dictionary definitions to determine the correct sense of a word in context.
78
+ </p>
79
+
80
+ <div class="algorithm-step">
81
+ <h5>Basic Idea:</h5>
82
+ <p>The sense whose dictionary definition shares the most words with the context is likely the correct sense.</p>
83
+ </div>
84
+ </div>
85
+
86
+ <div class="mb-4">
87
+ <h4>Our Enhanced Lesk Implementation</h4>
88
+ <p>Our implementation extends the original Lesk algorithm with several modern enhancements:</p>
89
+
90
+ <div class="enhancement">
91
+ <h5>1. Rich Sense Signatures</h5>
92
+ <p>We expand the sense signature beyond just definitions to include:</p>
93
+ <ul>
94
+ <li>Example sentences from WordNet</li>
95
+ <li>Hypernyms (parent concepts)</li>
96
+ <li>Hyponyms (child concepts)</li>
97
+ <li>Meronyms and holonyms (part-whole relationships)</li>
98
+ </ul>
99
+ </div>
100
+
101
+ <div class="enhancement">
102
+ <h5>2. BERT Integration</h5>
103
+ <p>We incorporate BERT contextual embeddings to capture deeper semantic relationships between the context and sense definitions.</p>
104
+ </div>
105
+
106
+ <div class="enhancement">
107
+ <h5>3. Rule-Based Components</h5>
108
+ <p>For common ambiguous words, we add targeted rules to handle cases where statistical methods might fail.</p>
109
+ </div>
110
+
111
+ <div class="enhancement">
112
+ <h5>4. Collocation Detection</h5>
113
+ <p>We identify common word combinations (collocations) that strongly indicate specific senses.</p>
114
+ </div>
115
+
116
+ <div class="enhancement">
117
+ <h5>5. Adaptive Learning</h5>
118
+ <p>The system learns from user feedback to improve future disambiguations of similar contexts.</p>
119
+ </div>
120
+ </div>
121
+
122
+ <div class="mb-4">
123
+ <h4>How Our Algorithm Works</h4>
124
+
125
+ <div class="algorithm-step">
126
+ <h5>Step 1: Context Processing</h5>
127
+ <p>Extract and process context words from the sentence, giving more weight to words closer to the target word.</p>
128
+ </div>
129
+
130
+ <div class="algorithm-step">
131
+ <h5>Step 2: Collocation Check</h5>
132
+ <p>Check for strong collocations that directly indicate a specific sense (e.g., "bat flying" strongly indicates the animal sense).</p>
133
+ </div>
134
+
135
+ <div class="algorithm-step">
136
+ <h5>Step 3: Rule Application</h5>
137
+ <p>Apply targeted rules for common ambiguous words based on contextual indicators.</p>
138
+ </div>
139
+
140
+ <div class="algorithm-step">
141
+ <h5>Step 4: Sense Signature Creation</h5>
142
+ <p>For each possible sense, create a rich signature from definitions, examples, and related concepts.</p>
143
+ </div>
144
+
145
+ <div class="algorithm-step">
146
+ <h5>Step 5: Overlap Calculation</h5>
147
+ <p>Calculate the weighted overlap between context words and each sense signature.</p>
148
+ </div>
149
+
150
+ <div class="algorithm-step">
151
+ <h5>Step 6: BERT Similarity</h5>
152
+ <p>Calculate semantic similarity between the context and each sense definition using BERT embeddings.</p>
153
+ </div>
154
+
155
+ <div class="algorithm-step">
156
+ <h5>Step 7: Score Combination</h5>
157
+ <p>Combine all scores (overlap, BERT, rules, feedback) to determine the most likely sense.</p>
158
+ </div>
159
+ </div>
160
+
161
+ <div class="mb-4">
162
+ <h4>Example</h4>
163
+ <p>For the sentence "She saw a bat flying in the dark":</p>
164
+
165
+ <div class="code-block">
166
+ Target word: "bat"
167
+
168
+ Possible senses:
169
+ 1. "a nocturnal mammal with wings"
170
+ 2. "a implement used for hitting a ball in sports"
171
+
172
+ Context words: [she, saw, flying, dark]
173
+
174
+ Collocation check: "bat flying" → strong indicator of animal sense
175
+ Rule application: "flying" → animal sense rule triggered
176
+
177
+ Sense 1 signature: [nocturnal, mammal, wing, fly, night, animal, cave, ...]
178
+ Sense 2 signature: [implement, hit, ball, sport, game, baseball, cricket, ...]
179
+
180
+ Overlap scores:
181
+ - Sense 1: High overlap with "flying" and "dark" (related to nocturnal, night)
182
+ - Sense 2: Low overlap with context words
183
+
184
+ BERT similarity:
185
+ - Sense 1: High similarity between "bat flying in the dark" and "nocturnal mammal with wings"
186
+ - Sense 2: Lower similarity with sports equipment definition
187
+
188
+ Final scores:
189
+ - Sense 1 (animal): 8.7
190
+ - Sense 2 (sports): 2.3
191
+
192
+ Result: Sense 1 is selected as the correct meaning.</div>
193
+ </div>
194
+
195
+ <div class="mb-4">
196
+ <h4>Advantages Over Basic Lesk</h4>
197
+ <ul>
198
+ <li>Higher accuracy for common ambiguous words</li>
199
+ <li>Better handling of contextual nuances</li>
200
+ <li>Integration of modern NLP techniques</li>
201
+ <li>Adaptive learning from user feedback</li>
202
+ <li>Combination of statistical and rule-based approaches</li>
203
+ </ul>
204
+ </div>
205
+
206
+ <div class="text-center mt-5">
207
+ <a href="{{ url_for('index') }}" class="btn btn-primary">Try the WSD Tool</a>
208
+ </div>
209
+ </div>
210
+
211
+ <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
212
+ </body>
213
+ </html>
templates/results.html ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- templates/results.html -->
2
+ <!DOCTYPE html>
3
+ <html>
4
+ <head>
5
+ <title>Disambiguation Results</title>
6
+ <meta name="viewport" content="width=device-width, initial-scale=1">
7
+ <link
8
+ href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css"
9
+ rel="stylesheet"
10
+ >
11
+ <style>
12
+ .highlight-word {
13
+ background-color: #FFD700;
14
+ padding: 2px 5px;
15
+ border-radius: 3px;
16
+ font-weight: bold;
17
+ }
18
+ .navbar {
19
+ box-shadow: 0 2px 4px rgba(0,0,0,0.07);
20
+ }
21
+ .card {
22
+ margin-bottom: 1.5rem;
23
+ box-shadow: 0 2px 8px rgba(0,0,0,0.05);
24
+ }
25
+ #updatedSense {
26
+ display: none;
27
+ }
28
+ .context-badge {
29
+ margin-right: 5px;
30
+ margin-bottom: 5px;
31
+ background-color: #e9ecef;
32
+ color: #495057;
33
+ }
34
+ .lexname-badge {
35
+ background-color: #17a2b8;
36
+ color: white;
37
+ }
38
+ .sense-card {
39
+ transition: all 0.3s ease;
40
+ }
41
+ .sense-card:hover {
42
+ transform: translateY(-5px);
43
+ box-shadow: 0 4px 12px rgba(0,0,0,0.1);
44
+ }
45
+ .algorithm-info {
46
+ font-size: 0.9rem;
47
+ color: #6c757d;
48
+ }
49
+ </style>
50
+ </head>
51
+ <body>
52
+ <!-- Navbar with Lesk Algorithm Explanation Link -->
53
+ <nav class="navbar navbar-expand-lg navbar-light bg-light mb-4">
54
+ <div class="container">
55
+ <a class="navbar-brand" href="/">WSD Tool</a>
56
+ <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarNav">
57
+ <span class="navbar-toggler-icon"></span>
58
+ </button>
59
+ <div class="collapse navbar-collapse" id="navbarNav">
60
+ <ul class="navbar-nav ms-auto align-items-center">
61
+ <li class="nav-item">
62
+ <a class="nav-link" href="{{ url_for('lesk_explained') }}">
63
+ Learn about Lesk Algorithm
64
+ </a>
65
+ </li>
66
+ <li class="nav-item">
67
+ <a href="{{ url_for('index') }}" class="btn btn-outline-primary ms-2">
68
+ ← Back to Input
69
+ </a>
70
+ </li>
71
+ </ul>
72
+ </div>
73
+ </div>
74
+ </nav>
75
+
76
+ <div class="container">
77
+ <!-- Original Text -->
78
+ <div class="mb-4">
79
+ <h5>Original Text:</h5>
80
+ <div class="p-3 bg-light rounded">
81
+ {{ highlighted_text|safe }}
82
+ </div>
83
+ </div>
84
+
85
+ {% if best_sense %}
86
+ <!-- Selected Sense Card -->
87
+ <div class="card sense-card" id="selectedSense">
88
+ <div class="card-header bg-primary text-white">
89
+ Selected Sense
90
+ </div>
91
+ <div class="card-body">
92
+ <h5 id="senseDefinition">{{ best_sense.definition() }}</h5>
93
+ <p class="text-muted">Lexical Category: <span class="badge lexname-badge">{{ best_sense.lexname() }}</span></p>
94
+ {% if best_sense.examples() %}
95
+ <div class="mt-2">
96
+ <strong>Examples:</strong>
97
+ <ul id="senseExamples">
98
+ {% for example in best_sense.examples() %}
99
+ <li>{{ example }}</li>
100
+ {% endfor %}
101
+ </ul>
102
+ </div>
103
+ {% endif %}
104
+
105
+ <!-- Show context words that influenced the decision -->
106
+ <div class="mt-3">
107
+ <strong>Context words used:</strong>
108
+ <div class="mt-2">
109
+ {% for word in context_words.split(', ') %}
110
+ <span class="badge context-badge">{{ word }}</span>
111
+ {% endfor %}
112
+ </div>
113
+ </div>
114
+
115
+ <div class="mt-3 algorithm-info">
116
+ <p>This sense was selected using Enhanced Lesk algorithm with BERT semantic similarity and rule-based components.</p>
117
+ </div>
118
+ </div>
119
+ </div>
120
+
121
+ <!-- Updated Sense Section (Initially Hidden) -->
122
+ <div class="card border-success mb-4 sense-card" id="updatedSense">
123
+ <div class="card-header bg-success text-white">
124
+ Updated Sense (Based on Feedback)
125
+ </div>
126
+ <div class="card-body">
127
+ <h5 id="updatedDefinition"></h5>
128
+ <div class="mt-2">
129
+ <strong>Examples:</strong>
130
+ <ul id="updatedExamples"></ul>
131
+ </div>
132
+ <div class="mt-3 algorithm-info">
133
+ <p>Your feedback has been recorded and will improve future disambiguations.</p>
134
+ </div>
135
+ </div>
136
+ </div>
137
+
138
+ <!-- Top 3 Alternatives -->
139
+ <h5 class="mt-4">Top 3 Alternative Senses:</h5>
140
+ {% for sense in top_senses %}
141
+ <div class="card mb-3 sense-card">
142
+ <div class="card-body">
143
+ <p><strong>{{ sense.definition() }}</strong></p>
144
+ <p class="text-muted small">Lexical Category: <span class="badge lexname-badge">{{ sense.lexname() }}</span></p>
145
+ {% if sense.examples() %}
146
+ <p class="small">Example: "{{ sense.examples()[0] }}"</p>
147
+ {% endif %}
148
+ <button class="btn btn-sm btn-outline-primary feedback-btn"
149
+ data-sense="{{ sense.name() }}">
150
+ This is the correct meaning
151
+ </button>
152
+ </div>
153
+ </div>
154
+ {% endfor %}
155
+ {% else %}
156
+ <div class="alert alert-warning mt-4">
157
+ No ambiguous words detected in the text.
158
+ </div>
159
+ {% endif %}
160
+ </div>
161
+
162
+ <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
163
+ <script>
164
+ document.querySelectorAll('.feedback-btn').forEach(btn => {
165
+ btn.addEventListener('click', async () => {
166
+ const senseName = btn.dataset.sense;
167
+ const word = "{{ target_word }}";
168
+ const context = "{{ sentence }}".toLowerCase().split(/[^a-z]+/).filter(w => w !== "");
169
+
170
+ try {
171
+ const response = await fetch('/feedback', {
172
+ method: 'POST',
173
+ headers: { 'Content-Type': 'application/json' },
174
+ body: JSON.stringify({ word, context, correct_sense: senseName })
175
+ });
176
+
177
+ const updatedSense = await response.json();
178
+
179
+ // Show updated sense section
180
+ const updatedSection = document.getElementById('updatedSense');
181
+ document.getElementById('updatedDefinition').textContent = updatedSense.definition;
182
+
183
+ const examplesList = document.getElementById('updatedExamples');
184
+ examplesList.innerHTML = '';
185
+ if(updatedSense.examples && updatedSense.examples.length > 0) {
186
+ updatedSense.examples.forEach(example => {
187
+ const li = document.createElement('li');
188
+ li.textContent = example;
189
+ examplesList.appendChild(li);
190
+ });
191
+ }
192
+
193
+ updatedSection.style.display = 'block';
194
+ setTimeout(() => {
195
+ window.scrollTo({
196
+ top: updatedSection.offsetTop - 100,
197
+ behavior: 'smooth'
198
+ });
199
+ }, 100);
200
+
201
+ } catch (error) {
202
+ console.error('Feedback error:', error);
203
+ }
204
+ });
205
+ });
206
+ </script>
207
+ </body>
208
+ </html>