Spaces:

gkc55
/

NLP_WSD

Sleeping

gkc55 committed on May 13, 2025

Commit

3fe126f

0 Parent(s):

Add Flask-based Word Sense Disambiguation Tool with Enhanced Lesk Algorithm

- Implemented a web application using Flask for word sense disambiguation.
- Added Enhanced Lesk algorithm with BERT integration for improved disambiguation accuracy.
- Created templates for input, results, error handling, and explanation of the Lesk algorithm.
- Included user feedback mechanism to adapt and improve disambiguation over time.
- Added example sentences for common ambiguous words to assist users.
- Established a feedback system to record user corrections and enhance future performance.
- Included necessary dependencies in requirements.txt for Flask, NLTK, Transformers, and PyTorch.

Files changed (10) hide show

app.py +495 -0
code.txt +495 -0
feedback_data.json +1 -0
flow.py +53 -0
requirements.txt +5 -0
tempCodeRunnerFile.py +495 -0
templates/error.html +84 -0
templates/index.html +127 -0
templates/lesk_explained.html +213 -0
templates/results.html +208 -0

app.py ADDED Viewed

	@@ -0,0 +1,495 @@

+from flask import Flask, render_template, request, redirect, url_for, jsonify, session
+import nltk
+from nltk.corpus import wordnet as wn
+from nltk.tokenize import word_tokenize, sent_tokenize
+from nltk.tag import pos_tag
+from nltk.stem import WordNetLemmatizer
+from collections import Counter
+import re
+import os
+import json
+import random
+# Download required NLTK resources
+nltk.download('wordnet')
+nltk.download('punkt')
+nltk.download('averaged_perceptron_tagger')
+nltk.download('stopwords')
+app = Flask(__name__)
+app.secret_key = 'wsd_secret_key_2023'
+# Path for storing feedback data
+FEEDBACK_FILE = 'feedback_data.json'
+class EnhancedLesk:
+    def __init__(self):
+        self.feedback = self.load_feedback()
+        self.lemmatizer = WordNetLemmatizer()
+        self.stopwords = set(nltk.corpus.stopwords.words('english'))
+        # Try to load BERT models if available
+        try:
+            from transformers import AutoTokenizer, AutoModel
+            import torch
+            # Load pre-trained model and tokenizer
+            print("Loading BERT models...")
+            self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
+            self.bert_model = AutoModel.from_pretrained('bert-base-uncased')
+            self.bert_available = True
+            print("BERT models loaded successfully")
+        except Exception as e:
+            print(f"BERT models not available: {e}")
+            print("Continuing without BERT embeddings")
+            self.bert_available = False
+    def load_feedback(self):
+        if os.path.exists(FEEDBACK_FILE):
+            with open(FEEDBACK_FILE) as f:
+                return json.load(f)
+        return {}
+    def save_feedback(self):
+        with open(FEEDBACK_FILE, 'w') as f:
+            json.dump(self.feedback, f)
+    def get_wordnet_pos(self, treebank_tag):
+        """Convert POS tag to WordNet POS format"""
+        if treebank_tag.startswith('J'):
+            return wn.ADJ
+        elif treebank_tag.startswith('V'):
+            return wn.VERB
+        elif treebank_tag.startswith('N'):
+            return wn.NOUN
+        elif treebank_tag.startswith('R'):
+            return wn.ADV
+        else:
+            return None
+    def process_context(self, sentence, target_word):
+        """Process context words with positional weighting"""
+        words = word_tokenize(sentence.lower())
+        # Find target word position
+        target_pos = -1
+        for i, word in enumerate(words):
+            if word.lower() == target_word.lower():
+                target_pos = i
+                break
+        # Process context words with proximity weighting
+        context_words = []
+        for i, word in enumerate(words):
+            if word.isalpha() and word not in self.stopwords:
+                lemma = self.lemmatizer.lemmatize(word)
+                # Weight by proximity to target word (closer = more important)
+                if target_pos >= 0:
+                    distance = abs(i - target_pos)
+                    # Add word multiple times based on proximity (max 5 times for adjacent words)
+                    weight = max(1, 6 - distance) if distance <= 5 else 1
+                    context_words.extend([lemma] * weight)
+                else:
+                    context_words.append(lemma)
+        return context_words
+    def calculate_overlap_score(self, sense, context):
+        """Calculate overlap between sense signature and context with improved weighting"""
+        # Create rich signature from sense
+        signature = []
+        # Add definition words (higher weight)
+        def_words = [w.lower() for w in word_tokenize(sense.definition())
+                    if w.isalpha() and w not in self.stopwords]
+        signature.extend(def_words * 2)  # Double weight for definition
+        # Add example words
+        for example in sense.examples():
+            ex_words = [w.lower() for w in word_tokenize(example)
+                       if w.isalpha() and w not in self.stopwords]
+            signature.extend(ex_words)
+        # Add hypernyms, hyponyms, meronyms and holonyms
+        for hypernym in sense.hypernyms():
+            hyper_words = [w.lower() for w in word_tokenize(hypernym.definition())
+                          if w.isalpha() and w not in self.stopwords]
+            signature.extend(hyper_words)
+        for hyponym in sense.hyponyms():
+            hypo_words = [w.lower() for w in word_tokenize(hyponym.definition())
+                         if w.isalpha() and w not in self.stopwords]
+            signature.extend(hypo_words)
+        # Add meronyms and holonyms
+        for meronym in sense.part_meronyms() + sense.substance_meronyms():
+            meronym_words = [w.lower() for w in word_tokenize(meronym.definition())
+                            if w.isalpha() and w not in self.stopwords]
+            signature.extend(meronym_words)
+        for holonym in sense.part_holonyms() + sense.substance_holonyms():
+            holonym_words = [w.lower() for w in word_tokenize(holonym.definition())
+                            if w.isalpha() and w not in self.stopwords]
+            signature.extend(holonym_words)
+        # Calculate overlap using Counter for better frequency matching
+        context_counter = Counter(context)
+        signature_counter = Counter(signature)
+        # Calculate weighted overlap
+        overlap_score = 0
+        for word, count in context_counter.items():
+            if word in signature_counter:
+                # Score is product of frequencies
+                overlap_score += count * min(signature_counter[word], 5)
+        return overlap_score
+    def bert_similarity(self, sense, context_sentence, target_word):
+        """Calculate semantic similarity using BERT embeddings"""
+        if not hasattr(self, 'bert_available') or not self.bert_available:
+            return 0
+        try:
+            import torch
+            # Create context-gloss pair as in GlossBERT
+            gloss = sense.definition()
+            # Tokenize
+            inputs = self.tokenizer(context_sentence, gloss, return_tensors="pt",
+                                   padding=True, truncation=True, max_length=512)
+            # Get embeddings
+            with torch.no_grad():
+                outputs = self.bert_model(**inputs)
+            # Use CLS token embedding for similarity
+            similarity = torch.cosine_similarity(
+                outputs.last_hidden_state[0, 0],
+                outputs.last_hidden_state[0, inputs.input_ids[0].tolist().index(self.tokenizer.sep_token_id) + 1]
+            ).item()
+            return similarity * 10  # Scale up to be comparable with other scores
+        except Exception as e:
+            print(f"Error in BERT similarity calculation: {e}")
+            return 0
+    def check_collocations(self, sentence, target_word):
+        """Check for common collocations that indicate specific senses.
+
+        Args:
+            sentence: Sentence to search; matching uses its lowercase form.
+            target_word: Word to look up in the collocation table (exact key match).
+
+        Returns:
+            (sense, 15) for the first listed phrase that occurs in the sentence
+            and for which WordNet has a synset of target_word with the phrase's
+            lexname; (None, 0) otherwise.
+        """
+        # Inner dict keys are WordNet lexnames; values are phrases that point
+        # to a sense with that lexname
+        collocations = {
+            "bat": {
+                "noun.animal": ["flying bat", "bat flying", "bat wings", "vampire bat", "fruit bat", "bat in the dark", "bat at night"],
+                "noun.artifact": ["baseball bat", "cricket bat", "swing the bat", "wooden bat", "hit with bat"]
+            },
+            "bank": {
+                "noun.artifact": ["bank account", "bank manager", "bank loan", "bank robbery", "money in bank"],
+                "noun.object": ["river bank", "bank of the river", "west bank", "bank erosion", "along the bank"]
+            },
+            "bass": {
+                "noun.animal": ["bass fish", "catch bass", "fishing bass", "largemouth bass"],
+                "noun.attribute": ["bass sound", "bass guitar", "bass player", "bass note", "bass drum"]
+            },
+            "spring": {
+                "noun.time": ["spring season", "this spring", "last spring", "spring weather", "spring flowers"],
+                "noun.artifact": ["metal spring", "spring coil", "spring mechanism"],
+                "noun.object": ["water spring", "hot spring", "spring water"]
+            },
+            "crane": {
+                "noun.animal": ["crane bird", "crane flew", "crane nest", "crane species"],
+                "noun.artifact": ["construction crane", "crane operator", "crane lifted"]
+            }
+        }
+        if target_word not in collocations:
+            return None, 0
+        # Check for collocations in sentence
+        sentence_lower = sentence.lower()
+        for domain, phrases in collocations[target_word].items():
+            for phrase in phrases:
+                # Plain substring test, so a phrase also matches inside longer words
+                if phrase.lower() in sentence_lower:
+                    # Find matching sense: the first synset with this lexname
+                    # wins; if there is none, the remaining phrases are tried
+                    for sense in wn.synsets(target_word):
+                        if sense.lexname() == domain:
+                            return sense, 15  # Very high confidence for collocations
+        return None, 0
+    def apply_rules(self, word, context, senses):
+        """Apply hand-coded rules for common ambiguous words.
+
+        Only "bat", "bank" and "bass" have rules. Rule groups are tried in the
+        order written; a group whose indicator words occur in the context but
+        for which no candidate sense qualifies falls through to the next group.
+
+        Args:
+            word: Target word (lowercased here).
+            context: Iterable of context words; duplicates are ignored.
+            senses: Candidate synsets to choose from.
+
+        Returns:
+            (score, sense) for the first rule that selects a sense, or
+            (0, None) when no rule applies. Note the order is (score, sense),
+            the reverse of check_collocations.
+        """
+        word = word.lower()
+        context_words = set(context)
+        # Rules for "bat"
+        if word == "bat":
+            # Animal sense rules
+            animal_indicators = {"fly", "flying", "flew", "wing", "wings", "night",
+                                "dark", "cave", "nocturnal", "mammal", "animal", "leather", "leathery"}
+            if any(indicator in context_words for indicator in animal_indicators):
+                # Find animal sense
+                for sense in senses:
+                    if sense.lexname() == "noun.animal":
+                        return 10, sense  # High confidence boost
+            # Sports equipment rules
+            sports_indicators = {"hit", "swing", "ball", "baseball", "cricket",
+                                "player", "game", "sport", "team", "wooden"}
+            if any(indicator in context_words for indicator in sports_indicators):
+                # Find artifact sense
+                for sense in senses:
+                    if sense.lexname() == "noun.artifact":
+                        return 8, sense  # High confidence boost
+        # Rules for "bank"
+        elif word == "bank":
+            # Financial institution rules
+            finance_indicators = {"money", "account", "deposit", "withdraw", "loan",
+                                 "credit", "debit", "financial", "cash", "check"}
+            if any(indicator in context_words for indicator in finance_indicators):
+                for sense in senses:
+                    # Substring test on the definition text, not a token match
+                    if "financial" in sense.definition() or "money" in sense.definition():
+                        return 10, sense
+            # River bank rules
+            river_indicators = {"river", "stream", "water", "flow", "shore", "beach"}
+            if any(indicator in context_words for indicator in river_indicators):
+                for sense in senses:
+                    if "river" in sense.definition() or "stream" in sense.definition():
+                        return 10, sense
+        # Rules for "bass"
+        elif word == "bass":
+            # Fish sense rules
+            fish_indicators = {"fish", "fishing", "catch", "caught", "water", "lake", "river"}
+            if any(indicator in context_words for indicator in fish_indicators):
+                for sense in senses:
+                    if sense.lexname() == "noun.animal":
+                        return 10, sense
+            # Sound/music sense rules
+            music_indicators = {"music", "sound", "guitar", "player", "band", "note", "tone", "instrument", "concert", "loud"}
+            if any(indicator in context_words for indicator in music_indicators):
+                for sense in senses:
+                    if sense.lexname() == "noun.attribute" or "music" in sense.definition():
+                        return 10, sense
+        # No rule matched with high confidence
+        return 0, None
+    def safe_compare_synsets(self, synset1, synset2):
+        """Safely compare two synsets, handling None values."""
+        if synset1 is None or synset2 is None:
+            return synset1 is synset2  # True only if both are None
+        # Use the built-in equality check for synsets
+        try:
+            return synset1 == synset2
+        except AttributeError:
+            return False  # If comparison fails, they're not equal
+    def disambiguate(self, sentence, word):
+        """Disambiguate a word in a given sentence context"""
+        word = word.lower()
+        # Get POS tag for the target word
+        word_tokens = word_tokenize(sentence)
+        pos_tags = pos_tag(word_tokens)
+        word_pos = None
+        for token, pos in pos_tags:
+            if token.lower() == word:
+                word_pos = self.get_wordnet_pos(pos)
+                break
+        # Get senses filtered by POS if available
+        if word_pos:
+            senses = [s for s in wn.synsets(word) if s.pos() == word_pos]
+            if not senses:
+                senses = wn.synsets(word)
+        else:
+            senses = wn.synsets(word)
+        if not senses:
+            return None, []
+        # Process context with positional weighting
+        context = self.process_context(sentence, word)
+        # 1. Check for collocations first (highest priority)
+        collocation_sense, collocation_score = self.check_collocations(sentence, word)
+        if collocation_sense and collocation_score > 0:
+            # Return the collocation sense and remaining senses as alternatives
+            top_senses = [s for s in senses if not self.safe_compare_synsets(s, collocation_sense)][:3]
+            return collocation_sense, top_senses
+        # 2. Apply rules for common ambiguous words
+        rule_score, rule_sense = self.apply_rules(word, context, senses)
+        # Score each sense
+        scored_senses = []
+        for sense in senses:
+            # If this sense was selected by rules, add the rule score
+            # FIX: Use safe comparison to prevent AttributeError
+            rule_boost = rule_score if (rule_sense is not None and self.safe_compare_synsets(sense, rule_sense)) else 0
+            # Calculate base score using overlap
+            overlap_score = self.calculate_overlap_score(sense, context)
+            # Calculate BERT similarity if available
+            bert_score = 0
+            if hasattr(self, 'bert_available') and self.bert_available:
+                bert_score = self.bert_similarity(sense, sentence, word)
+            # Apply feedback boost if available
+            feedback_key = f"{word}_{hash(sentence) % 10000}"
+            feedback_score = self.feedback.get(feedback_key, {}).get(sense.name(), 0)
+            # Calculate final score as weighted combination
+            final_score = (
+                overlap_score * 0.4 +
+                bert_score * 0.3 +
+                rule_boost * 0.2 +
+                feedback_score * 0.1
+            )
+            scored_senses.append((final_score, sense))
+        scored_senses.sort(reverse=True, key=lambda x: x[0])
+        if not scored_senses:
+            return None, []
+        best_sense = scored_senses[0][1]
+        top_senses = [s[1] for s in scored_senses[1:4]]
+        return best_sense, top_senses
+    def add_feedback(self, word, context, correct_sense):
+        """Store user feedback to improve future disambiguation"""
+        # Create a key based on word and hashed context
+        context_str = ' '.join(context[:10])  # Use first 10 context words
+        key = f"{word}_{hash(context_str) % 10000}"
+        if key not in self.feedback:
+            self.feedback[key] = {}
+        # Increase score for the correct sense
+        self.feedback[key][correct_sense] = self.feedback[key].get(correct_sense, 0) + 5
+        # Optionally decrease scores for other senses
+        for sense in wn.synsets(word):
+            if sense.name() != correct_sense and sense.name() in self.feedback[key]:
+                self.feedback[key][sense.name()] = max(0, self.feedback[key][sense.name()] - 1)
+        self.save_feedback()
+        # Return the updated sense information
+        for sense in wn.synsets(word):
+            if sense.name() == correct_sense:
+                return {
+                    'definition': sense.definition(),
+                    'examples': sense.examples()
+                }
+        return None
+# Initialize the Lesk processor
+# One module-level instance is used by the routes below; constructing it loads
+# the feedback file and attempts to load the BERT model.
+lesk_processor = EnhancedLesk()
+@app.route('/', methods=['GET', 'POST'])
+def index():
+    if request.method == 'POST':
+        text = request.form['text']
+        target_word = request.form.get('target_word', '')
+        return redirect(url_for('results', text=text, word=target_word))
+    return render_template('index.html')
+@app.route('/results')
+def results():
+    text = request.args.get('text', '')
+    target_word = request.args.get('word', '').lower()
+    if not target_word:
+        # Find ambiguous words (with multiple senses)
+        words = word_tokenize(text.lower())
+        ambiguous_words = []
+        for word in words:
+            if word.isalpha() and len(wn.synsets(word)) > 1:
+                ambiguous_words.append(word)
+        # If there are ambiguous words, use the first one
+        if ambiguous_words:
+            target_word = ambiguous_words[0]
+    best_sense = None
+    top_senses = []
+    highlighted_text = text
+    sentence = ""
+    context_words = []
+    if target_word:
+        sentences = sent_tokenize(text)
+        for sent in sentences:
+            if re.search(r'\b' + re.escape(target_word) + r'\b', sent, re.I):
+                sentence = sent
+                context_words = lesk_processor.process_context(sent, target_word)
+                try:
+                    best_sense, top_senses = lesk_processor.disambiguate(sent, target_word)
+                except Exception as e:
+                    print(f"Disambiguation error: {e}")
+                    return render_template('error.html',
+                                         error_message=f"Could not disambiguate the word '{target_word}'. Please try a different word or sentence.",
+                                         error_details=str(e))
+                highlighted_text = re.sub(
+                    r'\b' + re.escape(target_word) + r'\b',
+                    f'<span class="highlight-word">{target_word}</span>',
+                    text,
+                    flags=re.IGNORECASE
+                )
+                break
+    # Store in session for feedback
+    if best_sense:
+        session['last_disambiguation'] = {
+            'word': target_word,
+            'context': context_words,
+            'sentence': sentence
+        }
+    return render_template('results.html',
+                         text=text,
+                         highlighted_text=highlighted_text,
+                         target_word=target_word,
+                         best_sense=best_sense,
+                         top_senses=top_senses,
+                         sentence=sentence,
+                         context_words=', '.join([w for w in set(context_words)][:10]))  # Show unique context words
+@app.route('/feedback', methods=['POST'])
+def feedback():
+    data = request.get_json()
+    word = data.get('word')
+    context = data.get('context', [])
+    correct_sense = data.get('correct_sense')
+    if word and correct_sense:
+        updated_sense = lesk_processor.add_feedback(word, context, correct_sense)
+        return jsonify(updated_sense)
+    return jsonify({'error': 'Invalid feedback data'}), 400
+@app.route('/lesk-explained')
+def lesk_explained():
+    return render_template('lesk_explained.html')
+# Add error template handler
+@app.route('/error')
+def error():
+    """Render the error page from the 'message' and 'details' query parameters."""
+    query = request.args
+    return render_template(
+        'error.html',
+        error_message=query.get('message', 'An unknown error occurred'),
+        error_details=query.get('details', ''),
+    )
+if __name__ == '__main__':
+    app.run(debug=True)

code.txt ADDED Viewed

	@@ -0,0 +1,495 @@

+from flask import Flask, render_template, request, redirect, url_for, jsonify, session
+import nltk
+from nltk.corpus import wordnet as wn
+from nltk.tokenize import word_tokenize, sent_tokenize
+from nltk.tag import pos_tag
+from nltk.stem import WordNetLemmatizer
+from collections import Counter
+import re
+import os
+import json
+import random
+# Download required NLTK resources
+nltk.download('wordnet')
+nltk.download('punkt')
+nltk.download('averaged_perceptron_tagger')
+nltk.download('stopwords')
+app = Flask(__name__)
+app.secret_key = 'wsd_secret_key_2023'
+# Path for storing feedback data
+FEEDBACK_FILE = 'feedback_data.json'
+class EnhancedLesk:
+    def __init__(self):
+        self.feedback = self.load_feedback()
+        self.lemmatizer = WordNetLemmatizer()
+        self.stopwords = set(nltk.corpus.stopwords.words('english'))
+        # Try to load BERT models if available
+        try:
+            from transformers import AutoTokenizer, AutoModel
+            import torch
+            # Load pre-trained model and tokenizer
+            print("Loading BERT models...")
+            self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
+            self.bert_model = AutoModel.from_pretrained('bert-base-uncased')
+            self.bert_available = True
+            print("BERT models loaded successfully")
+        except Exception as e:
+            print(f"BERT models not available: {e}")
+            print("Continuing without BERT embeddings")
+            self.bert_available = False
+    def load_feedback(self):
+        if os.path.exists(FEEDBACK_FILE):
+            with open(FEEDBACK_FILE) as f:
+                return json.load(f)
+        return {}
+    def save_feedback(self):
+        with open(FEEDBACK_FILE, 'w') as f:
+            json.dump(self.feedback, f)
+    def get_wordnet_pos(self, treebank_tag):
+        """Convert POS tag to WordNet POS format"""
+        if treebank_tag.startswith('J'):
+            return wn.ADJ
+        elif treebank_tag.startswith('V'):
+            return wn.VERB
+        elif treebank_tag.startswith('N'):
+            return wn.NOUN
+        elif treebank_tag.startswith('R'):
+            return wn.ADV
+        else:
+            return None
+    def process_context(self, sentence, target_word):
+        """Process context words with positional weighting"""
+        words = word_tokenize(sentence.lower())
+        # Find target word position
+        target_pos = -1
+        for i, word in enumerate(words):
+            if word.lower() == target_word.lower():
+                target_pos = i
+                break
+        # Process context words with proximity weighting
+        context_words = []
+        for i, word in enumerate(words):
+            if word.isalpha() and word not in self.stopwords:
+                lemma = self.lemmatizer.lemmatize(word)
+                # Weight by proximity to target word (closer = more important)
+                if target_pos >= 0:
+                    distance = abs(i - target_pos)
+                    # Add word multiple times based on proximity (max 5 times for adjacent words)
+                    weight = max(1, 6 - distance) if distance <= 5 else 1
+                    context_words.extend([lemma] * weight)
+                else:
+                    context_words.append(lemma)
+        return context_words
+    def calculate_overlap_score(self, sense, context):
+        """Calculate overlap between sense signature and context with improved weighting"""
+        # Create rich signature from sense
+        signature = []
+        # Add definition words (higher weight)
+        def_words = [w.lower() for w in word_tokenize(sense.definition())
+                    if w.isalpha() and w not in self.stopwords]
+        signature.extend(def_words * 2)  # Double weight for definition
+        # Add example words
+        for example in sense.examples():
+            ex_words = [w.lower() for w in word_tokenize(example)
+                       if w.isalpha() and w not in self.stopwords]
+            signature.extend(ex_words)
+        # Add hypernyms, hyponyms, meronyms and holonyms
+        for hypernym in sense.hypernyms():
+            hyper_words = [w.lower() for w in word_tokenize(hypernym.definition())
+                          if w.isalpha() and w not in self.stopwords]
+            signature.extend(hyper_words)
+        for hyponym in sense.hyponyms():
+            hypo_words = [w.lower() for w in word_tokenize(hyponym.definition())
+                         if w.isalpha() and w not in self.stopwords]
+            signature.extend(hypo_words)
+        # Add meronyms and holonyms
+        for meronym in sense.part_meronyms() + sense.substance_meronyms():
+            meronym_words = [w.lower() for w in word_tokenize(meronym.definition())
+                            if w.isalpha() and w not in self.stopwords]
+            signature.extend(meronym_words)
+        for holonym in sense.part_holonyms() + sense.substance_holonyms():
+            holonym_words = [w.lower() for w in word_tokenize(holonym.definition())
+                            if w.isalpha() and w not in self.stopwords]
+            signature.extend(holonym_words)
+        # Calculate overlap using Counter for better frequency matching
+        context_counter = Counter(context)
+        signature_counter = Counter(signature)
+        # Calculate weighted overlap
+        overlap_score = 0
+        for word, count in context_counter.items():
+            if word in signature_counter:
+                # Score is product of frequencies
+                overlap_score += count * min(signature_counter[word], 5)
+        return overlap_score
+    def bert_similarity(self, sense, context_sentence, target_word):
+        """Calculate semantic similarity using BERT embeddings"""
+        if not hasattr(self, 'bert_available') or not self.bert_available:
+            return 0
+        try:
+            import torch
+            # Create context-gloss pair as in GlossBERT
+            gloss = sense.definition()
+            # Tokenize
+            inputs = self.tokenizer(context_sentence, gloss, return_tensors="pt",
+                                   padding=True, truncation=True, max_length=512)
+            # Get embeddings
+            with torch.no_grad():
+                outputs = self.bert_model(**inputs)
+            # Use CLS token embedding for similarity
+            similarity = torch.cosine_similarity(
+                outputs.last_hidden_state[0, 0],
+                outputs.last_hidden_state[0, inputs.input_ids[0].tolist().index(self.tokenizer.sep_token_id) + 1]
+            ).item()
+            return similarity * 10  # Scale up to be comparable with other scores
+        except Exception as e:
+            print(f"Error in BERT similarity calculation: {e}")
+            return 0
+    def check_collocations(self, sentence, target_word):
+        """Check for common collocations that indicate specific senses.
+
+        Args:
+            sentence: Sentence to search; matching uses its lowercase form.
+            target_word: Word to look up in the collocation table (exact key match).
+
+        Returns:
+            (sense, 15) for the first listed phrase that occurs in the sentence
+            and for which WordNet has a synset of target_word with the phrase's
+            lexname; (None, 0) otherwise.
+        """
+        # Inner dict keys are WordNet lexnames; values are phrases that point
+        # to a sense with that lexname
+        collocations = {
+            "bat": {
+                "noun.animal": ["flying bat", "bat flying", "bat wings", "vampire bat", "fruit bat", "bat in the dark", "bat at night"],
+                "noun.artifact": ["baseball bat", "cricket bat", "swing the bat", "wooden bat", "hit with bat"]
+            },
+            "bank": {
+                "noun.artifact": ["bank account", "bank manager", "bank loan", "bank robbery", "money in bank"],
+                "noun.object": ["river bank", "bank of the river", "west bank", "bank erosion", "along the bank"]
+            },
+            "bass": {
+                "noun.animal": ["bass fish", "catch bass", "fishing bass", "largemouth bass"],
+                "noun.attribute": ["bass sound", "bass guitar", "bass player", "bass note", "bass drum"]
+            },
+            "spring": {
+                "noun.time": ["spring season", "this spring", "last spring", "spring weather", "spring flowers"],
+                "noun.artifact": ["metal spring", "spring coil", "spring mechanism"],
+                "noun.object": ["water spring", "hot spring", "spring water"]
+            },
+            "crane": {
+                "noun.animal": ["crane bird", "crane flew", "crane nest", "crane species"],
+                "noun.artifact": ["construction crane", "crane operator", "crane lifted"]
+            }
+        }
+        if target_word not in collocations:
+            return None, 0
+        # Check for collocations in sentence
+        sentence_lower = sentence.lower()
+        for domain, phrases in collocations[target_word].items():
+            for phrase in phrases:
+                # Plain substring test, so a phrase also matches inside longer words
+                if phrase.lower() in sentence_lower:
+                    # Find matching sense: the first synset with this lexname
+                    # wins; if there is none, the remaining phrases are tried
+                    for sense in wn.synsets(target_word):
+                        if sense.lexname() == domain:
+                            return sense, 15  # Very high confidence for collocations
+        return None, 0
+    def apply_rules(self, word, context, senses):
+        """Apply hand-coded rules for common ambiguous words.
+
+        Only "bat", "bank" and "bass" have rules. Rule groups are tried in the
+        order written; a group whose indicator words occur in the context but
+        for which no candidate sense qualifies falls through to the next group.
+
+        Args:
+            word: Target word (lowercased here).
+            context: Iterable of context words; duplicates are ignored.
+            senses: Candidate synsets to choose from.
+
+        Returns:
+            (score, sense) for the first rule that selects a sense, or
+            (0, None) when no rule applies. Note the order is (score, sense),
+            the reverse of check_collocations.
+        """
+        word = word.lower()
+        context_words = set(context)
+        # Rules for "bat"
+        if word == "bat":
+            # Animal sense rules
+            animal_indicators = {"fly", "flying", "flew", "wing", "wings", "night",
+                                "dark", "cave", "nocturnal", "mammal", "animal", "leather", "leathery"}
+            if any(indicator in context_words for indicator in animal_indicators):
+                # Find animal sense
+                for sense in senses:
+                    if sense.lexname() == "noun.animal":
+                        return 10, sense  # High confidence boost
+            # Sports equipment rules
+            sports_indicators = {"hit", "swing", "ball", "baseball", "cricket",
+                                "player", "game", "sport", "team", "wooden"}
+            if any(indicator in context_words for indicator in sports_indicators):
+                # Find artifact sense
+                for sense in senses:
+                    if sense.lexname() == "noun.artifact":
+                        return 8, sense  # High confidence boost
+        # Rules for "bank"
+        elif word == "bank":
+            # Financial institution rules
+            finance_indicators = {"money", "account", "deposit", "withdraw", "loan",
+                                 "credit", "debit", "financial", "cash", "check"}
+            if any(indicator in context_words for indicator in finance_indicators):
+                for sense in senses:
+                    # Substring test on the definition text, not a token match
+                    if "financial" in sense.definition() or "money" in sense.definition():
+                        return 10, sense
+            # River bank rules
+            river_indicators = {"river", "stream", "water", "flow", "shore", "beach"}
+            if any(indicator in context_words for indicator in river_indicators):
+                for sense in senses:
+                    if "river" in sense.definition() or "stream" in sense.definition():
+                        return 10, sense
+        # Rules for "bass"
+        elif word == "bass":
+            # Fish sense rules
+            fish_indicators = {"fish", "fishing", "catch", "caught", "water", "lake", "river"}
+            if any(indicator in context_words for indicator in fish_indicators):
+                for sense in senses:
+                    if sense.lexname() == "noun.animal":
+                        return 10, sense
+            # Sound/music sense rules
+            music_indicators = {"music", "sound", "guitar", "player", "band", "note", "tone", "instrument", "concert", "loud"}
+            if any(indicator in context_words for indicator in music_indicators):
+                for sense in senses:
+                    if sense.lexname() == "noun.attribute" or "music" in sense.definition():
+                        return 10, sense
+        # No rule matched with high confidence
+        return 0, None
+    def safe_compare_synsets(self, synset1, synset2):
+        """Safely compare two synsets, handling None values."""
+        if synset1 is None or synset2 is None:
+            return synset1 is synset2  # True only if both are None
+        # Use the built-in equality check for synsets
+        try:
+            return synset1 == synset2
+        except AttributeError:
+            return False  # If comparison fails, they're not equal
+    def disambiguate(self, sentence, word):
+        """Disambiguate a word in a given sentence context"""
+        word = word.lower()
+        # Get POS tag for the target word
+        word_tokens = word_tokenize(sentence)
+        pos_tags = pos_tag(word_tokens)
+        word_pos = None
+        for token, pos in pos_tags:
+            if token.lower() == word:
+                word_pos = self.get_wordnet_pos(pos)
+                break
+        # Get senses filtered by POS if available
+        if word_pos:
+            senses = [s for s in wn.synsets(word) if s.pos() == word_pos]
+            if not senses:
+                senses = wn.synsets(word)
+        else:
+            senses = wn.synsets(word)
+        if not senses:
+            return None, []
+        # Process context with positional weighting
+        context = self.process_context(sentence, word)
+        # 1. Check for collocations first (highest priority)
+        collocation_sense, collocation_score = self.check_collocations(sentence, word)
+        if collocation_sense and collocation_score > 0:
+            # Return the collocation sense and remaining senses as alternatives
+            top_senses = [s for s in senses if not self.safe_compare_synsets(s, collocation_sense)][:3]
+            return collocation_sense, top_senses
+        # 2. Apply rules for common ambiguous words
+        rule_score, rule_sense = self.apply_rules(word, context, senses)
+        # Score each sense
+        scored_senses = []
+        for sense in senses:
+            # If this sense was selected by rules, add the rule score
+            # FIX: Use safe comparison to prevent AttributeError
+            rule_boost = rule_score if (rule_sense is not None and self.safe_compare_synsets(sense, rule_sense)) else 0
+            # Calculate base score using overlap
+            overlap_score = self.calculate_overlap_score(sense, context)
+            # Calculate BERT similarity if available
+            bert_score = 0
+            if hasattr(self, 'bert_available') and self.bert_available:
+                bert_score = self.bert_similarity(sense, sentence, word)
+            # Apply feedback boost if available
+            feedback_key = f"{word}_{hash(sentence) % 10000}"
+            feedback_score = self.feedback.get(feedback_key, {}).get(sense.name(), 0)
+            # Calculate final score as weighted combination
+            final_score = (
+                overlap_score * 0.4 +
+                bert_score * 0.3 +
+                rule_boost * 0.2 +
+                feedback_score * 0.1
+            )
+            scored_senses.append((final_score, sense))
+        scored_senses.sort(reverse=True, key=lambda x: x[0])
+        if not scored_senses:
+            return None, []
+        best_sense = scored_senses[0][1]
+        top_senses = [s[1] for s in scored_senses[1:4]]
+        return best_sense, top_senses
+    def add_feedback(self, word, context, correct_sense):
+        """Store user feedback to improve future disambiguation"""
+        # Create a key based on word and hashed context
+        context_str = ' '.join(context[:10])  # Use first 10 context words
+        key = f"{word}_{hash(context_str) % 10000}"
+        if key not in self.feedback:
+            self.feedback[key] = {}
+        # Increase score for the correct sense
+        self.feedback[key][correct_sense] = self.feedback[key].get(correct_sense, 0) + 5
+        # Optionally decrease scores for other senses
+        for sense in wn.synsets(word):
+            if sense.name() != correct_sense and sense.name() in self.feedback[key]:
+                self.feedback[key][sense.name()] = max(0, self.feedback[key][sense.name()] - 1)
+        self.save_feedback()
+        # Return the updated sense information
+        for sense in wn.synsets(word):
+            if sense.name() == correct_sense:
+                return {
+                    'definition': sense.definition(),
+                    'examples': sense.examples()
+                }
+        return None
+# Initialize the Lesk processor
+# Created once at import time and used by the routes below; the constructor
+# loads the saved feedback and tries to load the BERT model.
+lesk_processor = EnhancedLesk()
+@app.route('/', methods=['GET', 'POST'])
+def index():
+    if request.method == 'POST':
+        text = request.form['text']
+        target_word = request.form.get('target_word', '')
+        return redirect(url_for('results', text=text, word=target_word))
+    return render_template('index.html')
+@app.route('/results')
+def results():
+    text = request.args.get('text', '')
+    target_word = request.args.get('word', '').lower()
+    if not target_word:
+        # Find ambiguous words (with multiple senses)
+        words = word_tokenize(text.lower())
+        ambiguous_words = []
+        for word in words:
+            if word.isalpha() and len(wn.synsets(word)) > 1:
+                ambiguous_words.append(word)
+        # If there are ambiguous words, use the first one
+        if ambiguous_words:
+            target_word = ambiguous_words[0]
+    best_sense = None
+    top_senses = []
+    highlighted_text = text
+    sentence = ""
+    context_words = []
+    if target_word:
+        sentences = sent_tokenize(text)
+        for sent in sentences:
+            if re.search(r'\b' + re.escape(target_word) + r'\b', sent, re.I):
+                sentence = sent
+                context_words = lesk_processor.process_context(sent, target_word)
+                try:
+                    best_sense, top_senses = lesk_processor.disambiguate(sent, target_word)
+                except Exception as e:
+                    print(f"Disambiguation error: {e}")
+                    return render_template('error.html',
+                                         error_message=f"Could not disambiguate the word '{target_word}'. Please try a different word or sentence.",
+                                         error_details=str(e))
+                highlighted_text = re.sub(
+                    r'\b' + re.escape(target_word) + r'\b',
+                    f'<span class="highlight-word">{target_word}</span>',
+                    text,
+                    flags=re.IGNORECASE
+                )
+                break
+    # Store in session for feedback
+    if best_sense:
+        session['last_disambiguation'] = {
+            'word': target_word,
+            'context': context_words,
+            'sentence': sentence
+        }
+    return render_template('results.html',
+                         text=text,
+                         highlighted_text=highlighted_text,
+                         target_word=target_word,
+                         best_sense=best_sense,
+                         top_senses=top_senses,
+                         sentence=sentence,
+                         context_words=', '.join([w for w in set(context_words)][:10]))  # Show unique context words
+@app.route('/feedback', methods=['POST'])
+def feedback():
+    data = request.get_json()
+    word = data.get('word')
+    context = data.get('context', [])
+    correct_sense = data.get('correct_sense')
+    if word and correct_sense:
+        updated_sense = lesk_processor.add_feedback(word, context, correct_sense)
+        return jsonify(updated_sense)
+    return jsonify({'error': 'Invalid feedback data'}), 400
+@app.route('/lesk-explained')
+def lesk_explained():
+    return render_template('lesk_explained.html')
+# Add error template handler
+@app.route('/error')
+def error():
+    error_message = request.args.get('message', 'An unknown error occurred')
+    error_details = request.args.get('details', '')
+    return render_template('error.html', error_message=error_message, error_details=error_details)
+if __name__ == '__main__':
+    app.run(debug=True)

feedback_data.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"tripped_i_go_trip_and_tripped_due_to_imbalance": {"stumble.v.02": 5}, "bat_8076": {"bat.n.01": 5}, "saw_8076": {"see.v.19": 5}, "spring_1682": {"spring.n.01": 5}, "trunk_9387": {"proboscis.n.02": 5}, "bank_7813": {"bank.n.01": 5}}

flow.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+from matplotlib.sankey import Sankey
+# Create a flowchart using matplotlib with boxes and arrows
+def draw_flowchart():
+    fig, ax = plt.subplots(figsize=(12, 10))
+    ax.axis('off')
+    # Define boxes with text
+    boxes = {
+        'start': (0.4, 0.9, 0.2, 0.05, 'Start: Input Sentence and Target Word'),
+        'pos_tag': (0.4, 0.82, 0.2, 0.05, 'POS Tagging of Target Word'),
+        'get_senses': (0.4, 0.74, 0.2, 0.05, 'Get WordNet Senses (Filtered by POS)'),
+        'process_context': (0.4, 0.66, 0.2, 0.05, 'Process Context with Positional Weighting'),
+        'check_collocations': (0.4, 0.58, 0.2, 0.05, 'Check for Collocations'),
+        'apply_rules': (0.4, 0.5, 0.2, 0.05, 'Apply Rule-Based Boosting'),
+        'calculate_overlap': (0.4, 0.42, 0.2, 0.05, 'Calculate Overlap Score (Lesk)'),
+        'bert_similarity': (0.4, 0.34, 0.2, 0.05, 'Calculate BERT Semantic Similarity'),
+        'feedback_boost': (0.4, 0.26, 0.2, 0.05, 'Apply Feedback Boost'),
+        'combine_scores': (0.4, 0.18, 0.2, 0.05, 'Combine Scores with Weights'),
+        'select_best': (0.4, 0.1, 0.2, 0.05, 'Select Best Sense and Alternatives'),
+        'end': (0.4, 0.02, 0.2, 0.05, 'End: Return Disambiguation Result')
+    }
+    # Draw boxes
+    for key, (x, y, w, h, text) in boxes.items():
+        rect = plt.Rectangle((x, y), w, h, fill=True, edgecolor='black', facecolor='#cce5ff')
+        ax.add_patch(rect)
+        ax.text(x + w/2, y + h/2, text, ha='center', va='center', fontsize=10, wrap=True)
+    # Draw arrows between boxes
+    def draw_arrow(start_key, end_key):
+        x_start, y_start, w_start, h_start, _ = boxes[start_key]
+        x_end, y_end, w_end, h_end, _ = boxes[end_key]
+        ax.annotate('', xy=(x_end + w_end/2, y_end + h_end), xytext=(x_start + w_start/2, y_start),
+                    arrowprops=dict(arrowstyle='->', lw=1.5))
+    flow_sequence = [
+        'start', 'pos_tag', 'get_senses', 'process_context', 'check_collocations',
+        'apply_rules', 'calculate_overlap', 'bert_similarity', 'feedback_boost',
+        'combine_scores', 'select_best', 'end'
+    ]
+    for i in range(len(flow_sequence) - 1):
+        draw_arrow(flow_sequence[i], flow_sequence[i+1])
+    plt.title('Flowchart of Enhanced Lesk-based Word Sense Disambiguation Algorithm', fontsize=14)
+    plt.show()
+# Draw the flowchart
+draw_flowchart()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+Flask==2.3.2
+nltk==3.8.1
+Werkzeug==2.3.6
+transformers==4.28.1
+torch==2.2.0

tempCodeRunnerFile.py ADDED Viewed

	@@ -0,0 +1,495 @@

+from flask import Flask, render_template, request, redirect, url_for, jsonify, session
+import nltk
+from nltk.corpus import wordnet as wn
+from nltk.tokenize import word_tokenize, sent_tokenize
+from nltk.tag import pos_tag
+from nltk.stem import WordNetLemmatizer
+from collections import Counter
+import re
+import os
+import json
+import random
+# Download required NLTK resources
+nltk.download('wordnet')
+nltk.download('punkt')
+nltk.download('averaged_perceptron_tagger')
+nltk.download('stopwords')
+app = Flask(__name__)
+app.secret_key = 'wsd_secret_key_2023'
+# Path for storing feedback data
+FEEDBACK_FILE = 'feedback_data.json'
+class EnhancedLesk:
+    def __init__(self):
+        self.feedback = self.load_feedback()
+        self.lemmatizer = WordNetLemmatizer()
+        self.stopwords = set(nltk.corpus.stopwords.words('english'))
+        # Try to load BERT models if available
+        try:
+            from transformers import AutoTokenizer, AutoModel
+            import torch
+            # Load pre-trained model and tokenizer
+            print("Loading BERT models...")
+            self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
+            self.bert_model = AutoModel.from_pretrained('bert-base-uncased')
+            self.bert_available = True
+            print("BERT models loaded successfully")
+        except Exception as e:
+            print(f"BERT models not available: {e}")
+            print("Continuing without BERT embeddings")
+            self.bert_available = False
+    def load_feedback(self):
+        if os.path.exists(FEEDBACK_FILE):
+            with open(FEEDBACK_FILE) as f:
+                return json.load(f)
+        return {}
+    def save_feedback(self):
+        with open(FEEDBACK_FILE, 'w') as f:
+            json.dump(self.feedback, f)
+    def get_wordnet_pos(self, treebank_tag):
+        """Convert POS tag to WordNet POS format"""
+        if treebank_tag.startswith('J'):
+            return wn.ADJ
+        elif treebank_tag.startswith('V'):
+            return wn.VERB
+        elif treebank_tag.startswith('N'):
+            return wn.NOUN
+        elif treebank_tag.startswith('R'):
+            return wn.ADV
+        else:
+            return None
+    def process_context(self, sentence, target_word):
+        """Process context words with positional weighting"""
+        words = word_tokenize(sentence.lower())
+        # Find target word position
+        target_pos = -1
+        for i, word in enumerate(words):
+            if word.lower() == target_word.lower():
+                target_pos = i
+                break
+        # Process context words with proximity weighting
+        context_words = []
+        for i, word in enumerate(words):
+            if word.isalpha() and word not in self.stopwords:
+                lemma = self.lemmatizer.lemmatize(word)
+                # Weight by proximity to target word (closer = more important)
+                if target_pos >= 0:
+                    distance = abs(i - target_pos)
+                    # Add word multiple times based on proximity (max 5 times for adjacent words)
+                    weight = max(1, 6 - distance) if distance <= 5 else 1
+                    context_words.extend([lemma] * weight)
+                else:
+                    context_words.append(lemma)
+        return context_words
+    def calculate_overlap_score(self, sense, context):
+        """Calculate overlap between sense signature and context with improved weighting"""
+        # Create rich signature from sense
+        signature = []
+        # Add definition words (higher weight)
+        def_words = [w.lower() for w in word_tokenize(sense.definition())
+                    if w.isalpha() and w not in self.stopwords]
+        signature.extend(def_words * 2)  # Double weight for definition
+        # Add example words
+        for example in sense.examples():
+            ex_words = [w.lower() for w in word_tokenize(example)
+                       if w.isalpha() and w not in self.stopwords]
+            signature.extend(ex_words)
+        # Add hypernyms, hyponyms, meronyms and holonyms
+        for hypernym in sense.hypernyms():
+            hyper_words = [w.lower() for w in word_tokenize(hypernym.definition())
+                          if w.isalpha() and w not in self.stopwords]
+            signature.extend(hyper_words)
+        for hyponym in sense.hyponyms():
+            hypo_words = [w.lower() for w in word_tokenize(hyponym.definition())
+                         if w.isalpha() and w not in self.stopwords]
+            signature.extend(hypo_words)
+        # Add meronyms and holonyms
+        for meronym in sense.part_meronyms() + sense.substance_meronyms():
+            meronym_words = [w.lower() for w in word_tokenize(meronym.definition())
+                            if w.isalpha() and w not in self.stopwords]
+            signature.extend(meronym_words)
+        for holonym in sense.part_holonyms() + sense.substance_holonyms():
+            holonym_words = [w.lower() for w in word_tokenize(holonym.definition())
+                            if w.isalpha() and w not in self.stopwords]
+            signature.extend(holonym_words)
+        # Calculate overlap using Counter for better frequency matching
+        context_counter = Counter(context)
+        signature_counter = Counter(signature)
+        # Calculate weighted overlap
+        overlap_score = 0
+        for word, count in context_counter.items():
+            if word in signature_counter:
+                # Score is product of frequencies
+                overlap_score += count * min(signature_counter[word], 5)
+        return overlap_score
+    def bert_similarity(self, sense, context_sentence, target_word):
+        """Calculate semantic similarity using BERT embeddings"""
+        if not hasattr(self, 'bert_available') or not self.bert_available:
+            return 0
+        try:
+            import torch
+            # Create context-gloss pair as in GlossBERT
+            gloss = sense.definition()
+            # Tokenize
+            inputs = self.tokenizer(context_sentence, gloss, return_tensors="pt",
+                                   padding=True, truncation=True, max_length=512)
+            # Get embeddings
+            with torch.no_grad():
+                outputs = self.bert_model(**inputs)
+            # Use CLS token embedding for similarity
+            similarity = torch.cosine_similarity(
+                outputs.last_hidden_state[0, 0],
+                outputs.last_hidden_state[0, inputs.input_ids[0].tolist().index(self.tokenizer.sep_token_id) + 1]
+            ).item()
+            return similarity * 10  # Scale up to be comparable with other scores
+        except Exception as e:
+            print(f"Error in BERT similarity calculation: {e}")
+            return 0
+    def check_collocations(self, sentence, target_word):
+        """Check for common collocations that indicate specific senses"""
+        collocations = {
+            "bat": {
+                "noun.animal": ["flying bat", "bat flying", "bat wings", "vampire bat", "fruit bat", "bat in the dark", "bat at night"],
+                "noun.artifact": ["baseball bat", "cricket bat", "swing the bat", "wooden bat", "hit with bat"]
+            },
+            "bank": {
+                "noun.artifact": ["bank account", "bank manager", "bank loan", "bank robbery", "money in bank"],
+                "noun.object": ["river bank", "bank of the river", "west bank", "bank erosion", "along the bank"]
+            },
+            "bass": {
+                "noun.animal": ["bass fish", "catch bass", "fishing bass", "largemouth bass"],
+                "noun.attribute": ["bass sound", "bass guitar", "bass player", "bass note", "bass drum"]
+            },
+            "spring": {
+                "noun.time": ["spring season", "this spring", "last spring", "spring weather", "spring flowers"],
+                "noun.artifact": ["metal spring", "spring coil", "spring mechanism"],
+                "noun.object": ["water spring", "hot spring", "spring water"]
+            },
+            "crane": {
+                "noun.animal": ["crane bird", "crane flew", "crane nest", "crane species"],
+                "noun.artifact": ["construction crane", "crane operator", "crane lifted"]
+            }
+        }
+        if target_word not in collocations:
+            return None, 0
+        # Check for collocations in sentence
+        sentence_lower = sentence.lower()
+        for domain, phrases in collocations[target_word].items():
+            for phrase in phrases:
+                if phrase.lower() in sentence_lower:
+                    # Find matching sense
+                    for sense in wn.synsets(target_word):
+                        if sense.lexname() == domain:
+                            return sense, 15  # Very high confidence for collocations
+        return None, 0
+    def apply_rules(self, word, context, senses):
+        """Apply hand-coded rules for common ambiguous words"""
+        word = word.lower()
+        context_words = set(context)
+        # Rules for "bat"
+        if word == "bat":
+            # Animal sense rules
+            animal_indicators = {"fly", "flying", "flew", "wing", "wings", "night",
+                                "dark", "cave", "nocturnal", "mammal", "animal", "leather", "leathery"}
+            if any(indicator in context_words for indicator in animal_indicators):
+                # Find animal sense
+                for sense in senses:
+                    if sense.lexname() == "noun.animal":
+                        return 10, sense  # High confidence boost
+            # Sports equipment rules
+            sports_indicators = {"hit", "swing", "ball", "baseball", "cricket",
+                                "player", "game", "sport", "team", "wooden"}
+            if any(indicator in context_words for indicator in sports_indicators):
+                # Find artifact sense
+                for sense in senses:
+                    if sense.lexname() == "noun.artifact":
+                        return 8, sense  # High confidence boost
+        # Rules for "bank"
+        elif word == "bank":
+            # Financial institution rules
+            finance_indicators = {"money", "account", "deposit", "withdraw", "loan",
+                                 "credit", "debit", "financial", "cash", "check"}
+            if any(indicator in context_words for indicator in finance_indicators):
+                for sense in senses:
+                    if "financial" in sense.definition() or "money" in sense.definition():
+                        return 10, sense
+            # River bank rules
+            river_indicators = {"river", "stream", "water", "flow", "shore", "beach"}
+            if any(indicator in context_words for indicator in river_indicators):
+                for sense in senses:
+                    if "river" in sense.definition() or "stream" in sense.definition():
+                        return 10, sense
+        # Rules for "bass"
+        elif word == "bass":
+            # Fish sense rules
+            fish_indicators = {"fish", "fishing", "catch", "caught", "water", "lake", "river"}
+            if any(indicator in context_words for indicator in fish_indicators):
+                for sense in senses:
+                    if sense.lexname() == "noun.animal":
+                        return 10, sense
+            # Sound/music sense rules
+            music_indicators = {"music", "sound", "guitar", "player", "band", "note", "tone", "instrument", "concert", "loud"}
+            if any(indicator in context_words for indicator in music_indicators):
+                for sense in senses:
+                    if sense.lexname() == "noun.attribute" or "music" in sense.definition():
+                        return 10, sense
+        # No rule matched with high confidence
+        return 0, None
+    def safe_compare_synsets(self, synset1, synset2):
+        """Safely compare two synsets, handling None values."""
+        if synset1 is None or synset2 is None:
+            return synset1 is synset2  # True only if both are None
+        # Use the built-in equality check for synsets
+        try:
+            return synset1 == synset2
+        except AttributeError:
+            return False  # If comparison fails, they're not equal
+    def disambiguate(self, sentence, word):
+        """Disambiguate a word in a given sentence context"""
+        word = word.lower()
+        # Get POS tag for the target word
+        word_tokens = word_tokenize(sentence)
+        pos_tags = pos_tag(word_tokens)
+        word_pos = None
+        for token, pos in pos_tags:
+            if token.lower() == word:
+                word_pos = self.get_wordnet_pos(pos)
+                break
+        # Get senses filtered by POS if available
+        if word_pos:
+            senses = [s for s in wn.synsets(word) if s.pos() == word_pos]
+            if not senses:
+                senses = wn.synsets(word)
+        else:
+            senses = wn.synsets(word)
+        if not senses:
+            return None, []
+        # Process context with positional weighting
+        context = self.process_context(sentence, word)
+        # 1. Check for collocations first (highest priority)
+        collocation_sense, collocation_score = self.check_collocations(sentence, word)
+        if collocation_sense and collocation_score > 0:
+            # Return the collocation sense and remaining senses as alternatives
+            top_senses = [s for s in senses if not self.safe_compare_synsets(s, collocation_sense)][:3]
+            return collocation_sense, top_senses
+        # 2. Apply rules for common ambiguous words
+        rule_score, rule_sense = self.apply_rules(word, context, senses)
+        # Score each sense
+        scored_senses = []
+        for sense in senses:
+            # If this sense was selected by rules, add the rule score
+            # FIX: Use safe comparison to prevent AttributeError
+            rule_boost = rule_score if (rule_sense is not None and self.safe_compare_synsets(sense, rule_sense)) else 0
+            # Calculate base score using overlap
+            overlap_score = self.calculate_overlap_score(sense, context)
+            # Calculate BERT similarity if available
+            bert_score = 0
+            if hasattr(self, 'bert_available') and self.bert_available:
+                bert_score = self.bert_similarity(sense, sentence, word)
+            # Apply feedback boost if available
+            feedback_key = f"{word}_{hash(sentence) % 10000}"
+            feedback_score = self.feedback.get(feedback_key, {}).get(sense.name(), 0)
+            # Calculate final score as weighted combination
+            final_score = (
+                overlap_score * 0.4 +
+                bert_score * 0.3 +
+                rule_boost * 0.2 +
+                feedback_score * 0.1
+            )
+            scored_senses.append((final_score, sense))
+        scored_senses.sort(reverse=True, key=lambda x: x[0])
+        if not scored_senses:
+            return None, []
+        best_sense = scored_senses[0][1]
+        top_senses = [s[1] for s in scored_senses[1:4]]
+        return best_sense, top_senses
+    def add_feedback(self, word, context, correct_sense):
+        """Store user feedback to improve future disambiguation"""
+        # Create a key based on word and hashed context
+        context_str = ' '.join(context[:10])  # Use first 10 context words
+        key = f"{word}_{hash(context_str) % 10000}"
+        if key not in self.feedback:
+            self.feedback[key] = {}
+        # Increase score for the correct sense
+        self.feedback[key][correct_sense] = self.feedback[key].get(correct_sense, 0) + 5
+        # Optionally decrease scores for other senses
+        for sense in wn.synsets(word):
+            if sense.name() != correct_sense and sense.name() in self.feedback[key]:
+                self.feedback[key][sense.name()] = max(0, self.feedback[key][sense.name()] - 1)
+        self.save_feedback()
+        # Return the updated sense information
+        for sense in wn.synsets(word):
+            if sense.name() == correct_sense:
+                return {
+                    'definition': sense.definition(),
+                    'examples': sense.examples()
+                }
+        return None
+# Initialize the Lesk processor
+# Created once at import time and used by the routes below; the constructor
+# loads the saved feedback and tries to load the BERT model.
+lesk_processor = EnhancedLesk()
+@app.route('/', methods=['GET', 'POST'])
+def index():
+    if request.method == 'POST':
+        text = request.form['text']
+        target_word = request.form.get('target_word', '')
+        return redirect(url_for('results', text=text, word=target_word))
+    return render_template('index.html')
+@app.route('/results')
+def results():
+    text = request.args.get('text', '')
+    target_word = request.args.get('word', '').lower()
+    if not target_word:
+        # Find ambiguous words (with multiple senses)
+        words = word_tokenize(text.lower())
+        ambiguous_words = []
+        for word in words:
+            if word.isalpha() and len(wn.synsets(word)) > 1:
+                ambiguous_words.append(word)
+        # If there are ambiguous words, use the first one
+        if ambiguous_words:
+            target_word = ambiguous_words[0]
+    best_sense = None
+    top_senses = []
+    highlighted_text = text
+    sentence = ""
+    context_words = []
+    if target_word:
+        sentences = sent_tokenize(text)
+        for sent in sentences:
+            if re.search(r'\b' + re.escape(target_word) + r'\b', sent, re.I):
+                sentence = sent
+                context_words = lesk_processor.process_context(sent, target_word)
+                try:
+                    best_sense, top_senses = lesk_processor.disambiguate(sent, target_word)
+                except Exception as e:
+                    print(f"Disambiguation error: {e}")
+                    return render_template('error.html',
+                                         error_message=f"Could not disambiguate the word '{target_word}'. Please try a different word or sentence.",
+                                         error_details=str(e))
+                highlighted_text = re.sub(
+                    r'\b' + re.escape(target_word) + r'\b',
+                    f'<span class="highlight-word">{target_word}</span>',
+                    text,
+                    flags=re.IGNORECASE
+                )
+                break
+    # Store in session for feedback
+    if best_sense:
+        session['last_disambiguation'] = {
+            'word': target_word,
+            'context': context_words,
+            'sentence': sentence
+        }
+    return render_template('results.html',
+                         text=text,
+                         highlighted_text=highlighted_text,
+                         target_word=target_word,
+                         best_sense=best_sense,
+                         top_senses=top_senses,
+                         sentence=sentence,
+                         context_words=', '.join([w for w in set(context_words)][:10]))  # Show unique context words
+@app.route('/feedback', methods=['POST'])
+def feedback():
+    data = request.get_json()
+    word = data.get('word')
+    context = data.get('context', [])
+    correct_sense = data.get('correct_sense')
+    if word and correct_sense:
+        updated_sense = lesk_processor.add_feedback(word, context, correct_sense)
+        return jsonify(updated_sense)
+    return jsonify({'error': 'Invalid feedback data'}), 400
+@app.route('/lesk-explained')
+def lesk_explained():
+    return render_template('lesk_explained.html')
+# Add error template handler
+@app.route('/error')
+def error():
+    error_message = request.args.get('message', 'An unknown error occurred')
+    error_details = request.args.get('details', '')
+    return render_template('error.html', error_message=error_message, error_details=error_details)
+if __name__ == '__main__':
+    app.run(debug=True)

templates/error.html ADDED Viewed

	@@ -0,0 +1,84 @@

+<!-- templates/error.html -->
+<!DOCTYPE html>
+<html>
+<head>
+    <title>Error - Word Sense Disambiguation Tool</title>
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
+    <style>
+        body {
+            background-color: #f8f9fa;
+        }
+        .navbar {
+            box-shadow: 0 2px 4px rgba(0,0,0,0.07);
+        }
+        .main-container {
+            max-width: 800px;
+            margin: 0 auto;
+            padding: 2rem;
+            background-color: white;
+            border-radius: 8px;
+            box-shadow: 0 2px 10px rgba(0,0,0,0.05);
+        }
+        .error-details {
+            background-color: #f5f5f5;
+            padding: 1rem;
+            border-radius: 4px;
+            font-family: monospace;
+            white-space: pre-wrap;
+            margin-top: 1rem;
+        }
+    </style>
+</head>
+<body>
+    <!-- Navbar -->
+    <nav class="navbar navbar-expand-lg navbar-light bg-light mb-4">
+        <div class="container">
+            <a class="navbar-brand" href="/">WSD Tool</a>
+            <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarNav">
+                <span class="navbar-toggler-icon"></span>
+            </button>
+            <div class="collapse navbar-collapse" id="navbarNav">
+                <ul class="navbar-nav ms-auto">
+                    <li class="nav-item">
+                        <a href="{{ url_for('index') }}" class="btn btn-outline-primary">
+                            ← Back to Input
+                        </a>
+                    </li>
+                </ul>
+            </div>
+        </div>
+    </nav>
+    <div class="container main-container">
+        <div class="text-center mb-4">
+            <h2 class="text-danger">Error</h2>
+        </div>
+        <div class="alert alert-danger">
+            {{ error_message }}
+        </div>
+        {% if error_details %}
+        <div class="error-details">
+            {{ error_details }}
+        </div>
+        {% endif %}
+        <div class="mt-4">
+            <p>You can try the following:</p>
+            <ul>
+                <li>Use a different word or sentence</li>
+                <li>Make sure the word has multiple meanings in WordNet</li>
+                <li>Provide more context around the ambiguous word</li>
+            </ul>
+        </div>
+        <div class="text-center mt-5">
+            <a href="{{ url_for('index') }}" class="btn btn-primary">Return to Input</a>
+        </div>
+    </div>
+    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
+</body>
+</html>

templates/index.html ADDED Viewed

	@@ -0,0 +1,127 @@

+<!-- templates/index.html -->
+<!DOCTYPE html>
+<html>
+<head>
+    <title>Word Sense Disambiguation Tool</title>
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
+    <style>
+        body {
+            background-color: #f8f9fa;
+        }
+        .navbar {
+            box-shadow: 0 2px 4px rgba(0,0,0,0.07);
+        }
+        .main-container {
+            max-width: 800px;
+            margin: 0 auto;
+            padding: 2rem;
+            background-color: white;
+            border-radius: 8px;
+            box-shadow: 0 2px 10px rgba(0,0,0,0.05);
+        }
+        .form-control:focus {
+            border-color: #6c757d;
+            box-shadow: 0 0 0 0.25rem rgba(108, 117, 125, 0.25);
+        }
+        .example-btn {
+            margin-right: 0.5rem;
+            margin-bottom: 0.5rem;
+        }
+    </style>
+</head>
+<body>
+    <!-- Navbar with Lesk Algorithm Explanation Link -->
+    <nav class="navbar navbar-expand-lg navbar-light bg-light mb-4">
+        <div class="container">
+            <a class="navbar-brand" href="/">WSD Tool</a>
+            <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarNav">
+                <span class="navbar-toggler-icon"></span>
+            </button>
+            <div class="collapse navbar-collapse" id="navbarNav">
+                <ul class="navbar-nav ms-auto">
+                    <li class="nav-item">
+                        <a class="nav-link" href="{{ url_for('lesk_explained') }}">
+                            Learn how the Lesk Algorithm works
+                        </a>
+                    </li>
+                </ul>
+            </div>
+        </div>
+    </nav>
+    <div class="container main-container">
+        <h2 class="mb-4 text-center">Word Sense Disambiguation</h2>
+        <p class="lead text-center mb-4">
+            Enter text with ambiguous words to disambiguate their meanings
+        </p>
+        <form action="{{ url_for('index') }}" method="post">
+            <div class="mb-3">
+                <label for="text" class="form-label">Text:</label>
+                <textarea class="form-control" id="text" name="text" rows="5" required></textarea>
+            </div>
+            <div class="mb-3">
+                <label for="target_word" class="form-label">
+                    Target Word (optional):
+                    <small class="text-muted">If left empty, the first ambiguous word will be selected</small>
+                </label>
+                <input type="text" class="form-control" id="target_word" name="target_word">
+            </div>
+            <div class="d-grid gap-2">
+                <button type="submit" class="btn btn-primary">Disambiguate</button>
+            </div>
+        </form>
+        <div class="mt-4">
+            <h5>Example Sentences:</h5>
+            <div class="d-flex flex-wrap">
+                <button class="btn btn-sm btn-outline-secondary example-btn"
+                        onclick="fillExample('She saw a bat flying in the dark.', 'bat')">
+                    Bat (animal)
+                </button>
+                <button class="btn btn-sm btn-outline-secondary example-btn"
+                        onclick="fillExample('The baseball player swung the bat.', 'bat')">
+                    Bat (sports)
+                </button>
+                <button class="btn btn-sm btn-outline-secondary example-btn"
+                        onclick="fillExample('The bat had leathery wings and flew silently.', 'bat')">
+                    Bat (with wings)
+                </button>
+                <button class="btn btn-sm btn-outline-secondary example-btn"
+                        onclick="fillExample('I need to go to the bank to deposit some money.', 'bank')">
+                    Bank (financial)
+                </button>
+                <button class="btn btn-sm btn-outline-secondary example-btn"
+                        onclick="fillExample('We sat by the river bank and had a picnic.', 'bank')">
+                    Bank (riverside)
+                </button>
+                <button class="btn btn-sm btn-outline-secondary example-btn"
+                        onclick="fillExample('The bass was too loud during the concert.', 'bass')">
+                    Bass (sound)
+                </button>
+                <button class="btn btn-sm btn-outline-secondary example-btn"
+                        onclick="fillExample('He caught a large bass while fishing.', 'bass')">
+                    Bass (fish)
+                </button>
+                <button class="btn btn-sm btn-outline-secondary example-btn"
+                        onclick="fillExample('Spring is my favorite season of the year.', 'spring')">
+                    Spring (season)
+                </button>
+                <button class="btn btn-sm btn-outline-secondary example-btn"
+                        onclick="fillExample('The spring in the mattress was broken.', 'spring')">
+                    Spring (coil)
+                </button>
+            </div>
+        </div>
+    </div>
+    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
+    <script>
+        function fillExample(text, word) {
+            document.getElementById('text').value = text;
+            document.getElementById('target_word').value = word;
+        }
+    </script>
+</body>
+</html>

templates/lesk_explained.html ADDED Viewed

	@@ -0,0 +1,213 @@

+<!-- templates/lesk_explained.html -->
+<!DOCTYPE html>
+<html>
+<head>
+    <title>Lesk Algorithm Explained</title>
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
+    <style>
+        body {
+            background-color: #f8f9fa;
+        }
+        .navbar {
+            box-shadow: 0 2px 4px rgba(0,0,0,0.07);
+        }
+        .main-container {
+            max-width: 800px;
+            margin: 0 auto;
+            padding: 2rem;
+            background-color: white;
+            border-radius: 8px;
+            box-shadow: 0 2px 10px rgba(0,0,0,0.05);
+        }
+        .code-block {
+            background-color: #f5f5f5;
+            padding: 1rem;
+            border-radius: 4px;
+            font-family: monospace;
+            white-space: pre-wrap;
+        }
+        .algorithm-step {
+            background-color: #e9ecef;
+            padding: 1rem;
+            border-radius: 6px;
+            margin-bottom: 1rem;
+        }
+        .enhancement {
+            background-color: #e3f2fd;
+            border-left: 4px solid #2196f3;
+            padding: 1rem;
+            margin-bottom: 1rem;
+        }
+    </style>
+</head>
+<body>
+    <!-- Navbar -->
+    <nav class="navbar navbar-expand-lg navbar-light bg-light mb-4">
+        <div class="container">
+            <a class="navbar-brand" href="/">WSD Tool</a>
+            <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarNav">
+                <span class="navbar-toggler-icon"></span>
+            </button>
+            <div class="collapse navbar-collapse" id="navbarNav">
+                <ul class="navbar-nav ms-auto">
+                    <li class="nav-item">
+                        <a href="{{ url_for('index') }}" class="btn btn-outline-primary">
+                            ← Back to Tool
+                        </a>
+                    </li>
+                </ul>
+            </div>
+        </div>
+    </nav>
+    <div class="container main-container">
+        <h2 class="mb-4">The Enhanced Lesk Algorithm for Word Sense Disambiguation</h2>
+        <div class="mb-4">
+            <h4>What is Word Sense Disambiguation?</h4>
+            <p>
+                Word Sense Disambiguation (WSD) is the task of identifying which sense of a word is used in a sentence when the word has multiple meanings. For example, the word "bat" can refer to a flying mammal or a piece of sports equipment.
+            </p>
+        </div>
+        <div class="mb-4">
+            <h4>The Original Lesk Algorithm</h4>
+            <p>
+                The Lesk algorithm, introduced by Michael Lesk in 1986, is one of the earliest and most influential algorithms for WSD. It uses dictionary definitions to determine the correct sense of a word in context.
+            </p>
+            <div class="algorithm-step">
+                <h5>Basic Idea:</h5>
+                <p>The sense whose dictionary definition shares the most words with the context is likely the correct sense.</p>
+            </div>
+        </div>
+        <div class="mb-4">
+            <h4>Our Enhanced Lesk Implementation</h4>
+            <p>Our implementation extends the original Lesk algorithm with several modern enhancements:</p>
+            <div class="enhancement">
+                <h5>1. Rich Sense Signatures</h5>
+                <p>We expand the sense signature beyond just definitions to include:</p>
+                <ul>
+                    <li>Example sentences from WordNet</li>
+                    <li>Hypernyms (parent concepts)</li>
+                    <li>Hyponyms (child concepts)</li>
+                    <li>Meronyms and holonyms (part-whole relationships)</li>
+                </ul>
+            </div>
+            <div class="enhancement">
+                <h5>2. BERT Integration</h5>
+                <p>We incorporate BERT contextual embeddings to capture deeper semantic relationships between the context and sense definitions.</p>
+            </div>
+            <div class="enhancement">
+                <h5>3. Rule-Based Components</h5>
+                <p>For common ambiguous words, we add targeted rules to handle cases where statistical methods might fail.</p>
+            </div>
+            <div class="enhancement">
+                <h5>4. Collocation Detection</h5>
+                <p>We identify common word combinations (collocations) that strongly indicate specific senses.</p>
+            </div>
+            <div class="enhancement">
+                <h5>5. Adaptive Learning</h5>
+                <p>The system learns from user feedback to improve future disambiguations of similar contexts.</p>
+            </div>
+        </div>
+        <div class="mb-4">
+            <h4>How Our Algorithm Works</h4>
+            <div class="algorithm-step">
+                <h5>Step 1: Context Processing</h5>
+                <p>Extract and process context words from the sentence, giving more weight to words closer to the target word.</p>
+            </div>
+            <div class="algorithm-step">
+                <h5>Step 2: Collocation Check</h5>
+                <p>Check for strong collocations that directly indicate a specific sense (e.g., "bat flying" strongly indicates the animal sense).</p>
+            </div>
+            <div class="algorithm-step">
+                <h5>Step 3: Rule Application</h5>
+                <p>Apply targeted rules for common ambiguous words based on contextual indicators.</p>
+            </div>
+            <div class="algorithm-step">
+                <h5>Step 4: Sense Signature Creation</h5>
+                <p>For each possible sense, create a rich signature from definitions, examples, and related concepts.</p>
+            </div>
+            <div class="algorithm-step">
+                <h5>Step 5: Overlap Calculation</h5>
+                <p>Calculate the weighted overlap between context words and each sense signature.</p>
+            </div>
+            <div class="algorithm-step">
+                <h5>Step 6: BERT Similarity</h5>
+                <p>Calculate semantic similarity between the context and each sense definition using BERT embeddings.</p>
+            </div>
+            <div class="algorithm-step">
+                <h5>Step 7: Score Combination</h5>
+                <p>Combine all scores (overlap, BERT, rules, feedback) to determine the most likely sense.</p>
+            </div>
+        </div>
+        <div class="mb-4">
+            <h4>Example</h4>
+            <p>For the sentence "She saw a bat flying in the dark":</p>
+            <div class="code-block">
+Target word: "bat"
+Possible senses:
+1. "a nocturnal mammal with wings"
+2. "an implement used for hitting a ball in sports"
+Context words: [she, saw, flying, dark]
+Collocation check: "bat flying" → strong indicator of animal sense
+Rule application: "flying" → animal sense rule triggered
+Sense 1 signature: [nocturnal, mammal, wing, fly, night, animal, cave, ...]
+Sense 2 signature: [implement, hit, ball, sport, game, baseball, cricket, ...]
+Overlap scores:
+- Sense 1: High overlap with "flying" and "dark" (related to nocturnal, night)
+- Sense 2: Low overlap with context words
+BERT similarity:
+- Sense 1: High similarity between "bat flying in the dark" and "nocturnal mammal with wings"
+- Sense 2: Lower similarity with sports equipment definition
+Final scores:
+- Sense 1 (animal): 8.7
+- Sense 2 (sports): 2.3
+Result: Sense 1 is selected as the correct meaning.</div>
+        </div>
+        <div class="mb-4">
+            <h4>Advantages Over Basic Lesk</h4>
+            <ul>
+                <li>Higher accuracy for common ambiguous words</li>
+                <li>Better handling of contextual nuances</li>
+                <li>Integration of modern NLP techniques</li>
+                <li>Adaptive learning from user feedback</li>
+                <li>Combination of statistical and rule-based approaches</li>
+            </ul>
+        </div>
+        <div class="text-center mt-5">
+            <a href="{{ url_for('index') }}" class="btn btn-primary">Try the WSD Tool</a>
+        </div>
+    </div>
+    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
+</body>
+</html>

templates/results.html ADDED Viewed

	@@ -0,0 +1,208 @@

+<!-- templates/results.html -->
+<!DOCTYPE html>
+<html>
+<head>
+    <title>Disambiguation Results</title>
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <link
+      href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css"
+      rel="stylesheet"
+    >
+    <style>
+        .highlight-word {
+            background-color: #FFD700;
+            padding: 2px 5px;
+            border-radius: 3px;
+            font-weight: bold;
+        }
+        .navbar {
+            box-shadow: 0 2px 4px rgba(0,0,0,0.07);
+        }
+        .card {
+            margin-bottom: 1.5rem;
+            box-shadow: 0 2px 8px rgba(0,0,0,0.05);
+        }
+        #updatedSense {
+            display: none;
+        }
+        .context-badge {
+            margin-right: 5px;
+            margin-bottom: 5px;
+            background-color: #e9ecef;
+            color: #495057;
+        }
+        .lexname-badge {
+            background-color: #17a2b8;
+            color: white;
+        }
+        .sense-card {
+            transition: all 0.3s ease;
+        }
+        .sense-card:hover {
+            transform: translateY(-5px);
+            box-shadow: 0 4px 12px rgba(0,0,0,0.1);
+        }
+        .algorithm-info {
+            font-size: 0.9rem;
+            color: #6c757d;
+        }
+    </style>
+</head>
+<body>
+    <!-- Navbar with Lesk Algorithm Explanation Link -->
+    <nav class="navbar navbar-expand-lg navbar-light bg-light mb-4">
+        <div class="container">
+            <a class="navbar-brand" href="/">WSD Tool</a>
+            <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarNav">
+                <span class="navbar-toggler-icon"></span>
+            </button>
+            <div class="collapse navbar-collapse" id="navbarNav">
+                <ul class="navbar-nav ms-auto align-items-center">
+                    <li class="nav-item">
+                        <a class="nav-link" href="{{ url_for('lesk_explained') }}">
+                            Learn about Lesk Algorithm
+                        </a>
+                    </li>
+                    <li class="nav-item">
+                        <a href="{{ url_for('index') }}" class="btn btn-outline-primary ms-2">
+                            ← Back to Input
+                        </a>
+                    </li>
+                </ul>
+            </div>
+        </div>
+    </nav>
+    <div class="container">
+        <!-- Original Text: shows highlighted_text, which the view passes in
+             with the target word wrapped in a highlight-word span.
+             NOTE(review): the safe filter disables autoescaping, so this value
+             is emitted as raw HTML. Confirm the view HTML-escapes the submitted
+             text before inserting the span. -->
+        <div class="mb-4">
+            <h5>Original Text:</h5>
+            <div class="p-3 bg-light rounded">
+                {{ highlighted_text|safe }}
+            </div>
+        </div>
+        {% if best_sense %}
+        <!-- Selected Sense Card -->
+        <div class="card sense-card" id="selectedSense">
+            <div class="card-header bg-primary text-white">
+                Selected Sense
+            </div>
+            <div class="card-body">
+                <h5 id="senseDefinition">{{ best_sense.definition() }}</h5>
+                <p class="text-muted">Lexical Category: <span class="badge lexname-badge">{{ best_sense.lexname() }}</span></p>
+                {% if best_sense.examples() %}
+                <div class="mt-2">
+                    <strong>Examples:</strong>
+                    <ul id="senseExamples">
+                        {% for example in best_sense.examples() %}
+                        <li>{{ example }}</li>
+                        {% endfor %}
+                    </ul>
+                </div>
+                {% endif %}
+                <!-- Show context words that influenced the decision -->
+                <div class="mt-3">
+                    <strong>Context words used:</strong>
+                    <div class="mt-2">
+                        {% for word in context_words.split(', ') %}
+                        <span class="badge context-badge">{{ word }}</span>
+                        {% endfor %}
+                    </div>
+                </div>
+                <div class="mt-3 algorithm-info">
+                    <p>This sense was selected using the Enhanced Lesk algorithm with BERT semantic similarity and rule-based components.</p>
+                </div>
+            </div>
+        </div>
+        <!-- Updated Sense Section (Initially Hidden) -->
+        <div class="card border-success mb-4 sense-card" id="updatedSense">
+            <div class="card-header bg-success text-white">
+                Updated Sense (Based on Feedback)
+            </div>
+            <div class="card-body">
+                <h5 id="updatedDefinition"></h5>
+                <div class="mt-2">
+                    <strong>Examples:</strong>
+                    <ul id="updatedExamples"></ul>
+                </div>
+                <div class="mt-3 algorithm-info">
+                    <p>Your feedback has been recorded and will improve future disambiguations.</p>
+                </div>
+            </div>
+        </div>
+        <!-- Top 3 Alternatives -->
+        <h5 class="mt-4">Top 3 Alternative Senses:</h5>
+        {% for sense in top_senses %}
+        <div class="card mb-3 sense-card">
+            <div class="card-body">
+                <p><strong>{{ sense.definition() }}</strong></p>
+                <p class="text-muted small">Lexical Category: <span class="badge lexname-badge">{{ sense.lexname() }}</span></p>
+                {% if sense.examples() %}
+                <p class="small">Example: "{{ sense.examples()[0] }}"</p>
+                {% endif %}
+                <button class="btn btn-sm btn-outline-primary feedback-btn"
+                        data-sense="{{ sense.name() }}">
+                    This is the correct meaning
+                </button>
+            </div>
+        </div>
+        {% endfor %}
+        {% else %}
+        <div class="alert alert-warning mt-4">
+            No ambiguous words detected in the text.
+        </div>
+        {% endif %}
+    </div>
+    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
+    <script>
+    document.querySelectorAll('.feedback-btn').forEach(btn => {
+        btn.addEventListener('click', async () => {
+            const senseName = btn.dataset.sense;
+            const word = "{{ target_word }}";
+            const context = "{{ sentence }}".toLowerCase().split(/[^a-z]+/).filter(w => w !== "");
+            try {
+                const response = await fetch('/feedback', {
+                    method: 'POST',
+                    headers: { 'Content-Type': 'application/json' },
+                    body: JSON.stringify({ word, context, correct_sense: senseName })
+                });
+                const updatedSense = await response.json();
+                // Show updated sense section
+                const updatedSection = document.getElementById('updatedSense');
+                document.getElementById('updatedDefinition').textContent = updatedSense.definition;
+                const examplesList = document.getElementById('updatedExamples');
+                examplesList.innerHTML = '';
+                if(updatedSense.examples && updatedSense.examples.length > 0) {
+                    updatedSense.examples.forEach(example => {
+                        const li = document.createElement('li');
+                        li.textContent = example;
+                        examplesList.appendChild(li);
+                    });
+                }
+                updatedSection.style.display = 'block';
+                setTimeout(() => {
+                    window.scrollTo({
+                        top: updatedSection.offsetTop - 100,
+                        behavior: 'smooth'
+                    });
+                }, 100);
+            } catch (error) {
+                console.error('Feedback error:', error);
+            }
+        });
+    });
+    </script>
+</body>
+</html>