Upload 7 files
Browse files- README.md +47 -8
- app.py +168 -0
- lexicons/english_lexicon.json +197 -0
- lexicons/persian_lexicon.json +110 -0
- lexicons/turkish_lexicon.json +119 -0
- requirements.txt +2 -0
- sentiment_analyzer.py +555 -0
README.md
CHANGED
|
@@ -1,13 +1,52 @@
|
|
| 1 |
---
|
| 2 |
-
title: Sentiment
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
-
app_file:
|
| 9 |
pinned: false
|
| 10 |
-
|
| 11 |
---
|
| 12 |
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Multilingual Sentiment Analysis
|
| 3 |
+
emoji: 🌍
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 4.0.0
|
| 8 |
+
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
license: mit
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# Multilingual Sentiment Analysis Tool
|
| 14 |
+
|
| 15 |
+
A comprehensive sentiment analysis tool supporting **English**, **Turkish**, and **Persian** languages using non-deep-learning approaches (lexicon-based, rule-based, and hybrid methods).
|
| 16 |
+
|
| 17 |
+
## Features
|
| 18 |
+
|
| 19 |
+
- 🌍 **Multilingual Support**: English, Turkish, and Persian
|
| 20 |
+
- 🔧 **Multiple Methods**: Lexicon-based, rule-based, and hybrid approaches
|
| 21 |
+
- 📊 **Batch Processing**: Analyze multiple texts at once
|
| 22 |
+
- ✨ **Advanced Rules**:
|
| 23 |
+
- Comprehensive sentiment lexicons (200+ words per language)
|
| 24 |
+
- Idiom detection
|
| 25 |
+
- Emoticon and emoji support
|
| 26 |
+
- Negation scope detection
|
| 27 |
+
- Intensifier and diminisher handling
|
| 28 |
+
- Contrast word detection
|
| 29 |
+
- And much more!
|
| 30 |
+
|
| 31 |
+
## Usage
|
| 32 |
+
|
| 33 |
+
1. Select your language (English, Turkish, or Persian)
|
| 34 |
+
2. Choose analysis method (Lexicon, Rule-based, or Hybrid)
|
| 35 |
+
3. Enter text and click "Analyze Sentiment"
|
| 36 |
+
4. View detailed results with polarity, confidence, and scores
|
| 37 |
+
|
| 38 |
+
## Methods
|
| 39 |
+
|
| 40 |
+
- **Lexicon-based**: Uses predefined sentiment dictionaries
|
| 41 |
+
- **Rule-based**: Extends lexicon with linguistic rules
|
| 42 |
+
- **Hybrid** (Recommended): Combines both approaches for best results
|
| 43 |
+
|
| 44 |
+
## Citation
|
| 45 |
+
|
| 46 |
+
If you use this tool in your research, please cite:
|
| 47 |
+
|
| 48 |
+
```
|
| 49 |
+
Multilingual Sentiment Analysis Tool (2024)
|
| 50 |
+
Non-Deep-Learning Approaches for Sentiment Analysis
|
| 51 |
+
```
|
| 52 |
+
|
app.py
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Gradio App for Multilingual Sentiment Analysis
|
| 3 |
+
Deploy this to Hugging Face Spaces
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import gradio as gr
|
| 7 |
+
from sentiment_analyzer import MultilingualSentimentAnalyzer
|
| 8 |
+
|
| 9 |
+
def analyze_sentiment(text, language, method):
    """Run sentiment analysis on a single text and return a Markdown report.

    Args:
        text: The input text to analyze (may be None or blank).
        language: One of "english", "turkish", "persian".
        method: One of "lexicon", "rule", "hybrid".

    Returns:
        A Markdown-formatted results string, or a plain prompt/error message.
    """
    # Guard: reject missing or whitespace-only input before constructing
    # an analyzer instance.
    cleaned = text.strip() if text else ""
    if not cleaned:
        return "Please enter some text to analyze."

    try:
        # A fresh analyzer is built per request so the chosen language and
        # method always take effect.
        engine = MultilingualSentimentAnalyzer(language=language, method=method)
        report = engine.analyze(text)

        # Render the analysis as a Markdown summary for the gr.Markdown output.
        return f"""
## Sentiment Analysis Results

**Polarity:** {report['polarity'].upper()}
**Confidence:** {report['confidence']*100:.1f}%

**Scores:**
- Positive: {report['positive_score']:.2f}
- Negative: {report['negative_score']:.2f}

**Details:**
- Method: {report['method']}
- Language: {report['language']}
- Words analyzed: {report.get('word_count', 0)}
"""
    except Exception as exc:
        # Surface any analyzer failure to the UI instead of crashing the app.
        return "Error: " + str(exc)
| 39 |
+
def batch_analyze(texts, language, method):
    """Analyze several texts (one per line) and return a Markdown summary.

    Args:
        texts: Newline-separated input texts (may be None or blank).
        language: One of "english", "turkish", "persian".
        method: One of "lexicon", "rule", "hybrid".

    Returns:
        A Markdown report with aggregate statistics, a polarity distribution,
        and one line per analyzed text — or a plain prompt/error message.
    """
    if not texts:
        return "Please enter texts to analyze (one per line)."

    # Split on newlines and drop blank entries.
    entries = [line.strip() for line in texts.split('\n') if line.strip()]
    if not entries:
        return "No valid texts found."

    try:
        engine = MultilingualSentimentAnalyzer(language=language, method=method)
        analyses = engine.analyze_batch(entries)
        stats = engine.get_statistics(entries)

        # Assemble the report from parts and join once at the end.
        parts = [f"""
## Batch Analysis Results

**Statistics:**
- Total texts: {stats['total_texts']}
- Average confidence: {stats['average_confidence']*100:.1f}%

**Polarity Distribution:**
"""]
        parts.extend(
            f"- {label.capitalize()}: {share}%\n"
            for label, share in stats['polarity_percentages'].items()
        )

        parts.append("\n**Individual Results:**\n")
        # NOTE(review): the "..." suffix is appended even when a text is
        # shorter than 50 characters — reproduced as-is from the original.
        parts.extend(
            f"\n{idx}. \"{entry[:50]}...\" → {analysis['polarity']} ({analysis['confidence']*100:.1f}%)\n"
            for idx, (entry, analysis) in enumerate(zip(entries, analyses), 1)
        )
        return "".join(parts)
    except Exception as exc:
        # Surface any analyzer failure to the UI instead of crashing the app.
        return "Error: " + str(exc)
| 73 |
+
# Create Gradio interface
# Top-level UI definition: a single-text tab, a batch tab, and a static
# examples tab, followed by a footer. Handlers are the module-level
# analyze_sentiment / batch_analyze functions defined above.
with gr.Blocks(title="Multilingual Sentiment Analysis", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# 🌍 Multilingual Sentiment Analysis Tool

Analyze sentiment in **English**, **Turkish**, and **Persian** text using non-deep-learning approaches.

This tool uses lexicon-based, rule-based, and hybrid methods for interpretable sentiment analysis.
""")

    with gr.Tabs():
        with gr.TabItem("Single Text Analysis"):
            with gr.Row():
                with gr.Column():
                    # Free-form input for one document.
                    text_input = gr.Textbox(
                        label="Enter Text",
                        placeholder="Type your text here...",
                        lines=5
                    )
                    # Choices mirror the lexicon files shipped in lexicons/.
                    language = gr.Dropdown(
                        choices=["english", "turkish", "persian"],
                        value="english",
                        label="Language"
                    )
                    # "hybrid" is the recommended default per the README.
                    method = gr.Dropdown(
                        choices=["lexicon", "rule", "hybrid"],
                        value="hybrid",
                        label="Analysis Method"
                    )
                    analyze_btn = gr.Button("Analyze Sentiment", variant="primary")

                with gr.Column():
                    # Results are rendered as Markdown produced by the handler.
                    output = gr.Markdown(label="Results")

            # Wire the button to the single-text handler.
            analyze_btn.click(
                fn=analyze_sentiment,
                inputs=[text_input, language, method],
                outputs=output
            )

        with gr.TabItem("Batch Analysis"):
            with gr.Row():
                with gr.Column():
                    # One text per line; blank lines are ignored by the handler.
                    batch_texts = gr.Textbox(
                        label="Enter Texts (one per line)",
                        placeholder="Enter multiple texts, one per line...",
                        lines=10
                    )
                    batch_language = gr.Dropdown(
                        choices=["english", "turkish", "persian"],
                        value="english",
                        label="Language"
                    )
                    batch_method = gr.Dropdown(
                        choices=["lexicon", "rule", "hybrid"],
                        value="hybrid",
                        label="Analysis Method"
                    )
                    batch_btn = gr.Button("Analyze Batch", variant="primary")

                with gr.Column():
                    batch_output = gr.Markdown(label="Batch Results")

            # Wire the button to the batch handler.
            batch_btn.click(
                fn=batch_analyze,
                inputs=[batch_texts, batch_language, batch_method],
                outputs=batch_output
            )

        with gr.TabItem("Examples"):
            # Static, copy-pasteable sample inputs for each supported language.
            gr.Markdown("""
### Example Texts to Try:

**English:**
- "I love this product! It's absolutely amazing!!! 😊"
- "This is terrible. I hate it."
- "Not bad, actually it's quite good!"

**Turkish:**
- "Bu ürünü çok seviyorum! Harika!"
- "Berbat bir deneyim. Hiç beğenmedim."

**Persian:**
- "این محصول عالی است!"
- "خیلی بد بود"
""")

    # Footer shown below the tabs on every view.
    gr.Markdown("""
---
**About:** This tool uses lexicon-based, rule-based, and hybrid approaches (without deep learning)
for interpretable sentiment analysis. Supports English, Turkish, and Persian languages.
""")

if __name__ == "__main__":
    demo.launch()
|
lexicons/english_lexicon.json
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"positive": [
|
| 3 |
+
"good", "great", "excellent", "amazing", "wonderful", "fantastic", "superb", "marvelous",
|
| 4 |
+
"love", "like", "adore", "cherish", "treasure", "appreciate", "enjoy", "delight",
|
| 5 |
+
"best", "perfect", "ideal", "flawless", "impeccable", "outstanding", "remarkable",
|
| 6 |
+
"beautiful", "gorgeous", "stunning", "lovely", "charming", "attractive", "appealing",
|
| 7 |
+
"nice", "pleasant", "agreeable", "satisfying", "pleasing", "gratifying",
|
| 8 |
+
"happy", "joyful", "cheerful", "glad", "pleased", "delighted", "thrilled", "ecstatic",
|
| 9 |
+
"satisfied", "content", "fulfilled", "gratified", "comfortable", "at ease",
|
| 10 |
+
"awesome", "brilliant", "magnificent", "splendid", "fabulous", "terrific", "incredible",
|
| 11 |
+
"successful", "triumphant", "victorious", "winning", "prosperous", "flourishing",
|
| 12 |
+
"optimistic", "hopeful", "confident", "positive", "upbeat", "encouraging",
|
| 13 |
+
"grateful", "thankful", "appreciative", "obliged", "indebted",
|
| 14 |
+
"excited", "enthusiastic", "eager", "passionate", "zealous", "ardent",
|
| 15 |
+
"proud", "honored", "privileged", "blessed", "fortunate", "lucky",
|
| 16 |
+
"impressive", "admirable", "praiseworthy", "commendable", "laudable",
|
| 17 |
+
"refreshing", "invigorating", "energizing", "uplifting", "inspiring",
|
| 18 |
+
"reliable", "trustworthy", "dependable", "solid", "steady", "consistent",
|
| 19 |
+
"valuable", "precious", "worthwhile", "beneficial", "advantageous", "profitable",
|
| 20 |
+
"smooth", "easy", "effortless", "seamless", "convenient", "user-friendly",
|
| 21 |
+
"innovative", "creative", "original", "unique", "distinctive", "special",
|
| 22 |
+
"professional", "expert", "skilled", "competent", "capable", "proficient",
|
| 23 |
+
"helpful", "supportive", "beneficial", "useful", "practical", "effective",
|
| 24 |
+
"clear", "transparent", "honest", "genuine", "authentic", "sincere",
|
| 25 |
+
"peaceful", "calm", "serene", "tranquil", "relaxing", "soothing",
|
| 26 |
+
"fun", "entertaining", "enjoyable", "amusing", "delightful", "pleasurable",
|
| 27 |
+
"fast", "quick", "rapid", "swift", "efficient", "speedy",
|
| 28 |
+
"affordable", "reasonable", "fair", "economical", "budget-friendly",
|
| 29 |
+
"modern", "contemporary", "up-to-date", "current", "fresh", "new",
|
| 30 |
+
"clean", "tidy", "organized", "neat", "orderly", "pristine",
|
| 31 |
+
"safe", "secure", "protected", "reliable", "stable", "sound",
|
| 32 |
+
"flexible", "adaptable", "versatile", "multipurpose", "all-purpose",
|
| 33 |
+
"recommend", "suggest", "endorse", "approve", "support", "back",
|
| 34 |
+
"exceed", "surpass", "outperform", "outshine", "beat", "top"
|
| 35 |
+
],
|
| 36 |
+
"negative": [
|
| 37 |
+
"bad", "terrible", "awful", "horrible", "dreadful", "atrocious", "appalling",
|
| 38 |
+
"worst", "poorest", "lowest", "inferior", "substandard", "unacceptable",
|
| 39 |
+
"hate", "loathe", "despise", "detest", "abhor", "disgust", "repulse",
|
| 40 |
+
"dislike", "disapprove", "reject", "refuse", "decline", "deny",
|
| 41 |
+
"poor", "inadequate", "insufficient", "deficient", "lacking", "wanting",
|
| 42 |
+
"disappointed", "let down", "disillusioned", "disheartened", "discouraged",
|
| 43 |
+
"sad", "unhappy", "miserable", "depressed", "down", "blue", "gloomy",
|
| 44 |
+
"angry", "mad", "furious", "enraged", "irritated", "annoyed", "upset",
|
| 45 |
+
"frustrated", "exasperated", "aggravated", "bothered", "irked", "vexed",
|
| 46 |
+
"annoying", "irritating", "bothersome", "troublesome", "pesky", "nagging",
|
| 47 |
+
"boring", "tedious", "dull", "monotonous", "repetitive", "tiresome",
|
| 48 |
+
"ugly", "unattractive", "hideous", "repulsive", "revolting", "disgusting",
|
| 49 |
+
"pathetic", "pitiful", "lamentable", "deplorable", "regrettable",
|
| 50 |
+
"miserable", "wretched", "unfortunate", "unlucky", "hapless",
|
| 51 |
+
"depressing", "disheartening", "discouraging", "demoralizing", "daunting",
|
| 52 |
+
"unpleasant", "disagreeable", "offensive", "repugnant", "repellent",
|
| 53 |
+
"disappointing", "unsatisfactory", "inadequate", "subpar", "below expectations",
|
| 54 |
+
"frustrating", "infuriating", "maddening", "exasperating", "aggravating",
|
| 55 |
+
"confusing", "bewildering", "perplexing", "puzzling", "mystifying",
|
| 56 |
+
"difficult", "hard", "challenging", "tough", "arduous", "strenuous",
|
| 57 |
+
"slow", "sluggish", "slack", "delayed", "late", "behind schedule",
|
| 58 |
+
"expensive", "costly", "pricey", "overpriced", "unaffordable", "exorbitant",
|
| 59 |
+
"outdated", "obsolete", "old-fashioned", "archaic", "antiquated",
|
| 60 |
+
"dirty", "filthy", "unclean", "messy", "disorganized", "cluttered",
|
| 61 |
+
"unsafe", "dangerous", "risky", "hazardous", "perilous", "precarious",
|
| 62 |
+
"broken", "damaged", "defective", "faulty", "malfunctioning", "flawed",
|
| 63 |
+
"unreliable", "untrustworthy", "undependable", "inconsistent", "unstable",
|
| 64 |
+
"useless", "worthless", "pointless", "futile", "ineffective", "inefficient",
|
| 65 |
+
"complicated", "complex", "convoluted", "intricate", "elaborate",
|
| 66 |
+
"waste", "squander", "throw away", "lose", "miss", "fail",
|
| 67 |
+
"problem", "issue", "trouble", "difficulty", "hardship", "obstacle",
|
| 68 |
+
"error", "mistake", "fault", "flaw", "defect", "bug",
|
| 69 |
+
"complaint", "grievance", "objection", "protest", "criticism",
|
| 70 |
+
"regret", "remorse", "sorrow", "grief", "anguish", "distress",
|
| 71 |
+
"worry", "concern", "anxiety", "stress", "tension", "pressure",
|
| 72 |
+
"fear", "dread", "terror", "panic", "alarm", "apprehension",
|
| 73 |
+
"pain", "ache", "hurt", "suffering", "agony", "torment",
|
| 74 |
+
"weak", "feeble", "frail", "fragile", "delicate", "vulnerable",
|
| 75 |
+
"stupid", "foolish", "silly", "ridiculous", "absurd", "nonsensical",
|
| 76 |
+
"lazy", "sluggish", "inactive", "idle", "indolent", "lethargic",
|
| 77 |
+
"rude", "impolite", "discourteous", "ill-mannered", "offensive",
|
| 78 |
+
"selfish", "greedy", "self-centered", "egotistical", "narcissistic",
|
| 79 |
+
"dishonest", "deceptive", "misleading", "fraudulent", "deceitful",
|
| 80 |
+
"unfair", "unjust", "biased", "prejudiced", "discriminatory",
|
| 81 |
+
"reject", "refuse", "decline", "deny", "dismiss", "turn down"
|
| 82 |
+
],
|
| 83 |
+
"intensifiers": {
|
| 84 |
+
"very": 1.5,
|
| 85 |
+
"extremely": 2.0,
|
| 86 |
+
"really": 1.3,
|
| 87 |
+
"quite": 1.2,
|
| 88 |
+
"too": 1.4,
|
| 89 |
+
"so": 1.3,
|
| 90 |
+
"absolutely": 1.8,
|
| 91 |
+
"completely": 1.5,
|
| 92 |
+
"totally": 1.6,
|
| 93 |
+
"incredibly": 1.7,
|
| 94 |
+
"amazingly": 1.6,
|
| 95 |
+
"exceptionally": 1.7,
|
| 96 |
+
"particularly": 1.4,
|
| 97 |
+
"especially": 1.4,
|
| 98 |
+
"highly": 1.6,
|
| 99 |
+
"greatly": 1.5,
|
| 100 |
+
"significantly": 1.5,
|
| 101 |
+
"substantially": 1.5,
|
| 102 |
+
"considerably": 1.4,
|
| 103 |
+
"tremendously": 1.8,
|
| 104 |
+
"immensely": 1.7,
|
| 105 |
+
"enormously": 1.7,
|
| 106 |
+
"hugely": 1.6,
|
| 107 |
+
"massively": 1.6,
|
| 108 |
+
"dramatically": 1.6,
|
| 109 |
+
"remarkably": 1.6,
|
| 110 |
+
"extraordinarily": 1.8,
|
| 111 |
+
"unbelievably": 1.7,
|
| 112 |
+
"incredibly": 1.7,
|
| 113 |
+
"surprisingly": 1.4,
|
| 114 |
+
"unusually": 1.4,
|
| 115 |
+
"remarkably": 1.6,
|
| 116 |
+
"deeply": 1.5,
|
| 117 |
+
"profoundly": 1.6,
|
| 118 |
+
"thoroughly": 1.5,
|
| 119 |
+
"utterly": 1.7,
|
| 120 |
+
"entirely": 1.5,
|
| 121 |
+
"fully": 1.4,
|
| 122 |
+
"perfectly": 1.6,
|
| 123 |
+
"purely": 1.4,
|
| 124 |
+
"simply": 1.3,
|
| 125 |
+
"just": 1.2,
|
| 126 |
+
"even": 1.2,
|
| 127 |
+
"more": 1.3,
|
| 128 |
+
"most": 1.5,
|
| 129 |
+
"much": 1.4,
|
| 130 |
+
"many": 1.3,
|
| 131 |
+
"most": 1.5,
|
| 132 |
+
"super": 1.5,
|
| 133 |
+
"mega": 1.6,
|
| 134 |
+
"ultra": 1.7,
|
| 135 |
+
"hyper": 1.6,
|
| 136 |
+
"overly": 1.4,
|
| 137 |
+
"excessively": 1.5,
|
| 138 |
+
"intensely": 1.6,
|
| 139 |
+
"severely": 1.5,
|
| 140 |
+
"badly": 1.4,
|
| 141 |
+
"terribly": 1.6,
|
| 142 |
+
"awfully": 1.6,
|
| 143 |
+
"horribly": 1.6,
|
| 144 |
+
"dreadfully": 1.6
|
| 145 |
+
},
|
| 146 |
+
"negation": [
|
| 147 |
+
"not", "no", "never", "none", "nobody", "nothing", "nowhere",
|
| 148 |
+
"neither", "cannot", "can't", "won't", "don't", "doesn't",
|
| 149 |
+
"didn't", "isn't", "aren't", "wasn't", "weren't", "hasn't",
|
| 150 |
+
"haven't", "hadn't", "wouldn't", "couldn't", "shouldn't",
|
| 151 |
+
"mustn't", "mightn't", "mayn't", "shan't", "ain't",
|
| 152 |
+
"without", "lacking", "missing", "absent", "devoid",
|
| 153 |
+
"neither", "nor", "nobody", "nowhere", "nothing",
|
| 154 |
+
"nowhere", "nevermore", "nohow", "nowise", "noways",
|
| 155 |
+
"barely", "hardly", "scarcely", "rarely", "seldom",
|
| 156 |
+
"little", "few", "less", "least", "minimal",
|
| 157 |
+
"refuse", "reject", "deny", "decline", "dismiss"
|
| 158 |
+
],
|
| 159 |
+
"diminishers": {
|
| 160 |
+
"slightly": 0.7,
|
| 161 |
+
"somewhat": 0.8,
|
| 162 |
+
"a bit": 0.7,
|
| 163 |
+
"a little": 0.7,
|
| 164 |
+
"kind of": 0.8,
|
| 165 |
+
"sort of": 0.8,
|
| 166 |
+
"rather": 0.9,
|
| 167 |
+
"pretty": 0.9,
|
| 168 |
+
"fairly": 0.9,
|
| 169 |
+
"relatively": 0.85,
|
| 170 |
+
"moderately": 0.85,
|
| 171 |
+
"reasonably": 0.9,
|
| 172 |
+
"barely": 0.6,
|
| 173 |
+
"hardly": 0.6,
|
| 174 |
+
"scarcely": 0.6,
|
| 175 |
+
"almost": 0.8,
|
| 176 |
+
"nearly": 0.8,
|
| 177 |
+
"partially": 0.7,
|
| 178 |
+
"partly": 0.7
|
| 179 |
+
},
|
| 180 |
+
"contrast_words": [
|
| 181 |
+
"but", "however", "although", "though", "yet", "still",
|
| 182 |
+
"nevertheless", "nonetheless", "despite", "in spite of",
|
| 183 |
+
"whereas", "while", "on the other hand", "conversely"
|
| 184 |
+
],
|
| 185 |
+
"idioms_positive": [
|
| 186 |
+
"over the moon", "on cloud nine", "thrilled to bits", "tickled pink",
|
| 187 |
+
"walking on air", "in seventh heaven", "feeling great", "top notch",
|
| 188 |
+
"second to none", "head and shoulders above", "out of this world",
|
| 189 |
+
"worth its weight in gold", "the bee's knees", "the cat's pajamas"
|
| 190 |
+
],
|
| 191 |
+
"idioms_negative": [
|
| 192 |
+
"down in the dumps", "feeling blue", "under the weather", "out of sorts",
|
| 193 |
+
"at the end of one's rope", "at wit's end", "in a pickle", "in hot water",
|
| 194 |
+
"the last straw", "the final nail in the coffin", "hit rock bottom",
|
| 195 |
+
"go from bad to worse", "go downhill", "go to pieces", "fall apart"
|
| 196 |
+
]
|
| 197 |
+
}
|
lexicons/persian_lexicon.json
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"positive": [
|
| 3 |
+
"خوب", "عالی", "عالیه", "ممتاز", "برجسته", "فوقالعاده",
|
| 4 |
+
"دوست دارم", "خوشم میاد", "پسندیدم", "علاقه دارم", "عاشق", "محبوب",
|
| 5 |
+
"زیبا", "قشنگ", "خوب", "نیک", "خوب", "عالی",
|
| 6 |
+
"موفق", "کامیاب", "پیروز", "فاتح", "برنده", "کامیاب",
|
| 7 |
+
"راضی", "خوشحال", "شاد", "خوش", "مسرور", "خوشنود",
|
| 8 |
+
"لذت بخش", "خوشایند", "مطلوب", "مثبت", "امیدوار", "خوش بین",
|
| 9 |
+
"راضی کننده", "قانع کننده", "رضایت بخش", "خوشایند",
|
| 10 |
+
"ممتاز", "عالی", "برجسته", "فوقالعاده", "استثنایی",
|
| 11 |
+
"خوب", "نیک", "خوب", "عالی", "ممتاز", "برجسته",
|
| 12 |
+
"زیبا", "قشنگ", "خوب", "نیک", "خوب", "عالی",
|
| 13 |
+
"خوشحال", "شاد", "خوش", "مسرور", "خوشنود", "شادمان",
|
| 14 |
+
"مفید", "سودمند", "کارآمد", "عملی", "موثر", "کاربردی",
|
| 15 |
+
"با کیفیت", "مرغوب", "عالی", "برتر", "عالی", "ممتاز",
|
| 16 |
+
"سریع", "تند", "چابک", "عاجل", "فوری", "سریع",
|
| 17 |
+
"ارزان", "اقتصادی", "مقرون به صرفه", "مناسب", "جذاب",
|
| 18 |
+
"تمیز", "پاک", "منظم", "مرتب", "منظم", "پاکیزه",
|
| 19 |
+
"امن", "ایمن", "مطمئن", "قابل اعتماد", "پایدار", "مستحکم",
|
| 20 |
+
"راحت", "آسوده", "آرام", "ساکت", "آرام", "آرامش",
|
| 21 |
+
"سرگرم کننده", "جالب", "خوشایند", "لذت بخش", "خوش", "شاد",
|
| 22 |
+
"جدید", "نو", "مدرن", "معاصر", "جاری", "تازه",
|
| 23 |
+
"آسان", "ساده", "راحت", "قابل فهم", "روشن", "واضح",
|
| 24 |
+
"توصیه میکنم", "پیشنهاد میکنم", "توصیه میکنم", "پیشنهاد میدهم"
|
| 25 |
+
],
|
| 26 |
+
"negative": [
|
| 27 |
+
"بد", "زشت", "ناگوار", "ناخوشایند", "نفرت", "نپسندیدم",
|
| 28 |
+
"بد", "بد", "بد", "زشت", "ناگوار", "ناخوشایند",
|
| 29 |
+
"نپسندیدم", "خوشم نیامد", "دوست ندارم", "نفرت دارم", "متنفرم",
|
| 30 |
+
"غمگین", "عصبانی", "ناراحت", "ناامید", "مایوس", "دلگیر",
|
| 31 |
+
"ناامیدی", "یأس", "ناامیدی", "نومیدی", "ناامیدی",
|
| 32 |
+
"خسته کننده", "کسل کننده", "خستهکننده", "ملالآور", "خستهکننده",
|
| 33 |
+
"ناگوار", "ناخوشایند", "ناپسند", "منفور", "متنفر",
|
| 34 |
+
"ناراضی", "غمگین", "اندوهگین", "غمناک", "دردناک", "اندوهناک",
|
| 35 |
+
"غم", "اندوه", "درد", "رنج", "الم", "غم", "اندوه",
|
| 36 |
+
"عصبانیت", "خشم", "غضب", "خشم", "عصبانیت", "خشم",
|
| 37 |
+
"ناراحت", "ناخوشنود", "ناراضی", "ناراضی", "ناراضی",
|
| 38 |
+
"کند", "آهسته", "دیر", "تاخیر", "تنبل", "کند",
|
| 39 |
+
"گران", "پرهزینه", "گران", "گران قیمت", "پرهزینه",
|
| 40 |
+
"کثیف", "آلوده", "ناپاک", "کثیف", "آلوده", "ناپاک",
|
| 41 |
+
"خطرناک", "ریسکی", "مضر", "خطرناک", "ناامن", "خطرناک",
|
| 42 |
+
"خراب", "معیوب", "ناقص", "ناقص", "ناقص", "ناقص",
|
| 43 |
+
"غیر قابل اعتماد", "غیر قابل اعتماد", "ناپایدار", "ناپایدار", "نامطمئن",
|
| 44 |
+
"بیفایده", "بیفایده", "بیفایده", "بیمعنی", "بیمعنی",
|
| 45 |
+
"پیچیده", "مشکل", "نامفهوم", "مبهم", "مبهم", "مبهم",
|
| 46 |
+
"شکایت", "درد", "مشکل", "مسئله", "ناراحتی", "درد",
|
| 47 |
+
"اشتباه", "خطا", "نقص", "کمبود", "کمبود", "نقص",
|
| 48 |
+
"پشیمانی", "ندامت", "غم", "اندوه", "درد", "الم",
|
| 49 |
+
"نگرانی", "اضطراب", "استرس", "فشار", "تنش", "نگرانی",
|
| 50 |
+
"ترس", "وحشت", "هراس", "هشدار", "نگرانی", "اضطراب",
|
| 51 |
+
"درد", "الم", "رنج", "الم", "الم", "رنج",
|
| 52 |
+
"ضعیف", "ناتوان", "ضعیف", "شکننده", "حساس", "شکننده",
|
| 53 |
+
"احمق", "نادان", "احمق", "بیمعنی", "بیمعنی", "بیمنطق",
|
| 54 |
+
"تنبل", "کند", "بیکار", "خالی", "بیحرکت", "ساکن",
|
| 55 |
+
"بیادب", "ناسزاگو", "زخمزبان", "ناعادلانه", "ناعادلانه",
|
| 56 |
+
"خودخواه", "حریص", "خودخواه", "خودخواه", "خودخواه",
|
| 57 |
+
"نادرست", "فریبنده", "گمراهکننده", "جعلی", "دروغگو",
|
| 58 |
+
"ناعادلانه", "ناعادلانه", "متعصب", "تبعیضآمیز", "نابرابر",
|
| 59 |
+
"رد", "انکار", "نفی", "رد", "نپذیرفتن"
|
| 60 |
+
],
|
| 61 |
+
"intensifiers": {
|
| 62 |
+
"خیلی": 1.5,
|
| 63 |
+
"بسیار": 1.6,
|
| 64 |
+
"فوق العاده": 2.0,
|
| 65 |
+
"کاملا": 1.8,
|
| 66 |
+
"و��قعا": 1.3,
|
| 67 |
+
"نسبتا": 1.2,
|
| 68 |
+
"زیاد": 1.4,
|
| 69 |
+
"تمام": 1.5,
|
| 70 |
+
"حتما": 1.7,
|
| 71 |
+
"شدیدا": 1.6,
|
| 72 |
+
"به شدت": 1.7,
|
| 73 |
+
"بسیار زیاد": 1.8,
|
| 74 |
+
"خیلی زیاد": 1.7,
|
| 75 |
+
"بسیار": 1.6,
|
| 76 |
+
"به کرات": 1.5,
|
| 77 |
+
"بیش از حد": 1.6,
|
| 78 |
+
"به طور کامل": 1.5,
|
| 79 |
+
"کاملا": 1.8,
|
| 80 |
+
"تماما": 1.5,
|
| 81 |
+
"به طور کامل": 1.5
|
| 82 |
+
},
|
| 83 |
+
"negation": [
|
| 84 |
+
"نیست", "نیست", "نه", "هیچ", "هیچ وقت", "هرگز",
|
| 85 |
+
"نمی", "نمیکنم", "نکردم", "نخواهم کرد",
|
| 86 |
+
"نمیخواهم", "نپسندیدم", "خوشم نمیآید",
|
| 87 |
+
"نیستم", "نیستی", "نیست", "نیستیم", "نیستید", "نیستند",
|
| 88 |
+
"نیست", "نیست", "نیست", "نیست", "نیست", "نیست",
|
| 89 |
+
"نه", "نیست", "نیست", "نیست", "هرگز", "هیچ",
|
| 90 |
+
"نه...نه", "نه هم", "هیچ", "هیچ وقت", "هرگز"
|
| 91 |
+
],
|
| 92 |
+
"diminishers": {
|
| 93 |
+
"کمی": 0.7,
|
| 94 |
+
"کم": 0.6,
|
| 95 |
+
"کمی": 0.7,
|
| 96 |
+
"کوچک": 0.7,
|
| 97 |
+
"سبک": 0.8,
|
| 98 |
+
"نسبتا": 0.85,
|
| 99 |
+
"نسبتا": 0.9,
|
| 100 |
+
"تقریبا": 0.8,
|
| 101 |
+
"تقریبا": 0.8,
|
| 102 |
+
"تا حدی": 0.7,
|
| 103 |
+
"جزئی": 0.7
|
| 104 |
+
},
|
| 105 |
+
"contrast_words": [
|
| 106 |
+
"اما", "ولی", "لیکن", "با این حال", "با این وجود",
|
| 107 |
+
"با این حال", "با این وجود", "با این حال",
|
| 108 |
+
"در حالی که", "در حالی که", "در حالی که", "بر خلاف", "بر خلاف"
|
| 109 |
+
]
|
| 110 |
+
}
|
lexicons/turkish_lexicon.json
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"positive": [
|
| 3 |
+
"iyi", "güzel", "harika", "mükemmel", "muhteşem", "süper", "müthiş", "olağanüstü",
|
| 4 |
+
"seviyorum", "beğendim", "beğeniyorum", "hoşlanıyorum", "sevdim", "aşığım",
|
| 5 |
+
"hoş", "güzel", "şahane", "nefis", "leziz", "tatlı", "sevimli", "hoş",
|
| 6 |
+
"başarılı", "başarılı", "başarı", "zafer", "galibiyet", "kazanç",
|
| 7 |
+
"memnun", "mutlu", "sevinçli", "neşeli", "keyifli", "zevkli", "hoşnut",
|
| 8 |
+
"tatmin", "memnuniyet", "beğeni", "hoşnutluk", "razı", "kabul",
|
| 9 |
+
"övgü", "takdir", "alkış", "bravo", "aferin", "tebrik", "kutlama",
|
| 10 |
+
"mükemmel", "kusursuz", "mükemmeliyet", "mükemmellik", "mükemmel",
|
| 11 |
+
"harika", "muhteşem", "olağanüstü", "fevkalade", "sıra dışı",
|
| 12 |
+
"güzel", "hoş", "şirin", "sevimli", "çekici", "cazibeli", "alımlı",
|
| 13 |
+
"mutlu", "sevinçli", "neşeli", "keyifli", "şen", "sevinç dolu",
|
| 14 |
+
"başarılı", "başarılı", "başarılı", "başarılı", "başarılı",
|
| 15 |
+
"faydalı", "yararlı", "kullanışlı", "pratik", "etkili", "verimli",
|
| 16 |
+
"kaliteli", "nitelikli", "üstün", "yüksek kalite", "premium",
|
| 17 |
+
"hızlı", "çabuk", "süratli", "tez", "ivedi", "acele",
|
| 18 |
+
"ucuz", "ekonomik", "uygun fiyatlı", "makul", "cazip",
|
| 19 |
+
"temiz", "düzenli", "tertipli", "derli toplu", "düzenli",
|
| 20 |
+
"güvenli", "emniyetli", "güvenilir", "sağlam", "istikrarlı",
|
| 21 |
+
"rahat", "konforlu", "huzurlu", "sakin", "dingin", "sükunetli",
|
| 22 |
+
"eğlenceli", "keyifli", "zevkli", "hoş", "neşeli", "şen",
|
| 23 |
+
"yeni", "modern", "çağdaş", "güncel", "aktüel", "fresh",
|
| 24 |
+
"kolay", "basit", "sade", "anlaşılır", "açık", "net",
|
| 25 |
+
"öneriyorum", "tavsiye ediyorum", "öneririm", "tavsiye ederim"
|
| 26 |
+
],
|
| 27 |
+
"negative": [
|
| 28 |
+
"kötü", "berbat", "çirkin", "iğrenç", "nefret", "beğenmedim", "hoşlanmadım",
|
| 29 |
+
"kötü", "fena", "berbat", "rezil", "korkunç", "dehşet", "felaket",
|
| 30 |
+
"beğenmedim", "hoşlanmadım", "sevmedim", "nefret ediyorum", "tiksinme",
|
| 31 |
+
"üzgün", "kızgın", "sinirli", "öfkeli", "hiddetli", "kızgın",
|
| 32 |
+
"hayal kırıklığı", "hayal kırıklığı", "umutsuzluk", "çaresizlik",
|
| 33 |
+
"can sıkıcı", "sıkıcı", "bıktırıcı", "usandırıcı", "bezdirici",
|
| 34 |
+
"tatsız", "hoş olmayan", "nahoş", "itici", "tiksindirici",
|
| 35 |
+
"mutsuz", "üzgün", "kederli", "hüzünlü", "acılı", "elemli",
|
| 36 |
+
"üzüntü", "keder", "acı", "elem", "hüzün", "gam", "kaygı",
|
| 37 |
+
"kızgınlık", "öfke", "sinir", "hiddet", "gazap", "kızgınlık",
|
| 38 |
+
"rahatsız", "hoşnutsuz", "memnuniyetsiz", "beğenmeme", "razı olmama",
|
| 39 |
+
"yavaş", "ağır", "gecikmiş", "gecikmeli", "tembel", "atıl",
|
| 40 |
+
"pahalı", "masraflı", "maliyetli", "pahalı", "yüksek fiyatlı",
|
| 41 |
+
"kirli", "pis", "murdar", "kirli", "pasaklı", "dağınık",
|
| 42 |
+
"tehlikeli", "riskli", "zararlı", "tehlikeli", "güvensiz",
|
| 43 |
+
"bozuk", "arızalı", "hatalı", "kusurlu", "eksik", "noksan",
|
| 44 |
+
"güvensiz", "güvenilmez", "istikrarsız", "kararsız", "belirsiz",
|
| 45 |
+
"işe yaramaz", "faydasız", "yararsız", "boş", "anlamsız",
|
| 46 |
+
"karmaşık", "karışık", "anlaşılmaz", "belirsiz", "muğlak",
|
| 47 |
+
"şikayet", "dert", "sorun", "problem", "sıkıntı", "dert",
|
| 48 |
+
"hata", "yanlış", "kusur", "eksiklik", "noksanlık", "arız",
|
| 49 |
+
"pişmanlık", "nedamet", "üzüntü", "keder", "acı", "elem",
|
| 50 |
+
"endişe", "kaygı", "stres", "baskı", "gerilim", "tedirginlik",
|
| 51 |
+
"korku", "dehşet", "panik", "alarm", "endişe", "kaygı",
|
| 52 |
+
"ağrı", "acı", "sancı", "sızı", "elem", "ızdırap",
|
| 53 |
+
"zayıf", "güçsüz", "cılız", "narin", "hassas", "kırılgan",
|
| 54 |
+
"aptal", "ahmak", "budala", "saçma", "anlamsız", "mantıksız",
|
| 55 |
+
"tembel", "atalet", "işsiz", "boş", "hareketsiz", "durgun",
|
| 56 |
+
"kaba", "nezaketsiz", "kırıcı", "incitici", "haksız", "adil olmayan",
|
| 57 |
+
"bencil", "açgözlü", "kendini düşünen", "egoist", "narsist",
|
| 58 |
+
"dürüst olmayan", "aldatıcı", "yanıltıcı", "sahte", "yalancı",
|
| 59 |
+
"haksız", "adil olmayan", "önyargılı", "ayrımcı", "eşitsiz",
|
| 60 |
+
"reddet", "ret", "inkar", "yadsıma", "kabul etmeme"
|
| 61 |
+
],
|
| 62 |
+
"intensifiers": {
|
| 63 |
+
"çok": 1.5,
|
| 64 |
+
"çok fazla": 1.6,
|
| 65 |
+
"aşırı": 2.0,
|
| 66 |
+
"son derece": 1.8,
|
| 67 |
+
"gerçekten": 1.3,
|
| 68 |
+
"oldukça": 1.2,
|
| 69 |
+
"fazlasıyla": 1.4,
|
| 70 |
+
"tamamen": 1.5,
|
| 71 |
+
"kesinlikle": 1.7,
|
| 72 |
+
"müthiş": 1.6,
|
| 73 |
+
"fevkalade": 1.8,
|
| 74 |
+
"olağanüstü": 1.9,
|
| 75 |
+
"son derece": 1.8,
|
| 76 |
+
"hayli": 1.4,
|
| 77 |
+
"epey": 1.3,
|
| 78 |
+
"bir hayli": 1.4,
|
| 79 |
+
"oldukça": 1.2,
|
| 80 |
+
"epeyce": 1.3,
|
| 81 |
+
"hayli": 1.4,
|
| 82 |
+
"daha": 1.3,
|
| 83 |
+
"en": 1.5,
|
| 84 |
+
"pek": 1.4,
|
| 85 |
+
"gayet": 1.3,
|
| 86 |
+
"iyice": 1.4,
|
| 87 |
+
"iyiden iyiye": 1.5
|
| 88 |
+
},
|
| 89 |
+
"negation": [
|
| 90 |
+
"değil", "yok", "hayır", "hiç", "hiçbir", "hiçbir şey",
|
| 91 |
+
"hiçbir zaman", "asla", "bir daha", "olmaz", "olmayacak",
|
| 92 |
+
"yapmam", "yapmıyorum", "yapmadım", "yapmayacağım",
|
| 93 |
+
"istemiyorum", "beğenmedim", "hoşlanmıyorum",
|
| 94 |
+
"değilim", "değilsin", "değil", "değiliz", "değilsiniz", "değiller",
|
| 95 |
+
"yok", "yoktur", "yoktur", "yok", "yok", "yok",
|
| 96 |
+
"hayır", "olmaz", "yok", "değil", "asla", "hiç",
|
| 97 |
+
"ne...ne", "ne de", "hiç de", "hiç değil", "asla değil"
|
| 98 |
+
],
|
| 99 |
+
"diminishers": {
|
| 100 |
+
"biraz": 0.7,
|
| 101 |
+
"az": 0.6,
|
| 102 |
+
"birazcık": 0.7,
|
| 103 |
+
"küçük": 0.7,
|
| 104 |
+
"hafif": 0.8,
|
| 105 |
+
"nispeten": 0.85,
|
| 106 |
+
"oldukça": 0.9,
|
| 107 |
+
"epey": 0.9,
|
| 108 |
+
"hayli": 0.9,
|
| 109 |
+
"neredeyse": 0.8,
|
| 110 |
+
"hemen hemen": 0.8,
|
| 111 |
+
"kısmen": 0.7,
|
| 112 |
+
"kısmi": 0.7
|
| 113 |
+
},
|
| 114 |
+
"contrast_words": [
|
| 115 |
+
"ama", "fakat", "lakin", "ancak", "yalnız", "sadece",
|
| 116 |
+
"buna rağmen", "yine de", "gene de", "bununla birlikte",
|
| 117 |
+
"oysa", "oysaki", "halbuki", "buna karşın", "bunun aksine"
|
| 118 |
+
]
|
| 119 |
+
}
|
requirements.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=4.0.0
|
| 2 |
+
|
sentiment_analyzer.py
ADDED
|
@@ -0,0 +1,555 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Multilingual Sentiment Analysis Tool
|
| 3 |
+
Supports Turkish, Persian, and English using lexicon-based and machine learning approaches
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import re
|
| 7 |
+
import json
|
| 8 |
+
import os
|
| 9 |
+
from typing import Dict, List, Tuple, Optional
|
| 10 |
+
from collections import Counter
|
| 11 |
+
import math
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class SentimentLexicon:
    """Holds sentiment word lists and modifier tables for one language.

    The lexicon is read from ``lexicons/<language>_lexicon.json`` when that
    file exists — resolved relative to this module's directory first, then
    relative to the current working directory (the original behavior) —
    otherwise a small built-in English lexicon is used as a fallback.
    """

    def __init__(self, language: str):
        """
        Args:
            language: Lexicon name, e.g. 'english', 'turkish', 'persian'.
        """
        self.language = language
        self.positive_words = set()   # single words with positive polarity
        self.negative_words = set()   # single words with negative polarity
        self.intensifiers = {}        # word -> multiplier (> 1.0)
        self.negation_words = set()   # words that flip polarity in scope
        self.diminishers = {}         # word -> multiplier (< 1.0)
        self.contrast_words = set()   # e.g. 'but' — signal a sentiment shift
        self.idioms_positive = []     # multi-word positive expressions
        self.idioms_negative = []     # multi-word negative expressions
        self._load_lexicon()

    def _load_lexicon(self):
        """Load the language-specific lexicon JSON, or fall back to defaults.

        Missing keys in the JSON default to empty collections, so a partial
        lexicon file is accepted.
        """
        relative_path = f"lexicons/{self.language}_lexicon.json"
        # Prefer a path anchored at this module so loading works regardless
        # of the process working directory; keep the CWD-relative path as a
        # fallback for backward compatibility.
        module_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), relative_path
        )
        lexicon_file = module_path if os.path.exists(module_path) else relative_path
        if os.path.exists(lexicon_file):
            with open(lexicon_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            self.positive_words = set(data.get('positive', []))
            self.negative_words = set(data.get('negative', []))
            self.intensifiers = data.get('intensifiers', {})
            self.negation_words = set(data.get('negation', []))
            self.diminishers = data.get('diminishers', {})
            self.contrast_words = set(data.get('contrast_words', []))
            self.idioms_positive = data.get('idioms_positive', [])
            self.idioms_negative = data.get('idioms_negative', [])
        else:
            # No lexicon file for this language: use the built-in English one.
            self._load_default_english()

    def _load_default_english(self):
        """Populate a minimal built-in English sentiment lexicon."""
        self.positive_words = {
            'good', 'great', 'excellent', 'amazing', 'wonderful', 'fantastic',
            'love', 'like', 'best', 'perfect', 'beautiful', 'nice', 'happy',
            'pleased', 'satisfied', 'awesome', 'brilliant', 'outstanding'
        }
        # NOTE: the original listed 'awful' twice; the duplicate literal is
        # removed (sets deduplicate anyway, so behavior is unchanged).
        self.negative_words = {
            'bad', 'terrible', 'awful', 'horrible', 'worst', 'hate', 'dislike',
            'poor', 'disappointed', 'sad', 'angry', 'frustrated', 'annoying',
            'boring', 'ugly', 'disgusting', 'pathetic'
        }
        self.intensifiers = {
            'very': 1.5, 'extremely': 2.0, 'really': 1.3, 'quite': 1.2,
            'too': 1.4, 'so': 1.3, 'absolutely': 1.8, 'completely': 1.5
        }
        self.negation_words = {
            'not', 'no', 'never', 'none', 'nobody', 'nothing', 'nowhere',
            'neither', 'cannot', "can't", "won't", "don't", "doesn't"
        }
        self.diminishers = {}
        self.contrast_words = set()
        self.idioms_positive = []
        self.idioms_negative = []
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
class TextPreprocessor:
    """Language-aware text cleanup and tokenization helpers."""

    def __init__(self, language: str):
        self.language = language

    def preprocess(self, text: str) -> List[str]:
        """Lowercase *text*, strip URLs/emails/odd symbols, and tokenize it."""
        cleaned = text.lower()
        # Strip URLs first, then email addresses.
        cleaned = re.sub(r'http\S+|www\S+|https\S+', '', cleaned, flags=re.MULTILINE)
        cleaned = re.sub(r'\S+@\S+', '', cleaned)
        # Drop stray symbols but keep punctuation useful as sentiment cues.
        cleaned = re.sub(r'[^\w\s\.,!?;:()\-\']', '', cleaned)
        # Words and selected punctuation marks each become their own token.
        return re.findall(r'\b\w+\b|[.,!?;:()]', cleaned)

    def normalize_turkish(self, text: str) -> str:
        """Map Turkish-specific letters onto their ASCII counterparts."""
        # One-pass character translation instead of chained replace() calls;
        # sources and targets are disjoint, so the result is identical.
        table = str.maketrans('ıİğĞüÜşŞöÖçÇ', 'iIgGuUsSoOcC')
        return text.translate(table)

    def normalize_persian(self, text: str) -> str:
        """Normalize Persian character variants (currently a no-op stub)."""
        # A full implementation would unify Arabic/Persian letter forms.
        return text
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
class LexiconBasedAnalyzer:
    """Lexicon-based sentiment analysis with negation/intensifier handling."""

    def __init__(self, language: str):
        self.language = language
        self.lexicon = SentimentLexicon(language)
        self.preprocessor = TextPreprocessor(language)

    def _check_idioms(self, text: str) -> Tuple[float, float]:
        """Return (positive, negative) idiom scores found in *text*.

        Idioms carry stronger sentiment than single words, so each match
        contributes a fixed weight of 2.0.
        """
        text_lower = text.lower()
        pos_score = sum(2.0 for idiom in self.lexicon.idioms_positive
                        if idiom.lower() in text_lower)
        neg_score = sum(2.0 for idiom in self.lexicon.idioms_negative
                        if idiom.lower() in text_lower)
        return pos_score, neg_score

    def analyze(self, text: str) -> Dict:
        """Score *text* and return polarity, confidence, and evidence words.

        Returns:
            Dict with 'polarity' ('positive'/'negative'/'neutral'),
            'confidence' in [0, 1], raw positive/negative scores, up to 10
            matched sentiment words as (label, token, negated) tuples, and
            the method name.
        """
        tokens = self.preprocessor.preprocess(text)

        positive_score = 0.0
        negative_score = 0.0
        sentiment_words = []

        # Idioms are matched on the raw text (they are multi-word phrases).
        idiom_pos, idiom_neg = self._check_idioms(text)
        positive_score += idiom_pos
        negative_score += idiom_neg

        window_size = 4  # how many preceding tokens a negation can reach

        for i, token in enumerate(tokens):
            # A token is negated when a negation word appears within the
            # preceding window and no sentence punctuation interrupts the
            # scope between them.  (Only *preceding* negations matter: the
            # original also scanned forward tokens, but its `j < i` guard
            # made those iterations dead work — removed here.)
            is_negated = False
            for j in range(max(0, i - window_size), i):
                if tokens[j] in self.lexicon.negation_words:
                    interrupted = any(tokens[k] in ('.', '!', '?', ';', ',')
                                      for k in range(j + 1, i))
                    if not interrupted:
                        is_negated = True
                        break

            # A modifier immediately before the token scales its weight.
            intensifier_strength = 1.0
            diminisher_strength = 1.0
            if i > 0:
                prev = tokens[i - 1]
                if prev in self.lexicon.intensifiers:
                    intensifier_strength = self.lexicon.intensifiers[prev]
                if prev in self.lexicon.diminishers:
                    diminisher_strength = self.lexicon.diminishers[prev]

            if token in self.lexicon.positive_words:
                score = intensifier_strength * diminisher_strength
                if is_negated:
                    # Negated positive counts as negative evidence.
                    negative_score += score
                    sentiment_words.append(('negative', token, is_negated))
                else:
                    positive_score += score
                    sentiment_words.append(('positive', token, is_negated))
            elif token in self.lexicon.negative_words:
                score = intensifier_strength * diminisher_strength
                if is_negated:
                    # Negated negative counts as positive evidence.
                    positive_score += score
                    sentiment_words.append(('positive', token, is_negated))
                else:
                    negative_score += score
                    sentiment_words.append(('negative', token, is_negated))

        # Final polarity: confidence is the winning side's share of the total.
        total_score = positive_score + negative_score
        if total_score == 0:
            polarity, confidence = 'neutral', 0.0
        elif positive_score > negative_score:
            polarity = 'positive'
            confidence = positive_score / total_score
        else:
            polarity = 'negative'
            confidence = negative_score / total_score

        return {
            'polarity': polarity,
            'confidence': round(confidence, 3),
            'positive_score': round(positive_score, 3),
            'negative_score': round(negative_score, 3),
            'sentiment_words': sentiment_words[:10],  # cap the evidence list
            'method': 'lexicon-based'
        }
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
class RuleBasedAnalyzer:
    """Rule-based sentiment analysis layering linguistic rules on a lexicon."""

    # Emoticon/emoji inventories for _detect_emoticons, defined once at class
    # level instead of per call.
    _POSITIVE_EMOTICONS = (
        ':)', ':-)', '=)', ';)', ';-)', '=D', ':D', ':-D',
        '😊', '😀', '😁', '😂', '🤣', '😃', '😄', '😆', '😍', '🥰',
        '😎', '🤗', '👍', '👏', '🎉', '❤️', '💕', '💖', '💗', '💓'
    )
    # NOTE: the original listed '>:(' twice, which double-counted every
    # occurrence; the duplicate is removed here.
    _NEGATIVE_EMOTICONS = (
        ':(', ':-(', '=(', ':/', ':-/', ':|', ':-|', '>:(',
        '😢', '😞', '😠', '😡', '😤', '😭', '😰', '😨', '😱', '😖',
        '😣', '😫', '😩', '👎', '💔', '😒', '😔', '😕', '🙁'
    )

    def __init__(self, language: str):
        self.language = language
        self.lexicon = SentimentLexicon(language)
        self.preprocessor = TextPreprocessor(language)
        # Hoisted out of analyze(): building the base analyzer re-reads the
        # lexicon file, so do it once per instance instead of once per call.
        self._base_analyzer = LexiconBasedAnalyzer(language)

    def _detect_emoticons(self, text: str) -> Tuple[float, float]:
        """Return (positive, negative) emoticon/emoji scores for *text*.

        Each occurrence contributes 1.5 to its side.
        """
        pos_score = 1.5 * sum(text.count(e) for e in self._POSITIVE_EMOTICONS)
        neg_score = 1.5 * sum(text.count(e) for e in self._NEGATIVE_EMOTICONS)
        return pos_score, neg_score

    def _handle_contrast_words(self, text: str, tokens: List[str],
                               pos_score: float, neg_score: float) -> Tuple[float, float]:
        """Dampen both scores by 30% when any contrast word occurs.

        A contrast word ('but', 'ama', …) usually signals that the decisive
        sentiment comes after it, so evidence gathered over the whole text is
        down-weighted.  (*text* is kept in the signature for compatibility
        but is not consulted.)
        """
        if any(tok.lower() in self.lexicon.contrast_words for tok in tokens):
            reduction_factor = 0.7
            return pos_score * reduction_factor, neg_score * reduction_factor
        return pos_score, neg_score

    def _detect_comparatives_superlatives(self, tokens: List[str]) -> float:
        """Return a multiplier >= 1.0 when comparative/superlative forms occur.

        Superlatives win (1.4) over comparatives (1.2).
        """
        lowered = [t.lower() for t in tokens]
        multiplier = 1.0
        if any(t in ('most', 'best', 'worst', 'least', 'greatest') for t in lowered):
            multiplier = max(multiplier, 1.4)
        if any(t in ('more', 'less', 'better', 'worse', 'greater', 'smaller')
               for t in lowered):
            multiplier = max(multiplier, 1.2)
        return multiplier

    def _detect_repetition(self, text: str) -> float:
        """Return an emphasis multiplier for repetition, capped at 1.5."""
        multiplier = 1.0
        # Stretched characters ("soooo good") add 0.1 per stretched run.
        stretched = re.findall(r'(\w)\1{2,}', text.lower())
        multiplier += 0.1 * len(stretched)
        # A word repeated three times in a row adds a one-time 0.2.
        words = text.lower().split()
        if any(words[i] == words[i + 1] == words[i + 2]
               for i in range(len(words) - 2)):
            multiplier += 0.2
        return min(multiplier, 1.5)

    def _detect_sentiment_shifters(self, text: str) -> float:
        """Return a dampening factor (< 1.0) if a sentiment-shifting word occurs.

        First match in declaration order wins (note 'although' is checked
        before its substring 'though').
        """
        shifters = {
            'but': 0.6, 'however': 0.6, 'although': 0.7, 'though': 0.7,
            'yet': 0.6, 'still': 0.7, 'nevertheless': 0.6, 'nonetheless': 0.6
        }
        text_lower = text.lower()
        for shifter, factor in shifters.items():
            if shifter in text_lower:
                return factor
        return 1.0

    def analyze(self, text: str) -> Dict:
        """Run lexicon scoring, then apply rules 1-13 and recompute polarity.

        Confidence dampeners (rules 2, 9, 12, 13) are accumulated in a single
        factor and applied *after* the final polarity/confidence
        recalculation; in the original they were applied first and then
        overwritten by that recalculation, so they had no effect.
        """
        result = self._base_analyzer.analyze(text)

        tokens = self.preprocessor.preprocess(text)
        confidence_factor = 1.0  # multiplicative dampening, applied at the end

        # Rule 1: exclamation marks amplify sentiment (capped at +50%).
        exclamation_count = text.count('!')
        if exclamation_count > 0:
            multiplier = 1 + min(exclamation_count * 0.15, 0.5)
            result['positive_score'] *= multiplier
            result['negative_score'] *= multiplier

        # Rule 2: repeated question marks suggest uncertainty or sarcasm.
        question_count = text.count('?')
        if question_count > 1:
            confidence_factor *= max(0.7, 1 - (question_count * 0.1))

        # Rule 3: ALL-CAPS words (longer than 2 chars) add emphasis.
        caps_words = [w for w in text.split() if w.isupper() and len(w) > 2]
        if caps_words:
            caps_multiplier = 1 + (len(caps_words) * 0.1)
            result['positive_score'] *= caps_multiplier
            result['negative_score'] *= caps_multiplier

        # Rule 4: emoticons/emoji contribute directly to the scores.
        emoji_pos, emoji_neg = self._detect_emoticons(text)
        result['positive_score'] += emoji_pos
        result['negative_score'] += emoji_neg

        # Rule 5: contrast words dampen both scores.
        result['positive_score'], result['negative_score'] = self._handle_contrast_words(
            text, tokens, result['positive_score'], result['negative_score']
        )

        # Rule 6: comparatives/superlatives intensify.
        comp_super_mult = self._detect_comparatives_superlatives(tokens)
        result['positive_score'] *= comp_super_mult
        result['negative_score'] *= comp_super_mult

        # Rule 7: character/word repetition signals emphasis.
        rep_mult = self._detect_repetition(text)
        result['positive_score'] *= rep_mult
        result['negative_score'] *= rep_mult

        # Rule 8: shifters ('but', 'however', …) dampen accumulated sentiment.
        shifter_factor = self._detect_sentiment_shifters(text)
        if shifter_factor < 1.0:
            result['positive_score'] *= shifter_factor
            result['negative_score'] *= shifter_factor

        # Rule 9: ellipsis suggests trailing off / uncertainty.
        if '...' in text or '…' in text:
            confidence_factor *= 0.9

        # Rule 10: runs of '!'/'?' ("!!!") add emphasis.
        multi_punct = re.findall(r'[!?]{2,}', text)
        if multi_punct:
            punct_mult = 1 + (len(multi_punct) * 0.1)
            result['positive_score'] *= punct_mult
            result['negative_score'] *= punct_mult

        # Rule 11: hashtags containing sentiment words add a small bonus.
        for tag in re.findall(r'#\w+', text):
            tag_lower = tag.lower()
            if any(word in tag_lower for word in self.lexicon.positive_words):
                result['positive_score'] += 0.5
            if any(word in tag_lower for word in self.lexicon.negative_words):
                result['negative_score'] += 0.5

        # Rule 12: URLs hint at spam or less personal content.
        if re.search(r'http[s]?://', text):
            confidence_factor *= 0.95

        # Rule 13: very short or very long texts are less reliable.
        word_count = len(text.split())
        if word_count < 3:
            confidence_factor *= 0.8
        elif word_count > 100:
            confidence_factor *= 0.95

        # Recompute polarity from the adjusted scores, then apply the
        # accumulated confidence dampening.
        total = result['positive_score'] + result['negative_score']
        if total > 0:
            if result['positive_score'] > result['negative_score']:
                result['polarity'] = 'positive'
                result['confidence'] = result['positive_score'] / total
            else:
                result['polarity'] = 'negative'
                result['confidence'] = result['negative_score'] / total
        else:
            result['polarity'] = 'neutral'
            result['confidence'] = 0.0
        result['confidence'] *= confidence_factor

        result['method'] = 'rule-based'
        return result
|
| 443 |
+
|
| 444 |
+
|
| 445 |
+
class HybridAnalyzer:
    """Blends lexicon-based and rule-based analyses into a single verdict."""

    # Relative contribution of each underlying analyzer to the blend.
    LEXICON_WEIGHT = 0.4
    RULE_WEIGHT = 0.6

    def __init__(self, language: str):
        self.language = language
        self.lexicon_analyzer = LexiconBasedAnalyzer(language)
        self.rule_analyzer = RuleBasedAnalyzer(language)

    def analyze(self, text: str) -> Dict:
        """Analyze *text* with both methods and return the weighted blend."""
        lexicon_result = self.lexicon_analyzer.analyze(text)
        rule_result = self.rule_analyzer.analyze(text)

        # Weighted sum of the two analyzers' raw scores.
        combined_positive = (lexicon_result['positive_score'] * self.LEXICON_WEIGHT
                             + rule_result['positive_score'] * self.RULE_WEIGHT)
        combined_negative = (lexicon_result['negative_score'] * self.LEXICON_WEIGHT
                             + rule_result['negative_score'] * self.RULE_WEIGHT)

        # Polarity goes to the dominant side; confidence is its share.
        total = combined_positive + combined_negative
        if total == 0:
            polarity, confidence = 'neutral', 0.0
        elif combined_positive > combined_negative:
            polarity, confidence = 'positive', combined_positive / total
        else:
            polarity, confidence = 'negative', combined_negative / total

        return {
            'polarity': polarity,
            'confidence': round(confidence, 3),
            'positive_score': round(combined_positive, 3),
            'negative_score': round(combined_negative, 3),
            'lexicon_result': lexicon_result,
            'rule_result': rule_result,
            'method': 'hybrid'
        }
|
| 488 |
+
|
| 489 |
+
|
| 490 |
+
class MultilingualSentimentAnalyzer:
    """Main sentiment analyzer supporting multiple languages and methods."""

    def __init__(self, language: str = 'english', method: str = 'hybrid'):
        """
        Initialize sentiment analyzer.

        Args:
            language: 'english', 'turkish', or 'persian'
            method: 'lexicon', 'rule', or 'hybrid' (case-insensitive;
                unknown values fall back to 'hybrid')
        """
        self.language = language.lower()
        self.method = method.lower()

        # BUGFIX: dispatch on the *normalized* method name. The original
        # compared the raw argument, so e.g. method='Lexicon' stored
        # 'lexicon' but silently built the hybrid analyzer.
        if self.method == 'lexicon':
            self.analyzer = LexiconBasedAnalyzer(self.language)
        elif self.method == 'rule':
            self.analyzer = RuleBasedAnalyzer(self.language)
        else:  # hybrid (default and fallback)
            self.analyzer = HybridAnalyzer(self.language)

    def analyze(self, text: str) -> Dict:
        """Analyze sentiment of *text*.

        Returns the underlying analyzer's result dict augmented with
        'language', 'text_length', and 'word_count'; empty input or an
        analyzer failure yields a neutral result with an 'error' key.
        """
        if not text or not text.strip():
            return {
                'polarity': 'neutral',
                'confidence': 0.0,
                'error': 'Empty text provided'
            }

        try:
            result = self.analyzer.analyze(text)
            result['language'] = self.language
            result['text_length'] = len(text)
            result['word_count'] = len(text.split())
            return result
        except Exception as e:
            # Deliberate catch-all: callers (the UI) get a neutral result
            # with the error message instead of a crash.
            return {
                'polarity': 'neutral',
                'confidence': 0.0,
                'error': str(e)
            }

    def analyze_batch(self, texts: List[str]) -> List[Dict]:
        """Analyze multiple texts independently."""
        return [self.analyze(text) for text in texts]

    def get_statistics(self, texts: List[str]) -> Dict:
        """Return aggregate polarity distribution and mean confidence for *texts*."""
        results = self.analyze_batch(texts)

        polarity_counts = Counter(r['polarity'] for r in results)
        total = len(results)
        avg_confidence = (sum(r.get('confidence', 0) for r in results) / total
                          if total > 0 else 0)

        return {
            'total_texts': total,
            'polarity_distribution': dict(polarity_counts),
            'polarity_percentages': {
                k: round(v / total * 100, 2)
                for k, v in polarity_counts.items()
            },
            'average_confidence': round(avg_confidence, 3)
        }
|
| 555 |
+
|