Spaces:
Sleeping
Sleeping
File size: 5,796 Bytes
4435743 65c6b90 4435743 65c6b90 4435743 d1a4d05 4435743 65c6b90 4435743 65c6b90 4435743 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
import gradio as gr
import json
import re
import html
import os
import pandas as pd
import re
# models
from transformers import AutoTokenizer, AutoModelForTokenClassification,AutoModelForSequenceClassification, pipeline
import torch
# --- Aspect extraction -------------------------------------------------------
# Token-classification model used through a "ner" pipeline; elsewhere in this
# file each prediction's `entity_group` is read as the sentiment label.
# This pipeline is pinned to the CPU.
aspdevice = "cpu"
model_id_ate = "gauneg/roberta-base-absa-ate-sentiment"
tokenizer_ate = AutoTokenizer.from_pretrained(model_id_ate)
model_ate = AutoModelForTokenClassification.from_pretrained(model_id_ate)
senti_pipeline = pipeline(
    task='ner',
    model=model_ate,
    tokenizer=tokenizer_ate,
    device=aspdevice,
    aggregation_strategy='simple',
)

# --- Emotion detection -------------------------------------------------------
# Device preference: Apple MPS first, then CUDA device 0, otherwise -1.
if torch.backends.mps.is_available():
    emodevice = "mps"
elif torch.cuda.is_available():
    emodevice = 0
else:
    emodevice = -1

emotion_model = "j-hartmann/emotion-english-distilroberta-base"
emo_tokenizer = AutoTokenizer.from_pretrained(emotion_model)
emo_model = AutoModelForSequenceClassification.from_pretrained(emotion_model)
# top_k=None is passed so the classifier is not limited to a single label;
# callers filter the returned labels by score.
classifier = pipeline(
    "text-classification",
    model=emo_model,
    tokenizer=emo_tokenizer,
    top_k=None,
    device=emodevice,
)
def extract_full_word(text, start, end):
    """Grow the span ``text[start:end]`` until it covers whole words.

    The left edge moves backwards while the preceding character matches
    ``\\w``; the right edge moves forwards while the character at the edge
    matches ``\\w``. The resulting slice is returned with surrounding
    whitespace stripped.

    Args:
        text: The full string the span refers to.
        start: Index of the first character of the span.
        end: Index one past the last character of the span.

    Returns:
        The expanded, stripped substring.
    """
    is_word_char = re.compile(r'\w').match

    # Walk left to the beginning of the word containing `start`.
    left = start
    while left > 0 and is_word_char(text[left - 1]):
        left -= 1

    # Walk right to the end of the word containing `end`.
    right = end
    text_length = len(text)
    while right < text_length and is_word_char(text[right]):
        right += 1

    return text[left:right].strip()
def extract_full_analysis(review):
    """Run aspect/sentiment and emotion extraction on one review.

    The whole review is passed to ``senti_pipeline``; each prediction's
    character span is widened to full words with ``extract_full_word``.
    Aspects shorter than three characters are dropped and repeated aspects
    are reported once (first occurrence wins). The sentiment label and score
    are taken from the same prediction as the aspect that is kept, so
    ``aspect_words[i]``, ``sentiment[i]`` and ``score[i]`` always describe the
    same aspect.

    The review is also split on ``"."`` into sentences which are passed to
    ``classifier``; only emotion labels scoring at least 0.5 are kept.

    Args:
        review: The review text.

    Returns:
        A dict with the keys ``"review"`` (the input text), ``"aspect_words"``,
        ``"sentiment"``, ``"score"`` (strings formatted to four decimals),
        ``"emotions"`` and ``"emo-score"`` (raw classifier scores).
    """
    sentences = [part.strip() for part in review.split(".") if part.strip()]

    # Aspect predictions (each carries start/end offsets, entity_group, score).
    asppredictions = senti_pipeline(review)
    # Emotion predictions; skip the classifier when there is nothing to
    # classify (e.g. the review consists only of dots).
    emopredictions = classifier(sentences) if sentences else []

    # Map each kept aspect to its (sentiment, formatted score). A dict keeps
    # insertion order, which gives order-preserving de-duplication and keeps
    # the three output lists positionally aligned.
    aspect_info = {}
    for pred in asppredictions:
        full_word = extract_full_word(review, pred["start"], pred["end"])
        if len(full_word) < 3 or full_word in aspect_info:
            continue
        aspect_info[full_word] = (pred["entity_group"], f"{pred['score']:.4f}")

    refined_aspects = list(aspect_info)
    sentiments = [sentiment for sentiment, _ in aspect_info.values()]
    scores = [score for _, score in aspect_info.values()]

    # Flatten the per-sentence emotion results, keeping confident labels only.
    flat_preds = [
        pred
        for sentence_preds in emopredictions
        for pred in sentence_preds
        if pred["score"] >= 0.5
    ]
    emotions = [pred["label"] for pred in flat_preds]
    emotion_score = [pred["score"] for pred in flat_preds]

    return {
        "review": review,
        "aspect_words": refined_aspects,
        "sentiment": sentiments,
        "score": scores,
        "emotions": emotions,
        "emo-score": emotion_score,
    }
def highlight_aspects(text, aspects):
    """Return *text* as HTML with every aspect occurrence highlighted.

    Aspects are matched case-insensitively as whole words. Each match is
    wrapped in a bold, yellow-background ``<span>``. All text, matched or
    not, is HTML-escaped, because the review comes from the user and the
    result is rendered as HTML.

    Args:
        text: The raw review text.
        aspects: Iterable of aspect strings to highlight. Empty strings are
            ignored.

    Returns:
        An HTML-safe string. When there is nothing to highlight this is just
        the escaped text.
    """
    # Longest first so a longer aspect wins over a shorter one it contains.
    alternatives = [
        re.escape(asp)
        for asp in sorted(aspects, key=len, reverse=True)
        if asp
    ]
    # An empty alternation would match the empty string at every word
    # boundary and litter the output with empty highlight spans.
    if not alternatives:
        return html.escape(text)

    pattern = re.compile(
        r'\b(?:' + '|'.join(alternatives) + r')\b', flags=re.IGNORECASE
    )

    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        pieces.append(html.escape(text[cursor:match.start()]))
        pieces.append(
            '<span style="font-weight: bold; background-color: yellow;">'
            f'{html.escape(match.group())}</span>'
        )
        cursor = match.end()
    pieces.append(html.escape(text[cursor:]))
    return "".join(pieces)
def format_review(review):
    """Render the first analysed review in *review* as an HTML card.

    The card shows the review text with aspects highlighted, followed by
    grids of aspect words, sentiments and emotions. The ``"score"`` and
    ``"emo-score"`` entries are not rendered.

    Args:
        review: A non-empty list whose first item is a dict with the keys
            ``"review"``, ``"aspect_words"``, ``"sentiment"`` and
            ``"emotions"``.

    Returns:
        The HTML markup for the card.
    """
    entry = review[0]

    def cell_grid(values):
        # One escaped cell per value, wrapped in the shared grid container.
        cells = "".join(
            f'<div class="cell-item">{html.escape(value)}</div>'
            for value in values
        )
        return '<div class="cell-grid">' + cells + '</div>'

    # The highlighted text is inserted into the markup as-is, so
    # highlight_aspects is responsible for the HTML it returns.
    highlighted_text = highlight_aspects(entry['review'], entry['aspect_words'])

    aspects_html = cell_grid(asp.strip() for asp in entry['aspect_words'])
    emotions_html = cell_grid(emo.strip() for emo in entry['emotions'])
    sentiments_html = cell_grid(entry['sentiment'])

    return (
        "\n"
        '<div style="border: 1px solid #ccc; padding: 10px; margin-bottom: 10px;">\n'
        "<h4>Review</h4>\n"
        f"<p>{highlighted_text}</p>\n"
        "<h3>Aspect Words</h3>\n"
        f"{aspects_html}\n"
        "<h3>Sentiments</h3>\n"
        f"{sentiments_html}\n"
        "<h3>Emotions</h3>\n"
        f"{emotions_html}\n"
        "</div>\n"
    )
def submit_new_review(text):
    """Analyse a submitted review and build the values for the UI outputs.

    Args:
        text: The review text from the input box. ``None`` and
            whitespace-only strings are treated as "nothing entered".

    Returns:
        A ``(html, reviews)`` tuple. For blank input this is a prompt message
        and an empty list; otherwise it is the rendered review card and a
        one-element list holding the analysis dict.
    """
    # Guard against None as well as blank text so .strip() cannot raise.
    if text is None or not text.strip():
        return "Please enter a review.", []

    new_dynamic_reviews = [extract_full_analysis(text)]
    return format_review(new_dynamic_reviews), new_dynamic_reviews
# Page layout. The custom CSS styles the "cell-grid" / "cell-item" classes
# used by the rendered review card.
with gr.Blocks(css="""
.cell-grid {
display: grid;
grid-template-columns: repeat(6, 1fr);
gap: 10px;
}
.cell-item {
background-color: #f0f0f0;
padding: 10px;
border: 1px solid #ccc;
text-align: center;
}
""") as demo:
    # Page title
    gr.Markdown("# Yelp Review Demonstration for Aspect and Emotion Extracted")

    # Review submission form
    gr.Markdown("## Submit Your Review")
    with gr.Row():
        with gr.Column(scale=8):
            submit_text = gr.Textbox(label="Write your review", lines=5)
        # NOTE(review): the original indentation was lost; the button is
        # presumably a sibling of the column inside the row - confirm.
        submit_button = gr.Button("Submit")

    # Area where the analysed review is rendered
    gr.Markdown("## User Submitted Reviews")
    dynamic_display = gr.HTML()

    # Holds the list of analysis dicts returned by the submit handler
    dynamic_reviews_state = gr.State()

    # Wire the button: review text in, rendered HTML and state out.
    submit_button.click(
        fn=submit_new_review,
        inputs=submit_text,
        outputs=[dynamic_display, dynamic_reviews_state],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
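# SECURITY: a Hugging Face access token was previously committed on this line.
# Revoke that token and supply credentials through an environment variable or
# a Space secret instead of storing them in the source file.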