# Hugging Face Space: abdull90/aspect_emo_llm_deploy
# (Space status when this file was captured: Sleeping; file size: 5,796 bytes)

import gradio as gr
import json
import re
import html
import os
import pandas as pd
import re

# models
from transformers import AutoTokenizer, AutoModelForTokenClassification,AutoModelForSequenceClassification, pipeline
import torch

# --- Aspect / sentiment extraction -------------------------------------------
# The aspect pipeline is pinned to the CPU regardless of available accelerators.
aspdevice = "cpu"
# Token-classification checkpoint used to tag aspect terms with a sentiment label.
model_id_ate = "gauneg/roberta-base-absa-ate-sentiment"
tokenizer_ate = AutoTokenizer.from_pretrained(model_id_ate)
model_ate = AutoModelForTokenClassification.from_pretrained(model_id_ate)
# extract_full_analysis reads "word", "start", "end", "entity_group" and "score"
# from each prediction this pipeline returns.
# aggregation_strategy='simple' asks the pipeline to merge sub-word tokens into
# entity groups (see the transformers token-classification pipeline docs).
senti_pipeline = pipeline(task='ner', model=model_ate, tokenizer=tokenizer_ate, device=aspdevice, aggregation_strategy='simple')

# --- Emotion detection --------------------------------------------------------
# Device preference: Apple MPS if available, else CUDA device index 0, else -1
# (the index the transformers pipeline API uses for CPU).
emodevice = "mps" if torch.backends.mps.is_available() else 0 if torch.cuda.is_available() else -1
emotion_model = "j-hartmann/emotion-english-distilroberta-base"
emo_tokenizer = AutoTokenizer.from_pretrained(emotion_model)
emo_model = AutoModelForSequenceClassification.from_pretrained(emotion_model)
# top_k=None requests a score for every label instead of only the best one
# (per the transformers text-classification docs); extract_full_analysis then
# keeps the labels whose score is at least 0.5.
classifier = pipeline("text-classification", model=emo_model,tokenizer=emo_tokenizer, top_k=None, device=emodevice)

def extract_full_word(text, start, end):
    """Widen the span ``text[start:end]`` to whole-word boundaries.

    The span is grown to the left and to the right for as long as the
    neighbouring character is a regex word character (``\\w``). The widened
    slice is returned with surrounding whitespace stripped.

    Args:
        text: The string the span indexes into.
        start: Index of the first character of the span.
        end: Index one past the last character of the span.

    Returns:
        The expanded, stripped substring.
    """
    def _is_word_char(ch):
        return re.match(r'\w', ch) is not None

    # Scan backwards from the character just before the span.
    left = start
    for idx in range(start - 1, -1, -1):
        if not _is_word_char(text[idx]):
            break
        left = idx

    # Scan forwards from the first character after the span.
    right = end
    for idx in range(end, len(text)):
        if not _is_word_char(text[idx]):
            break
        right = idx + 1

    return text[left:right].strip()

def extract_full_analysis(review):
    """Run aspect/sentiment and emotion extraction on one review.

    The whole review is passed to ``senti_pipeline``; each predicted span is
    widened to a full word with ``extract_full_word``. Spans whose widened
    word is shorter than three characters, or that repeat an already seen
    word, are dropped. The sentiment label and score of a dropped span are
    dropped with it, so ``aspect_words``, ``sentiment`` and ``score`` always
    have the same length and share indices.

    The review is also split on "." into sentences, which are passed to
    ``classifier``; every emotion label scoring at least 0.5 is kept.

    Args:
        review: The review text.

    Returns:
        A dict with the keys:
            "review":       the original text,
            "aspect_words": unique full-word aspects in order of first appearance,
            "sentiment":    the ``entity_group`` label for each aspect word,
            "score":        the aspect score formatted to four decimals (str),
            "emotions":     emotion labels with score >= 0.5,
            "emo-score":    the matching emotion scores (float).
    """
    sentences = [part.strip() for part in review.split(".") if part.strip()]

    aspect_predictions = senti_pipeline(review)
    # A review made only of dots/whitespace has no sentences; do not hand the
    # classifier an empty batch.
    emotion_predictions = classifier(sentences) if sentences else []

    refined_aspects = []
    sentiments = []
    scores = []
    seen_words = set()
    for pred in aspect_predictions:
        full_word = extract_full_word(review, pred["start"], pred["end"])
        # Filter and de-duplicate here, on the prediction itself, so the
        # sentiment/score entries stay aligned with the surviving aspect words.
        if len(full_word) < 3 or full_word in seen_words:
            continue
        seen_words.add(full_word)
        refined_aspects.append(full_word)
        sentiments.append(pred["entity_group"])
        scores.append(f"{pred['score']:.4f}")

    # Presumably one list of {"label", "score"} dicts per sentence (the
    # classifier is built with top_k=None) - flatten and keep confident ones.
    confident = [
        pred
        for sentence_preds in emotion_predictions
        for pred in sentence_preds
        if pred["score"] >= 0.5
    ]

    return {
        "review": review,
        "aspect_words": refined_aspects,
        "sentiment": sentiments,
        "score": scores,
        "emotions": [pred["label"] for pred in confident],
        "emo-score": [pred["score"] for pred in confident],
    }


def highlight_aspects(text, aspects):
    """Return *text* as HTML with each aspect occurrence wrapped in a highlight span.

    Matching is case-insensitive and anchored on word boundaries, so "food"
    does not match inside "seafood". The text itself is HTML-escaped
    (``&``, ``<``, ``>``) because it is user input that ends up inside markup;
    only the highlight ``<span>`` tags are emitted as real HTML.

    Args:
        text: The raw review text.
        aspects: Iterable of aspect strings to highlight.

    Returns:
        An HTML-safe string. When *aspects* is empty (or holds only empty
        strings) the escaped text is returned without any span.
    """
    # Longest first so that "battery life" wins over "battery" in the alternation.
    candidates = sorted((asp for asp in aspects if asp), key=len, reverse=True)
    if not candidates:
        # An empty alternation would match at every word boundary and litter
        # the output with empty spans.
        return html.escape(text, quote=False)

    pattern = re.compile(
        r'\b(?:' + '|'.join(re.escape(asp) for asp in candidates) + r')\b',
        flags=re.IGNORECASE,
    )

    # Walk the matches on the raw text and escape each piece separately, so
    # escaping can never interfere with the matching itself.
    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        pieces.append(html.escape(text[cursor:match.start()], quote=False))
        pieces.append(
            '<span style="font-weight: bold; background-color: yellow;">'
            f'{html.escape(match.group(), quote=False)}</span>'
        )
        cursor = match.end()
    pieces.append(html.escape(text[cursor:], quote=False))
    return "".join(pieces)

def _cell_grid(items):
    """Render *items* as one escaped ``cell-item`` div each inside a ``cell-grid`` div."""
    cells = "".join(
        f'<div class="cell-item">{html.escape(item)}</div>' for item in items
    )
    return '<div class="cell-grid">' + cells + '</div>'


def format_review(review):
    """Build the HTML card for the first analysis entry in *review*.

    Args:
        review: A list of analysis dicts; only ``review[0]`` is rendered. The
            entry must provide "review", "aspect_words", "sentiment" and
            "emotions". The "score" and "emo-score" entries are not rendered.

    Returns:
        An HTML fragment with the highlighted review text followed by grids of
        aspect words, sentiments and emotions, or an empty string when
        *review* is empty.
    """
    if not review:
        return ""
    entry = review[0]

    # NOTE(review): the review text is inserted exactly as highlight_aspects
    # returns it; any HTML escaping of that text has to happen there.
    highlighted_text = highlight_aspects(entry['review'], entry['aspect_words'])

    aspects_html = _cell_grid(asp.strip() for asp in entry['aspect_words'])
    emotions_html = _cell_grid(emo.strip() for emo in entry['emotions'])
    sentiments_html = _cell_grid(entry['sentiment'])
    return f"""
    <div style="border: 1px solid #ccc; padding: 10px; margin-bottom: 10px;">
        <h4>Review</h4>
        <p>{highlighted_text}</p>
        <h3>Aspect Words</h3>
        {aspects_html}
        <h3>Sentiments</h3>
        {sentiments_html}
        <h3>Emotions</h3>
        {emotions_html}
    </div>
    """


def submit_new_review(text):
    """Analyse a submitted review and return what the UI needs to display it.

    Args:
        text: The contents of the review textbox. ``None``, an empty string
            and whitespace-only input are all treated as "nothing entered".

    Returns:
        A ``(html, reviews)`` tuple: the rendered HTML for the display
        component and the single-entry list of analysis dicts for the state
        component. For blank input the tuple is
        ``("Please enter a review.", [])``.
    """
    # `not text` also covers None, which would otherwise fail on .strip().
    if not text or not text.strip():
        return "Please enter a review.", []
    new_dynamic_reviews = [extract_full_analysis(text)]
    return format_review(new_dynamic_reviews), new_dynamic_reviews

# Gradio UI definition. The css argument styles the "cell-grid" / "cell-item"
# classes that format_review emits for the aspect, sentiment and emotion lists.
with gr.Blocks(css="""
.cell-grid {
    display: grid;
    grid-template-columns: repeat(6, 1fr);
    gap: 10px;
}
.cell-item {
    background-color: #f0f0f0;
    padding: 10px;
    border: 1px solid #ccc;
    text-align: center;
}
""") as demo:
    # Page title
    gr.Markdown("# Yelp Review Demonstration for Aspect and Emotion Extracted")

    # Submission form: a multi-line textbox with its submit button
    gr.Markdown("## Submit Your Review")
    with gr.Row():
        with gr.Column(scale=8):
            submit_text = gr.Textbox(label="Write your review", lines=5)
            submit_button = gr.Button("Submit")

    # Output area that receives the HTML produced by submit_new_review
    gr.Markdown("## User Submitted Reviews")
    dynamic_display = gr.HTML()

    # State component that receives the second value returned by
    # submit_new_review (the list of analysis dicts)
    dynamic_reviews_state = gr.State()
    # Clicking "Submit" sends the textbox value to submit_new_review and routes
    # its (html, reviews) result to the HTML display and the state, in that order.
    submit_button.click(submit_new_review, inputs=submit_text, outputs=[dynamic_display, dynamic_reviews_state])

# Start the Gradio app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    demo.launch()
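# NOTE: a Hugging Face access token had been pasted here in plain text and was removed.
# Revoke that token and supply credentials through an environment variable or Space secret.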