import gradio as gr import json import re import html import os import pandas as pd import re # models from transformers import AutoTokenizer, AutoModelForTokenClassification,AutoModelForSequenceClassification, pipeline import torch aspdevice = "cpu" # Your aspect extraction logic model_id_ate = "gauneg/roberta-base-absa-ate-sentiment" tokenizer_ate = AutoTokenizer.from_pretrained(model_id_ate) model_ate = AutoModelForTokenClassification.from_pretrained(model_id_ate) senti_pipeline = pipeline(task='ner', model=model_ate, tokenizer=tokenizer_ate, device=aspdevice, aggregation_strategy='simple') # Your emotion detection logic emodevice = "mps" if torch.backends.mps.is_available() else 0 if torch.cuda.is_available() else -1 emotion_model = "j-hartmann/emotion-english-distilroberta-base" emo_tokenizer = AutoTokenizer.from_pretrained(emotion_model) emo_model = AutoModelForSequenceClassification.from_pretrained(emotion_model) classifier = pipeline("text-classification", model=emo_model,tokenizer=emo_tokenizer, top_k=None, device=emodevice) def extract_full_word(text, start, end): word_start = start while word_start > 0 and re.match(r'\w', text[word_start - 1]): word_start -= 1 word_end = end while word_end < len(text) and re.match(r'\w', text[word_end]): word_end += 1 return text[word_start:word_end].strip() def extract_full_analysis(review): sentences = [rev.strip() for rev in review.split(".") if rev.strip()] #aspect predictions asppredictions = senti_pipeline(review) #emotion predictions emopredictions = classifier(sentences) #Extract aspects aspect_word = [ {"word": d["word"].strip(), "start": d["start"], "end": d["end"]} for d in asppredictions ] # Extract sentiment term sentiments = [d["entity_group"] for d in asppredictions] scores = [f"{d['score']:.4f}" for d in asppredictions] refined_aspects = [] for aspect in aspect_word: full_word = extract_full_word(review, aspect["start"], aspect["end"]) if len(full_word) >= 3: refined_aspects.append(full_word) refined_aspects = list(dict.fromkeys(refined_aspects)) flat_preds = [d for sentence_preds in emopredictions for d in sentence_preds if d["score"] >= 0.5] emotions = [d["label"] for d in flat_preds] emotion_score = [d["score"] for d in flat_preds] dynamic_result ={ "review": review, "aspect_words": refined_aspects, "sentiment": sentiments, "score": scores, "emotions": emotions, "emo-score": emotion_score } return dynamic_result def highlight_aspects(text, aspects): aspects_sorted = sorted(aspects, key=len, reverse=True) aspects_regex = [re.escape(asp) for asp in aspects_sorted] pattern = r'\b(?:' + '|'.join(aspects_regex) + r')\b' def replace_match(match): return f'{match.group()}' return re.sub(pattern, replace_match, text, flags=re.IGNORECASE) def format_review(review): text = review[0]['review'] aspects = review[0]['aspect_words'] highlighted_text = highlight_aspects(text, aspects) aspects = [asp.strip() for asp in review[0]['aspect_words']] emotions = [emo.strip() for emo in review[0]['emotions']] sentiments = [senti for senti in review[0]['sentiment']] sentimentScores = [scor for scor in review[0]['score']] emotionScores = [emscor for emscor in review[0]['emo-score']] aspects_html = '
{highlighted_text}