Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import ( | |
| AutoTokenizer, AutoModelForTokenClassification, | |
| AutoModelForSequenceClassification, | |
| MarianMTModel, MarianTokenizer | |
| ) | |
| import torch | |
# Run every model on the GPU when CUDA is available, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"


def _load_pair(tokenizer_cls, model_cls, checkpoint):
    """Load the tokenizer, then the model, for *checkpoint*; the model is moved to ``device``."""
    tokenizer = tokenizer_cls.from_pretrained(checkpoint)
    model = model_cls.from_pretrained(checkpoint).to(device)
    return tokenizer, model


# === ABSA tokenizers and models ===
# Token-classification model used to tag aspect terms.
aspect_tokenizer, aspect_model = _load_pair(
    AutoTokenizer, AutoModelForTokenClassification, "EfektMotyla/bert-aspect-ner"
)
# Sequence-classification model used to score the sentiment of one aspect.
sentiment_tokenizer, sentiment_model = _load_pair(
    AutoTokenizer, AutoModelForSequenceClassification, "EfektMotyla/absa-roberta"
)
# Marian translation models, one per direction.
en_to_pl_tokenizer, en_to_pl_model = _load_pair(
    MarianTokenizer, MarianMTModel, "gsarti/opus-mt-tc-en-pl"
)
pl_to_en_tokenizer, pl_to_en_model = _load_pair(
    MarianTokenizer, MarianMTModel, "Helsinki-NLP/opus-mt-pl-en"
)
def translate(texts, tokenizer, model):
    """Translate a batch of texts with the given tokenizer/model pair.

    Args:
        texts: List of source strings to translate.
        tokenizer: Tokenizer used both to encode ``texts`` and to decode the
            generated ids.
        model: Generation model; must expose ``generate`` and ``device``.

    Returns:
        A list with one decoded translation per input text, with special
        tokens stripped. An empty batch yields an empty list.
    """
    # Nothing to translate: skip the tokenizer/model round trip entirely.
    if not isinstance(texts, str) and len(texts) == 0:
        return []

    # Place the encoded batch on the device of the model that was passed in,
    # so the helper stays correct for any model, not only ones that live on
    # the module-level default device.
    batch = tokenizer(
        texts, return_tensors="pt", padding=True, truncation=True
    ).to(model.device)
    generated = model.generate(**batch)
    return tokenizer.batch_decode(generated, skip_special_tokens=True)
def translate_pl_to_en(texts):
    """Translate ``texts`` with the pl->en tokenizer and model; returns a list of strings."""
    return translate(texts, tokenizer=pl_to_en_tokenizer, model=pl_to_en_model)
def translate_en_to_pl(texts):
    """Translate ``texts`` with the en->pl tokenizer and model; returns a list of strings."""
    return translate(texts, tokenizer=en_to_pl_tokenizer, model=en_to_pl_model)
def _group_aspect_tokens(tokens, labels, ignored=()):
    """Group tokens into aspect spans: ``B-ASP`` opens a span, ``I-ASP`` extends it."""
    spans = []
    current = []
    for token, label in zip(tokens, labels):
        # Tokens in `ignored` never belong to an aspect, whatever their label.
        if token in ignored:
            label = "O"
        if label == "B-ASP":
            # A new aspect begins; close the one in progress, if any.
            if current:
                spans.append(current)
            current = [token]
        elif label == "I-ASP" and current:
            current.append(token)
        elif current:
            # Any other label (or an I-ASP with no open span) ends the span.
            spans.append(current)
            current = []
    if current:
        spans.append(current)
    return spans


def extract_aspects(text):
    """Return the distinct aspect terms the aspect tagger finds in ``text``.

    The text is tokenized, each token is labelled by ``aspect_model``, and
    consecutive ``B-ASP``/``I-ASP`` tokens are merged back into strings.

    Args:
        text: The sentence to analyse.

    Returns:
        A list of unique, non-empty aspect strings in order of first
        appearance.
    """
    inputs = aspect_tokenizer(
        text, return_tensors="pt", truncation=True, padding=True
    ).to(device)
    with torch.no_grad():
        outputs = aspect_model(**inputs)

    # Only the first (and only) sequence of the batch is inspected.
    label_ids = torch.argmax(outputs.logits, dim=2)[0].cpu().tolist()
    tokens = aspect_tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
    labels = [aspect_model.config.id2label[label_id] for label_id in label_ids]

    # Special tokens are excluded so they can never surface as an "aspect".
    spans = _group_aspect_tokens(
        tokens, labels, ignored=set(aspect_tokenizer.all_special_tokens)
    )
    aspects = (
        aspect_tokenizer.convert_tokens_to_string(span).strip() for span in spans
    )
    # dict.fromkeys de-duplicates while keeping first-appearance order, so the
    # result is deterministic (a set would return an arbitrary order).
    return list(dict.fromkeys(aspect for aspect in aspects if aspect))
def analyze(text_pl):
    """Run aspect-based sentiment analysis on a Polish comment.

    The comment is translated to English, aspect terms are extracted, each
    aspect is classified by the sentiment model, and the aspects are
    translated back to Polish for display.

    Args:
        text_pl: The comment typed by the user (may be empty or ``None``).

    Returns:
        A Markdown string with one ``Aspect -> **sentiment**`` entry per
        aspect, a notice when no aspects were found, or an error message
        prefixed with ``Błąd:`` if any step raised.
    """
    sentiment_names = {
        0: "negatywny",
        1: "neutralny",
        2: "pozytywny",
        3: "konfliktowy",
    }

    # Empty or whitespace-only input has no aspects; do not run the models.
    if not text_pl or not text_pl.strip():
        return "Nie wykryto żadnych aspektów."

    try:
        text_en = translate_pl_to_en([text_pl])[0]
        aspects_en = extract_aspects(text_en)
        if not aspects_en:
            return "Nie wykryto żadnych aspektów."

        # Translate all aspects in one batch instead of one call per aspect.
        aspects_pl = translate_en_to_pl(aspects_en)

        results = []
        for asp_en, asp_pl in zip(aspects_en, aspects_pl):
            input_text = f"{text_en} [SEP] {asp_en}"
            inputs = sentiment_tokenizer(
                input_text, return_tensors="pt", truncation=True, padding=True
            ).to(device)
            with torch.no_grad():
                logits = sentiment_model(**inputs).logits
            # Arg-max over the class dimension of the single batch row.
            predicted_class_id = int(logits.argmax(dim=-1)[0])
            sentiment_label = sentiment_names[predicted_class_id]
            results.append(f"{asp_pl.capitalize()} -> **{sentiment_label}**")

        # Markdown folds a single newline into a space, which would merge all
        # results into one line; a blank line keeps each result separate.
        return "\n\n".join(results)
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing.
        return f"Błąd: {e}"
# === Gradio UI ===
# Input: a single text box for the comment to analyse.
_comment_input = gr.Textbox(
    label="Komentarz po polsku",
    placeholder="Np. Pizza była pyszna, ale kelner był nieuprzejmy.",
)
# Output: rendered as Markdown so the sentiment labels appear in bold.
_results_output = gr.Markdown(label="Wyniki analizy")

demo = gr.Interface(
    fn=analyze,
    inputs=_comment_input,
    outputs=_results_output,
    title="ABSA – Analiza komentarzy restauracyjnych",
    description="Wykrywa aspekty i przypisuje im sentymenty (pozytywny / negatywny / neutralny / konfliktowy).",
)

# Start the web app.
demo.launch()