# absa-app / inference.py
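"""Inference entry points for the ABSA demo: an AraBERTa encoder with a
BiLSTM-CRF tagging head, LoRA-adapted seq2seq models (mT5), and fine-tuned
OpenAI chat models, unified behind predict_absa()."""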
import os
import torch
import json
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModel, AutoConfig
from peft import LoraConfig, get_peft_model, PeftModel
from araberta_setting.modeling_bilstm_crf import BERT_BiLSTM_CRF
from seq2seq_inference import infer_t5_prompt
from huggingface_hub import hf_hub_download
from openai import OpenAI
# 🔑 OpenAI client (make sure OPENAI_API_KEY is set in Hugging Face Space secrets)
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# Your fine-tuned OpenAI model IDs
GPT35_FINETUNED = "ft:gpt-3.5-turbo-0125:asma:gpt-3-5-turbo-absa:Bb6gmwkE"
GPT4O_FINETUNED = "ft:gpt-4o-mini-2024-07-18:asma:gpt4-finetune-absa:BazoEjnp"
MODEL_OPTIONS = {
    "Araberta": {
        "base": "asmashayea/absa-araberta",
        "adapter": "asmashayea/absa-araberta"
    },
    "mT5": {
        "base": "google/mt5-base",
        "adapter": "asmashayea/mt4-absa"
    },
    "GPT3.5": {
        "base": "openai",
        "adapter": GPT35_FINETUNED
    },
    # "GPT4o": {
    #     "base": "openai",
    #     "adapter": GPT4O_FINETUNED
    # }
}
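# Cache of loaded (tokenizer, model) pairs so weights are only loaded once per process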
cached_models = {}
# ---------------------------
# Araberta loader
# ---------------------------
def load_araberta():
    path = "asmashayea/absa-arabert"
    device = "cuda" if torch.cuda.is_available() else "cpu"

    tokenizer = AutoTokenizer.from_pretrained(path)

    # Wrap the base encoder with its LoRA adapter, then attach the custom
    # BiLSTM-CRF head whose weights are stored separately in the repo.
    base_model = AutoModel.from_pretrained(path)
    lora_config = LoraConfig.from_pretrained(path)
    lora_model = get_peft_model(base_model, lora_config)

    local_pt = hf_hub_download(repo_id=path, filename="bilstm_crf_head.pt")
    config = AutoConfig.from_pretrained(path)
    model = BERT_BiLSTM_CRF(lora_model, config)
    state_dict = torch.load(local_pt, map_location=torch.device(device))
    model.load_state_dict(state_dict)
    model.to(device).eval()

    cached_models["Araberta"] = (tokenizer, model)
    return tokenizer, model

def infer_araberta(text):
    if "Araberta" not in cached_models:
        tokenizer, model = load_araberta()
    else:
        tokenizer, model = cached_models["Araberta"]

    device = next(model.parameters()).device
    inputs = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        padding='max_length',
        max_length=128
    )
    input_ids = inputs['input_ids'].to(device)
    attention_mask = inputs['attention_mask'].to(device)

    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)

    # The BiLSTM-CRF head returns decoded tag ids (not raw logits) under 'logits'
    predicted_ids = outputs['logits'][0].cpu().tolist()
    tokens = tokenizer.convert_ids_to_tokens(input_ids[0].cpu())
    predicted_labels = [model.id2label.get(p, 'O') for p in predicted_ids]

    # Remove special tokens ([CLS], [SEP], padding) and keep labels aligned
    clean_tokens = [t for t in tokens if t not in tokenizer.all_special_tokens]
    clean_labels = [l for t, l in zip(tokens, predicted_labels) if t not in tokenizer.all_special_tokens]

    # Structured per-token predictions (currently unused in the returned payload)
    token_predictions = [
        {"token": t.replace("##", ""), "label": l}
        for t, l in zip(clean_tokens, clean_labels)
    ]

    # Group contiguous B-/I- tokens into aspect spans
    aspects, current_tokens, current_sentiment = [], [], None

    def join_tokens(tok_list):
        """
        Merge WordPiece tokens back into surface words:
        - join '##' subwords without spaces
        - add spaces between separate words
        e.g. (illustrative): ["خدمة", "رائ", "##عة"] -> "خدمة رائعة"
        """
        merged = ""
        for t in tok_list:
            if t.startswith("##"):
                merged += t[2:]
            else:
                if merged:
                    merged += " "
                merged += t
        return merged

    for token, label in zip(clean_tokens, clean_labels):
        if label.startswith("B-"):
            # A new aspect begins: flush any span in progress
            if current_tokens:
                aspects.append({
                    "aspect": join_tokens(current_tokens),
                    "sentiment": current_sentiment
                })
            current_tokens = [token]
            current_sentiment = label.split("-")[1]
        elif label.startswith("I-") and current_sentiment == label.split("-")[1]:
            current_tokens.append(token)
        else:
            # An 'O' tag or an inconsistent 'I-' tag ends the current span
            if current_tokens:
                aspects.append({
                    "aspect": join_tokens(current_tokens),
                    "sentiment": current_sentiment
                })
            current_tokens, current_sentiment = [], None

    if current_tokens:
        aspects.append({
            "aspect": join_tokens(current_tokens),
            "sentiment": current_sentiment
        })
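    # Illustrative return value (sentiment strings come from the model's
    # B-/I- label suffixes): {"aspects": [{"aspect": "الخدمة", "sentiment": "POS"}]}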
    return {
        "aspects": aspects,
        # "token_predictions": token_predictions,
    }

# ---------------------------
# Hugging Face seq2seq loaders
# ---------------------------
def load_model(model_key):
    if model_key in cached_models:
        return cached_models[model_key]

    base_id = MODEL_OPTIONS[model_key]["base"]
    adapter_id = MODEL_OPTIONS[model_key]["adapter"]
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Load the base seq2seq model, then apply the LoRA adapter on top
    tokenizer = AutoTokenizer.from_pretrained(adapter_id)
    base_model = AutoModelForSeq2SeqLM.from_pretrained(base_id).to(device)
    model = PeftModel.from_pretrained(base_model, adapter_id).to(device)
    model.eval()

    cached_models[model_key] = (tokenizer, model)
    return tokenizer, model

# ---------------------------
# OpenAI inference
# ---------------------------
def infer_openai(text, model_name):
    prompt = f"Extract aspects and their sentiment from this review:\n\n{text}\n\nReturn JSON with 'aspect' and 'sentiment'."
    response = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=512,
        temperature=0
    )
    output = response.choices[0].message.content.strip()
    # The fine-tuned models are expected to reply with JSON; fall back to the
    # raw string if the reply doesn't parse
    try:
        return json.loads(output)
    except json.JSONDecodeError:
        return {"raw_output": output}

# ---------------------------
# Unified predictor
# ---------------------------
def predict_absa(text, model_choice):
    if model_choice == 'Araberta':
        return infer_araberta(text)
    # Guard against choices with no (active) MODEL_OPTIONS entry, e.g. 'mBART'
    # and the commented-out 'GPT4o', which would otherwise raise a KeyError
    if model_choice not in MODEL_OPTIONS:
        return {"error": f"Model {model_choice} not supported"}
    if model_choice in ['mT5', 'mBART']:
        tokenizer, model = load_model(model_choice)
        return infer_t5_prompt(text, tokenizer, model)
    if model_choice in ['GPT3.5', 'GPT4o']:
        return infer_openai(text, MODEL_OPTIONS[model_choice]["adapter"])
    return {"error": f"Model {model_choice} not supported"}