Commit c7d107e
Parent(s): 4177079
gpt

Files changed:
- app.py +4 -5
- inference.py +51 -49
app.py CHANGED
@@ -5,18 +5,17 @@ def run_absa(review, model_choice):
     try:
         return predict_absa(review, model_choice)
     except Exception as e:
         return {"error": str(e)}
 
-# app
 demo = gr.Interface(
     fn=run_absa,
     inputs=[
         gr.Textbox(label="Arabic Review"),
         gr.Dropdown(choices=list(MODEL_OPTIONS.keys()), label="Choose Model", value="Araberta")
     ],
     outputs=gr.JSON(label="Extracted Aspect-Sentiment-Opinion Triplets"),
     title="Arabic ABSA (Aspect-Based Sentiment Analysis)",
-    description="Choose a model (Araberta, mT5,
+    description="Choose a model (Araberta, mT5, mBART, GPT3.5, GPT4o) to extract aspects and sentiment"
 )
 
 if __name__ == "__main__":
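For orientation, a minimal sketch of what the full app.py plausibly looks like after this commit. The diff shows only the run_absa body and the gr.Interface(...) call, so the gradio import, the import of predict_absa and MODEL_OPTIONS from inference.py, and the demo.launch() call under the main guard are assumptions, not part of the commit:

# Hypothetical reconstruction of app.py after this commit.
# The imports and the final launch line are assumed; everything else
# is taken from the diff above.
import gradio as gr

from inference import MODEL_OPTIONS, predict_absa  # assumed import path

def run_absa(review, model_choice):
    try:
        return predict_absa(review, model_choice)
    except Exception as e:
        # Surface errors as JSON so the gr.JSON output can render them
        return {"error": str(e)}

demo = gr.Interface(
    fn=run_absa,
    inputs=[
        gr.Textbox(label="Arabic Review"),
        gr.Dropdown(choices=list(MODEL_OPTIONS.keys()), label="Choose Model", value="Araberta"),
    ],
    outputs=gr.JSON(label="Extracted Aspect-Sentiment-Opinion Triplets"),
    title="Arabic ABSA (Aspect-Based Sentiment Analysis)",
    description="Choose a model (Araberta, mT5, mBART, GPT3.5, GPT4o) to extract aspects and sentiment",
)

if __name__ == "__main__":
    demo.launch()  # assumed; the diff cuts off at the main guard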
inference.py CHANGED
@@ -1,3 +1,4 @@
+import os
 import torch
 import json
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModel, AutoConfig
@@ -5,8 +6,15 @@ from peft import LoraConfig, get_peft_model, PeftModel
 from araberta_setting.modeling_bilstm_crf import BERT_BiLSTM_CRF
 from seq2seq_inference import infer_t5_prompt
 from huggingface_hub import hf_hub_download
+from openai import OpenAI
+
+# 🔑 OpenAI client (make sure OPENAI_API_KEY is set in Hugging Face Space secrets)
+client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
+# Your fine-tuned OpenAI model IDs
+GPT35_FINETUNED = "ft:gpt-3.5-turbo-0125:asma:gpt-3-5-turbo-absa:Bb6gmwkE"
+GPT4O_FINETUNED = "ft:gpt-4o-mini-2024-07-18:asma:gpt4-finetune-absa:BazoEjnp"
 
-# Define supported models and their adapter IDs
 MODEL_OPTIONS = {
     "Araberta": {
         "base": "asmashayea/absa-araberta",
@@ -21,18 +29,20 @@ MODEL_OPTIONS = {
         "adapter": "asmashayea/mbart-absa"
     },
     "GPT3.5": {
-        "base": "
-        "adapter":
+        "base": "openai",
+        "adapter": GPT35_FINETUNED
     },
     "GPT4o": {
-        "base": "
-        "adapter":
+        "base": "openai",
+        "adapter": GPT4O_FINETUNED
     }
 }
 
 cached_models = {}
 
-
+# ---------------------------
+# Araberta loader
+# ---------------------------
 def load_araberta():
     path = "asmashayea/absa-arabert"
     device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -40,17 +50,14 @@ def load_araberta():
     tokenizer = AutoTokenizer.from_pretrained(path)
     base_model = AutoModel.from_pretrained(path)
 
-    # Load LoRA adapter
     lora_config = LoraConfig.from_pretrained(path)
     lora_model = get_peft_model(base_model, lora_config)
 
-    # Download CRF head from Hub
    local_pt = hf_hub_download(repo_id=path, filename="bilstm_crf_head.pt")
 
     config = AutoConfig.from_pretrained(path)
     model = BERT_BiLSTM_CRF(lora_model, config)
 
-    # Always map to current device
     state_dict = torch.load(local_pt, map_location=torch.device(device))
     model.load_state_dict(state_dict)
     model.to(device).eval()
@@ -66,14 +73,7 @@ def infer_araberta(text):
     tokenizer, model = cached_models["Araberta"]
 
     device = next(model.parameters()).device
-
-    inputs = tokenizer(
-        text,
-        return_tensors='pt',
-        truncation=True,
-        padding='max_length',
-        max_length=128
-    )
+    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding='max_length', max_length=128)
     input_ids = inputs['input_ids'].to(device)
     attention_mask = inputs['attention_mask'].to(device)
 
@@ -82,55 +82,38 @@ def infer_araberta(text):
     predicted_ids = outputs['logits'][0].cpu().tolist()
 
     tokens = tokenizer.convert_ids_to_tokens(input_ids[0].cpu())
-    predicted_labels = [model.
+    predicted_labels = [model.id2label.get(p, 'O') for p in predicted_ids]
 
     clean_tokens = [t for t in tokens if t not in tokenizer.all_special_tokens]
     clean_labels = [l for t, l in zip(tokens, predicted_labels) if t not in tokenizer.all_special_tokens]
 
-
-    pairs = [f"{token}: {label}" for token, label in zip(clean_tokens, clean_labels)]
-    horizontal_output = " | ".join(pairs)
-
-    # Group into aspect spans
-    aspects = []
-    current_tokens, current_sentiment = [], None
+    aspects, current_tokens, current_sentiment = [], [], None
     for token, label in zip(clean_tokens, clean_labels):
         if label.startswith("B-"):
             if current_tokens:
-                aspects.append({
-                    "aspect": " ".join(current_tokens).replace("##", ""),
-                    "sentiment": current_sentiment
-                })
+                aspects.append({"aspect": " ".join(current_tokens).replace("##", ""), "sentiment": current_sentiment})
             current_tokens = [token]
             current_sentiment = label.split("-")[1]
         elif label.startswith("I-") and current_sentiment == label.split("-")[1]:
             current_tokens.append(token)
         else:
             if current_tokens:
-                aspects.append({
-                    "aspect": " ".join(current_tokens).replace("##", ""),
-                    "sentiment": current_sentiment
-                })
+                aspects.append({"aspect": " ".join(current_tokens).replace("##", ""), "sentiment": current_sentiment})
             current_tokens, current_sentiment = [], None
     if current_tokens:
-        aspects.append({
-            "aspect": " ".join(current_tokens).replace("##", ""),
-            "sentiment": current_sentiment
-        })
-
-    return {
-        "token_predictions": horizontal_output,
-        "aspects": aspects
-    }
+        aspects.append({"aspect": " ".join(current_tokens).replace("##", ""), "sentiment": current_sentiment})
 
+    return {"aspects": aspects}
 
+# ---------------------------
+# Hugging Face seq2seq loaders
+# ---------------------------
 def load_model(model_key):
     if model_key in cached_models:
         return cached_models[model_key]
 
     base_id = MODEL_OPTIONS[model_key]["base"]
     adapter_id = MODEL_OPTIONS[model_key]["adapter"]
-
     device = "cuda" if torch.cuda.is_available() else "cpu"
 
     tokenizer = AutoTokenizer.from_pretrained(adapter_id)
@@ -141,14 +124,33 @@ def load_model(model_key):
     cached_models[model_key] = (tokenizer, model)
     return tokenizer, model
 
-
+# ---------------------------
+# OpenAI inference
+# ---------------------------
+def infer_openai(text, model_name):
+    prompt = f"Extract aspects and their sentiment from this review:\n\n{text}\n\nReturn JSON with 'aspect' and 'sentiment'."
+    response = client.chat.completions.create(
+        model=model_name,
+        messages=[{"role": "user", "content": prompt}],
+        max_tokens=512,
+        temperature=0
+    )
+    output = response.choices[0].message.content.strip()
+    try:
+        return json.loads(output)
+    except:
+        return {"raw_output": output}
+
+# ---------------------------
+# Unified predictor
+# ---------------------------
 def predict_absa(text, model_choice):
     if model_choice in ['mT5', 'mBART']:
         tokenizer, model = load_model(model_choice)
-
+        return infer_t5_prompt(text, tokenizer, model)
     elif model_choice == 'Araberta':
-
+        return infer_araberta(text)
+    elif model_choice in ['GPT3.5', 'GPT4o']:
+        return infer_openai(text, MODEL_OPTIONS[model_choice]["adapter"])
     else:
-
-
-    return decoded
+        return {"error": f"Model {model_choice} not supported"}
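A quick usage sketch of the unified predictor. The Arabic review is an invented example, and the printed shape follows infer_araberta's {"aspects": [...]} return; the seq2seq and OpenAI paths may return a different JSON structure:

from inference import predict_absa

# Invented review: "The food is delicious but the service is slow"
result = predict_absa("الطعام لذيذ لكن الخدمة بطيئة", "Araberta")
print(result)  # e.g. {"aspects": [{"aspect": "الطعام", "sentiment": "POS"}, ...]}

One caveat on the new infer_openai: json.loads is applied to the raw completion and a bare except: swallows every failure, but chat models often wrap JSON in markdown fences. A more defensive parser (a sketch, not part of this commit) would strip fences first and catch only json.JSONDecodeError:

import json
import re

def parse_model_json(output: str):
    # Strip a ```json ... ``` fence if the model added one
    fenced = re.search(r"```(?:json)?\s*(.*?)```", output, re.DOTALL)
    if fenced:
        output = fenced.group(1)
    try:
        return json.loads(output)
    except json.JSONDecodeError:
        # Fall back to the raw text instead of crashing the Space
        return {"raw_output": output.strip()}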