Commit 7ebac28 · Parent(s): 6d91ffe
- app.py +1 -1
- inference.py +30 -33
app.py CHANGED
@@ -12,7 +12,7 @@ demo = gr.Interface(
     fn=run_absa,
     inputs=[
         gr.Textbox(label="Arabic Review"),
-        gr.Dropdown(choices=list(MODEL_OPTIONS.keys()), label="Choose Model", value="
+        gr.Dropdown(choices=list(MODEL_OPTIONS.keys()), label="Choose Model", value="Araberta")
     ],
     outputs=gr.Textbox(label="Extracted Aspect-Sentiment-Opinion Triplets"),
     title="Arabic ABSA (Aspect-Based Sentiment Analysis)",
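The one-line app.py change gives the model Dropdown a default value, so the UI starts with a valid selection instead of an unset one. For context, a minimal sketch of the resulting interface; the run_absa wrapper shown here is hypothetical, assuming it simply forwards to predict_absa from inference.py:

```python
# Sketch only: run_absa as defined in this repo may format output differently.
import gradio as gr
from inference import MODEL_OPTIONS, predict_absa

def run_absa(text, model_choice):
    # Thin hypothetical wrapper around the repo's predict_absa.
    return str(predict_absa(text, model_choice))

demo = gr.Interface(
    fn=run_absa,
    inputs=[
        gr.Textbox(label="Arabic Review"),
        # The default must be one of the Dropdown choices, i.e. a key of MODEL_OPTIONS.
        gr.Dropdown(choices=list(MODEL_OPTIONS.keys()), label="Choose Model", value="Araberta"),
    ],
    outputs=gr.Textbox(label="Extracted Aspect-Sentiment-Opinion Triplets"),
    title="Arabic ABSA (Aspect-Based Sentiment Analysis)",
)

if __name__ == "__main__":
    demo.launch()
```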
inference.py CHANGED
@@ -8,7 +8,6 @@ from huggingface_hub import hf_hub_download
 
 # Define supported models and their adapter IDs
 MODEL_OPTIONS = {
-
     "Araberta": {
         "base": "asmashayea/absa-araberta",
         "adapter": "asmashayea/absa-araberta"
@@ -22,31 +21,39 @@ MODEL_OPTIONS = {
         "adapter": "asmashayea/mbart-absa"
     },
     "GPT3.5": {
-        "base": "bigscience/bloom-560m", #
+        "base": "bigscience/bloom-560m", # placeholder
         "adapter": "asmashayea/gpt-absa"
     },
     "GPT4o": {
-        "base": "bigscience/bloom-560m", #
+        "base": "bigscience/bloom-560m", # placeholder
         "adapter": "asmashayea/gpt-absa"
     }
 }
 
 cached_models = {}
 
+
 def load_araberta():
     path = "asmashayea/absa-arabert"
+    device = "cuda" if torch.cuda.is_available() else "cpu"
 
     tokenizer = AutoTokenizer.from_pretrained(path)
     base_model = AutoModel.from_pretrained(path)
+
+    # Load LoRA adapter
     lora_config = LoraConfig.from_pretrained(path)
     lora_model = get_peft_model(base_model, lora_config)
-    local_pt = hf_hub_download(repo_id="asmashayea/absa-arabert", filename="bilstm_crf_head.pt")
 
+    # Download CRF head from Hub
+    local_pt = hf_hub_download(repo_id=path, filename="bilstm_crf_head.pt")
 
     config = AutoConfig.from_pretrained(path)
     model = BERT_BiLSTM_CRF(lora_model, config)
-
-
+
+    # Always map to current device
+    state_dict = torch.load(local_pt, map_location=torch.device(device))
+    model.load_state_dict(state_dict)
+    model.to(device).eval()
 
     cached_models["Araberta"] = (tokenizer, model)
     return tokenizer, model
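The substantive addition here is loading the BiLSTM-CRF head's state dict with an explicit map_location, so a checkpoint saved on GPU can be loaded on a CPU-only Space. A standalone sketch of that pattern; the nn.LSTM stand-in is hypothetical, since the real BERT_BiLSTM_CRF class lives elsewhere in this repo:

```python
import torch
import torch.nn as nn

device = "cuda" if torch.cuda.is_available() else "cpu"

# Hypothetical stand-in for the repo's BERT_BiLSTM_CRF head, used only
# to show the save/load round trip.
head = nn.LSTM(input_size=768, hidden_size=256, bidirectional=True, batch_first=True)

# Saving on one machine (possibly a GPU box)...
torch.save(head.state_dict(), "bilstm_crf_head.pt")

# ...and loading on another: map_location remaps tensors saved on cuda:0
# onto whatever device exists here, which is what keeps CPU Spaces working.
state_dict = torch.load("bilstm_crf_head.pt", map_location=torch.device(device))
head.load_state_dict(state_dict)
head.to(device)
head.eval()
```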
@@ -58,10 +65,15 @@ def infer_araberta(text):
     else:
         tokenizer, model = cached_models["Araberta"]
 
-
     device = next(model.parameters()).device
 
-    inputs = tokenizer(
+    inputs = tokenizer(
+        text,
+        return_tensors='pt',
+        truncation=True,
+        padding='max_length',
+        max_length=128
+    )
     input_ids = inputs['input_ids'].to(device)
     attention_mask = inputs['attention_mask'].to(device)
 
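The expanded tokenizer call pins every input to a fixed length of 128, which keeps the tensor shapes fed to the BiLSTM-CRF head constant. A sketch of what it produces; the checkpoint name and the sample review are illustrative, not taken from this repo:

```python
from transformers import AutoTokenizer

# Any BERT-style checkpoint works for demonstration purposes.
tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-cased")

inputs = tokenizer(
    "الطعام لذيذ لكن الخدمة بطيئة",  # "The food is delicious but the service is slow"
    return_tensors='pt',
    truncation=True,
    padding='max_length',
    max_length=128,
)
# Both tensors have the fixed shape [1, 128]; padded positions carry
# attention_mask == 0, so the model ignores them.
print(inputs['input_ids'].shape, inputs['attention_mask'].shape)
```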
@@ -75,15 +87,13 @@ def infer_araberta(text):
     clean_tokens = [t for t in tokens if t not in tokenizer.all_special_tokens]
     clean_labels = [l for t, l in zip(tokens, predicted_labels) if t not in tokenizer.all_special_tokens]
 
-    # Horizontal
+    # Horizontal token:label pairs
     pairs = [f"{token}: {label}" for token, label in zip(clean_tokens, clean_labels)]
     horizontal_output = " | ".join(pairs)
 
-    # Group
+    # Group into aspect spans
     aspects = []
-    current_tokens = []
-    current_sentiment = None
-
+    current_tokens, current_sentiment = [], None
     for token, label in zip(clean_tokens, clean_labels):
         if label.startswith("B-"):
             if current_tokens:
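The grouping loop is standard BIO decoding: a B- tag opens a new aspect span, I- extends it, and anything else closes it. A self-contained sketch of the same idea on toy data; the label scheme and sentiment extraction via label[2:] are assumptions for illustration, not necessarily the repo's exact tags:

```python
def group_bio(tokens, labels):
    """Collapse BIO-tagged tokens into (aspect, sentiment) spans."""
    aspects, current_tokens, current_sentiment = [], [], None
    for token, label in zip(tokens, labels):
        if label.startswith("B-"):
            if current_tokens:  # close the previous span first
                aspects.append({"aspect": " ".join(current_tokens).replace("##", ""),
                                "sentiment": current_sentiment})
            current_tokens, current_sentiment = [token], label[2:]
        elif label.startswith("I-") and current_tokens:
            current_tokens.append(token)  # extend the open span
        else:  # an "O" tag ends any open span
            if current_tokens:
                aspects.append({"aspect": " ".join(current_tokens).replace("##", ""),
                                "sentiment": current_sentiment})
            current_tokens, current_sentiment = [], None
    if current_tokens:  # flush a span that runs to the end
        aspects.append({"aspect": " ".join(current_tokens).replace("##", ""),
                        "sentiment": current_sentiment})
    return aspects

# Toy example with a WordPiece continuation token:
print(group_bio(["serv", "##ice", "food"], ["B-POS", "I-POS", "B-NEG"]))
# -> [{'aspect': 'serv ice', 'sentiment': 'POS'}, {'aspect': 'food', 'sentiment': 'NEG'}]
```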
@@ -101,9 +111,7 @@ def infer_araberta(text):
                 "aspect": " ".join(current_tokens).replace("##", ""),
                 "sentiment": current_sentiment
             })
-            current_tokens = []
-            current_sentiment = None
-
+            current_tokens, current_sentiment = [], None
     if current_tokens:
         aspects.append({
             "aspect": " ".join(current_tokens).replace("##", ""),
@@ -116,7 +124,6 @@ def infer_araberta(text):
     }
 
 
-
 def load_model(model_key):
     if model_key in cached_models:
         return cached_models[model_key]
@@ -124,34 +131,24 @@ def load_model(model_key):
     base_id = MODEL_OPTIONS[model_key]["base"]
     adapter_id = MODEL_OPTIONS[model_key]["adapter"]
 
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+
     tokenizer = AutoTokenizer.from_pretrained(adapter_id)
-    base_model = AutoModelForSeq2SeqLM.from_pretrained(base_id)
-    model = PeftModel.from_pretrained(base_model, adapter_id)
+    base_model = AutoModelForSeq2SeqLM.from_pretrained(base_id).to(device)
+    model = PeftModel.from_pretrained(base_model, adapter_id).to(device)
     model.eval()
 
     cached_models[model_key] = (tokenizer, model)
     return tokenizer, model
 
 
-
-
 def predict_absa(text, model_choice):
-
-
     if model_choice in ['mT5', 'mBART']:
         tokenizer, model = load_model(model_choice)
         decoded = infer_t5_prompt(text, tokenizer, model)
-
     elif model_choice == 'Araberta':
-
         decoded = infer_araberta(text)
+    else:
+        decoded = {"error": f"Model {model_choice} not supported"}
 
-
-    # prompt = f"استخرج الجوانب والآراء والمشاعر من النص التالي:\n{text}"
-    # inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
-
-    # with torch.no_grad():
-    #     outputs = model.generate(**inputs, max_new_tokens=128)
-
-    # decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return decoded
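load_model follows the usual PEFT pattern: load the full seq2seq base onto the target device, then attach the fine-tuned adapter on top. A minimal end-to-end sketch using the mBART entry; the adapter id comes from the diff, but the base id and sample input are assumptions (the real base id lives in MODEL_OPTIONS):

```python
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from peft import PeftModel

base_id = "facebook/mbart-large-50"   # assumption: actual base id is read from MODEL_OPTIONS
adapter_id = "asmashayea/mbart-absa"  # adapter id as listed in the diff

device = "cuda" if torch.cuda.is_available() else "cpu"

# Tokenizer comes from the adapter repo so any tokens added during training match.
tokenizer = AutoTokenizer.from_pretrained(adapter_id)
base_model = AutoModelForSeq2SeqLM.from_pretrained(base_id).to(device)
model = PeftModel.from_pretrained(base_model, adapter_id).to(device)
model.eval()

# Typical generation call for the seq2seq branch of predict_absa.
inputs = tokenizer("الفندق نظيف لكن الواي فاي ضعيف", return_tensors="pt").to(device)
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```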