Spaces:

end-rin
/

unicode-attack-demo

Running

App Files Files Community

end-rin committed on 16 days ago

Commit

f94bb1f

verified ·

1 Parent(s): d82ee96

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +51 -15

app.py CHANGED Viewed

@@ -234,19 +234,43 @@ def load_model(model_key: str):
 def get_prediction(model, text: str, task: str, model_key: str) -> str:
-    """Get model prediction for the given text and task."""
     if task == 'fact_verification':
-        system_prompt = "You are a fact-checking assistant. Classify claims as SUPPORTS, REFUTES, or NOT_ENOUGH_INFO. Reply with only one word."
-        user_prompt = f"Classify this claim: {text}"
-        valid_labels = ['SUPPORTS', 'REFUTES', 'NOT_ENOUGH_INFO']
     else:
-        system_prompt = "You are a text classifier. Determine if text is an argument or not. Reply with only ARGUMENT or NOT_ARGUMENT."
-        user_prompt = f"Is this an argument? {text}"
-        valid_labels = ['ARGUMENT', 'NOT_ARGUMENT']
     messages = [
-        {"role": "system", "content": system_prompt},
         {"role": "user", "content": user_prompt},
     ]
@@ -258,13 +282,25 @@ def get_prediction(model, text: str, task: str, model_key: str) -> str:
     output = response['choices'][0]['message']['content'].strip().upper()
-    # Parse to valid label
-    for label in valid_labels:
-        if label in output:
-            return label
-    # Default fallback
-    return valid_labels[-1]
 def run_attack(text: str, style: str, model_key: str, task: str, progress=gr.Progress()):

 def get_prediction(model, text: str, task: str, model_key: str) -> str:
+    """Get model prediction for the given text and task.
+    Uses the same prompt structure as the actual experiments
+    (phase1_evaluation.ipynb) for consistency.
+    """
     if task == 'fact_verification':
+        user_prompt = f"""Classify the following claim as either 'SUPPORTS', 'REFUTES', or 'NOT_ENOUGH_INFO'.
+The available classes are:
+- "SUPPORTS": The claim is true or supported by common knowledge.
+- "REFUTES": The claim is false or contradicts established facts.
+- "NOT_ENOUGH_INFO": The claim cannot be verified with common knowledge.
+### Important:
+- **Only choose one class from the above-mentioned classes.**
+- **Answer with just one word, no other explanations.**
+Claim: {text}
+Answer:"""
     else:
+        user_prompt = f"""Determine if the following sentence is an argument. An argument is a statement that takes a position on a topic and provides reasoning or evidence.
+The available classes are:
+- "ARGUMENT": The sentence is an argument (takes a stance with reasoning).
+- "NOT_ARGUMENT": The sentence is not an argument (factual statement, question, or lacks clear stance).
+### Important:
+- **Only choose one class from the above-mentioned classes.**
+- **Answer with just one word, no other explanations.**
+Sentence: {text}
+Answer:"""
     messages = [
         {"role": "user", "content": user_prompt},
     ]
     output = response['choices'][0]['message']['content'].strip().upper()
+    # Robust label extraction (matches experiment extract_classification logic)
+    if task == 'fact_verification':
+        if 'NOT_ENOUGH_INFO' in output or 'NOT ENOUGH INFO' in output or 'NEI' in output:
+            return 'NOT_ENOUGH_INFO'
+        if 'REFUTE' in output:
+            return 'REFUTES'
+        if 'SUPPORT' in output:
+            return 'SUPPORTS'
+        return 'NOT_ENOUGH_INFO'
+    else:
+        if 'NOT_ARGUMENT' in output or 'NOT ARGUMENT' in output or 'NOT AN ARGUMENT' in output:
+            return 'NOT_ARGUMENT'
+        if output.startswith('NO'):
+            return 'NOT_ARGUMENT'
+        if 'ARGUMENT' in output:
+            return 'ARGUMENT'
+        if output.startswith('YES'):
+            return 'ARGUMENT'
+        return 'NOT_ARGUMENT'
 def run_attack(text: str, style: str, model_key: str, task: str, progress=gr.Progress()):