import os
# Cap OpenMP threads before torch/transformers are imported; the setting has
# no effect once the OpenMP runtime is already initialized.
os.environ["OMP_NUM_THREADS"] = "1"
import gradio as gr
import torch
import re
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import spaces  # Hugging Face Spaces ZeroGPU helper (provides the @spaces.GPU decorator)
# Model configuration
MODEL_ID = "oberbics/llama-3.1-8B-newspaper_argument_mining"
SYSTEM_PROMPT = '''You are an expert at analyzing historical texts, and you hate to summarize.
OUTPUT FORMAT - EXACTLY these 4 XML tags and NOTHING else:
<argument>Original argument text OR "NA"</argument>
<claim>Core claim (implication) in one sentence OR "NA"</claim>
<explanation>Why this is an argument OR "NA"</explanation>
<human_verification_needed>True OR False</human_verification_needed>
EXAMPLE WITH ARGUMENT:
<argument>It is reported from Malta that the British war-ships "Exmouth," "Euryalus," "Minerva," and "Sutlej" have left for Messina. The French Government has sent two armoured ships and three destroyers to Messina. President Fallieres, Premier Clemenceau, Minister Pichon, and the Presidents of the Senate and Chamber have all sent messages of sympathy to the Italian Government. The help already proffered and accepted is insufficient for the purpose. There is pressing need of extraordinary measures of help, and provisions are in great demand. There is need of doctors, tents, clothing, and provisions for the survivors, who, deprived of all necessities, are exposed to the inclemencies of the winter weather. There is need of fire engines to cope with the flames that are raging among the ruins. The railway station has collapsed. Railway carriages have been destroyed. Almost all the railway employees are dead. The streets are no longer recognisable; they look like enormous fissures in a distant and extensive heap of ruins.</argument>
<claim>Current relief efforts are inadequate and much more extensive aid is urgently needed.</claim>
<explanation>The prefect explicitly argues that existing help is "insufficient" and makes a direct claim that "extraordinary measures" are needed, presenting a clear premise-conclusion structure about the inadequacy of current response.</explanation>
<human_verification_needed>False</human_verification_needed>
EXAMPLE WITHOUT ARGUMENT:
<argument>NA</argument>
<claim>NA</claim>
<explanation>NA</explanation>
<human_verification_needed>False</human_verification_needed>
RULES:
- NEVER print the examples from the prompt or training
- Only output arguments that appear verbatim (or nearly verbatim) in the text
- NO SUMMARY; ONLY EXACT EXTRACTION FROM THE TEXT; don't extract anything that is not in the text. Only extract word for word
- ONLY output these 4 XML tags
- Extract only original text without changes, or use NA when you did not find an argument
- Factual reports such as "The volcanic eruption was followed by three surge waves about 10 metres high" or "Almost all the inhabitants were killed; only a few thousands escaped death" are NOT arguments
- The CLAIM should state what the (implicit) argument implies, i.e. its main conclusion
- Pay attention to implicit arguments
- Only use human_verification_needed True when highly uncertain
- If no argument exists, use NA for ALL fields without explanation, except <human_verification_needed>False or True</human_verification_needed>
- More than one argument is possible per article; each unit has one clear claim and the full set of XML tags
VERIFICATION: Before you print the results, double-check the claims and explanations of each argument. When the claim is just a translation, or the explanation states that the unit is not an argument, don't print it'''
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
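# Llama-style tokenizers ship without a dedicated padding token; reusing EOS
# is the usual convention and matches the pad_token_id passed to generate() below.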
tokenizer.pad_token = tokenizer.eos_token
print("Loading model...")
# Try to load with bfloat16 first (best quality)
# If that fails due to memory, fall back to 8-bit quantization
try:
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="auto",
        torch_dtype=torch.bfloat16,
        trust_remote_code=True
    )
    print("✅ Model loaded successfully in bfloat16!")
except Exception as e:
    print(f"⚠️ Could not load in bfloat16: {e}")
    print("Trying 8-bit quantization...")
    try:
        # 8-bit is much better than 4-bit
        bnb_config = BitsAndBytesConfig(
            load_in_8bit=True,
            llm_int8_threshold=6.0
        )
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            quantization_config=bnb_config,
            device_map="auto",
            torch_dtype=torch.bfloat16,
            trust_remote_code=True
        )
        print("✅ Model loaded successfully in 8-bit!")
    except Exception as e:
        print(f"⚠️ Could not load in 8-bit: {e}")
        print("Falling back to 4-bit quantization (may reduce quality)...")
        # Last resort: 4-bit quantization
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True
        )
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            quantization_config=bnb_config,
            device_map="auto",
            torch_dtype=torch.bfloat16,
            trust_remote_code=True
        )
        print("⚠️ Model loaded in 4-bit (quality may be reduced)")
@spaces.GPU
def extract_arguments(text, temperature=0.01):
    if not text or not text.strip():
        return "Please enter some text to analyze."

    # Use the same message format as the working local version
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"Extract argumentative units from historical text in their original form, no summary.\n{text}"}
    ]

    # Use apply_chat_template - the proper way to format prompts
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    # Truncate very long articles so the prompt fits the budgeted context
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=5048).to(model.device)
    input_length = inputs['input_ids'].shape[1]

    # Validate temperature and clamp it to the slider's range (0.01-0.3)
    try:
        temperature = float(temperature)
        temperature = max(0.01, min(0.3, temperature))
    except (TypeError, ValueError):
        temperature = 0.05

    # Generate with the same parameters as the local version
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=8000,
            temperature=temperature,
            top_p=0.95,  # Match local version
            repetition_penalty=1.15,  # Match local version
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

    generated_tokens = outputs[0][input_length:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    # Insert a blank line between consecutive units: a closing tag followed by
    # a new opening (non-closing) tag
    response = re.sub(r'(</[^>]+>)\s*(<[^/>])', r'\1\n\n\2', response)

    # Drop anything the model emitted before the first <argument> tag
    if not response.startswith('<argument>'):
        arg_start = response.find('<argument>')
        if arg_start != -1:
            response = response[arg_start:]

    return response
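
# A minimal parsing sketch (hypothetical helper, not wired into the Gradio app):
# splits the raw XML-ish output into one dict per argumentative unit. The field
# names mirror the four tags the system prompt demands.
def parse_units(raw_response):
    """Parse <argument>/<claim>/<explanation>/<human_verification_needed> blocks."""
    pattern = re.compile(
        r'<argument>(.*?)</argument>\s*'
        r'<claim>(.*?)</claim>\s*'
        r'<explanation>(.*?)</explanation>\s*'
        r'<human_verification_needed>(.*?)</human_verification_needed>',
        re.DOTALL
    )
    return [
        {
            "argument": arg.strip(),
            "claim": claim.strip(),
            "explanation": expl.strip(),
            "needs_verification": flag.strip().lower() == "true",
        }
        for arg, claim, expl, flag in pattern.findall(raw_response)
    ]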
# Gradio interface
demo = gr.Interface(
    fn=extract_arguments,
    inputs=[
        gr.Textbox(label="Input Text", placeholder="Enter newspaper text here...", lines=10),
        gr.Slider(minimum=0.01, maximum=0.3, value=0.05, step=0.05, label="Temperature (lower = more consistent)")
    ],
    outputs=[
        gr.Textbox(label="Raw XML Output", lines=8)
    ],
    title="Newspaper Argumentative Unit Extractor",
    description="Extract argumentative units from news sources",
    examples=[
        ["Reggio, January 8. Frequent shocks of earthquake were felt here dur ing the night, accompanied at times by loud subter ranean reports. A few buildings that had not been completely destroyed were further damaged. The work of reconstructing the railway is being pushed forward energetically. News has been received from Brancaleone, Catanzaro, and Palmi of earthquakes by which the inhabitants were alarmed last night", 0.05],
        ["REBUILDING MESSINA. Now that Messina and Reggio are being rebuilt, the Red Cross Society of Switzeralnd has started an interesting experiment. It has sent to Sicilv to be used in rebuilding the towns, 60 chalets similar to those in use on the Alps. They are of two kinds: the one, destined for Messina, having one upper storv, while the other, to be erected in the country towns of Calabria, are of the bungalow tvpie. It is hoped that these wooden constructions will better resist the earth shocks than stone buildings, and in any case, should they fall, the casualties would be far fewer. Viewed iorm outside the chalets, which are much admired by the Sicilians, are exactly the same as those in the Alps: but the disposition or the interior has been modified somewhat to meet the needs of Italian life and a southern clime", 0.05]
    ]
)
if __name__ == "__main__":
    demo.launch()
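
# Programmatic access sketch (hypothetical: the Space id below is inferred from
# the repo path and may differ from the real deployment):
#
#   from gradio_client import Client
#   client = Client("oberbics/Argument-Mining")
#   xml = client.predict("Some newspaper text...", 0.05, api_name="/predict")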