Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,137 +3,94 @@ os.environ["OMP_NUM_THREADS"] = "1"
|
|
| 3 |
|
| 4 |
import gradio as gr
|
| 5 |
import torch
|
| 6 |
-
import re
|
| 7 |
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
|
|
|
|
| 8 |
import spaces
|
| 9 |
|
| 10 |
-
#
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
OUTPUT FORMAT - EXACTLY these 4 XML tags and NOTHING else:
|
| 16 |
-
<argument>Original argument text OR "NA"</argument>
|
| 17 |
-
<claim>Core claim (implication) in one sentence OR "NA"</claim>
|
| 18 |
-
<explanation>Why this is an argument OR "NA"</explanation>
|
| 19 |
-
<human_verification_needed>True OR False</human_verification_needed>
|
| 20 |
-
|
| 21 |
-
EXAMPLE WITH ARGUMENT:
|
| 22 |
-
<argument>Es sind furchtbare Bilder, die sich dabei entrollen. Unter den Trümmern des einen Hause», so erzählt Luigt Barsint im Corrtcre della sera, findet man die Leichen von Unglück lichen, die in anderen Häusern gewohnt baben und die in der Ber- Wirrung de» schrcck.ichen Augenblickes instinktiv bet Fremden Hülfe und Unterschlupf suchten. Niemand erkennt jetzt diese armen Ein dringlinge, ihre Leichen werden nicht reklamiert, und man trägt sie hinunter an de» Strand, wo sie in langer Reihe einer neben den anderen hingebettet weiden, in denselben Tüchern und Decken, in denen sie tbren Tod gesunden.</argument>
|
| 23 |
-
<claim>The earthquake's chaos led to unidentified victims dying in unfamiliar places.</claim>
|
| 24 |
-
<explanation>Describes how people fled to other houses seeking help during the disaster, died there, and now cannot be identified or claimed by relatives. Shows cause (panic/confusion) and effect (anonymous deaths).</explanation>
|
| 25 |
-
<human_verification_needed>False</human_verification_needed>
|
| 26 |
-
|
| 27 |
-
EXAMPLE WITHOUT ARGUMENT:
|
| 28 |
-
<argument>NA</argument>
|
| 29 |
-
<claim>NA</claim>
|
| 30 |
-
<explanation>NA</explanation>
|
| 31 |
-
<human_verification_needed>FALSE</human_verification_needed>
|
| 32 |
-
|
| 33 |
-
RULES:
|
| 34 |
-
- NO SUMMARY; ONLY ORIGINAL EXTRACTION FROM THE TEXT; don't extract anything that is not in the text. Only extract word for word
|
| 35 |
-
- ONLY output these 4 XML tags
|
| 36 |
-
- Factual reportings such as "Dem Vulkanausbruch folgten drei Sturzwellen in etwa 10 Meter Höhe" or "Almost all the inhabitants were killed; only a few thousands escaped death" are NO arguments
|
| 37 |
-
- Extract only original text without changes or use NA when you did not find an argument
|
| 38 |
-
- The claim is not a translation or summary of the argument. It should say what the (implicit) argument implies
|
| 39 |
-
- In cases of uncertainty or ambiguity, say human_verification_needed TRUE
|
| 40 |
-
- If no argument exists, use NA for all fields except <human_verification_needed>FALSE or TRUE</human_verification_needed>
|
| 41 |
-
- More than one argumentative unit is possible for one article; each unit has one clear claim and all the XML structures
|
| 42 |
-
|
| 43 |
-
VERIFICATION: Before you print the results, double-check the explanations of the argument. When the explanation states that this is not an argument, don't print it"""
|
| 44 |
|
| 45 |
print("Loading tokenizer...")
|
| 46 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
| 47 |
tokenizer.pad_token = tokenizer.eos_token
|
| 48 |
|
| 49 |
-
print("Loading model...")
|
| 50 |
bnb_config = BitsAndBytesConfig(
|
| 51 |
load_in_4bit=True,
|
| 52 |
bnb_4bit_quant_type="nf4",
|
| 53 |
bnb_4bit_compute_dtype=torch.bfloat16,
|
| 54 |
-
bnb_4bit_use_double_quant=True
|
| 55 |
)
|
| 56 |
|
| 57 |
-
|
| 58 |
-
|
| 59 |
quantization_config=bnb_config,
|
| 60 |
device_map="auto",
|
| 61 |
torch_dtype=torch.bfloat16,
|
| 62 |
trust_remote_code=True
|
| 63 |
)
|
| 64 |
-
print("Model loaded successfully!")
|
| 65 |
|
|
|
|
|
|
|
| 66 |
|
|
|
|
|
|
|
| 67 |
|
|
|
|
| 68 |
|
| 69 |
@spaces.GPU
|
| 70 |
def extract_arguments(text, temperature=0.1):
|
| 71 |
if not text or not text.strip():
|
| 72 |
return "", "Please enter some text to analyze."
|
| 73 |
-
|
| 74 |
prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
|
| 75 |
{SYSTEM_PROMPT}<|eot_id|>
|
| 76 |
<|start_header_id|>user<|end_header_id|>
|
| 77 |
Extract arguments from historical text.
|
| 78 |
{text}<|eot_id|>
|
| 79 |
<|start_header_id|>assistant<|end_header_id|>"""
|
| 80 |
-
|
| 81 |
inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=10048).to(model.device)
|
| 82 |
input_length = inputs['input_ids'].shape[1]
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
else:
|
| 87 |
-
temperature = float(temperature)
|
| 88 |
-
|
| 89 |
-
if temperature < 0.01:
|
| 90 |
-
temperature = 0.01
|
| 91 |
-
elif temperature > 0.3:
|
| 92 |
-
temperature = 0.3
|
| 93 |
-
except:
|
| 94 |
-
temperature = 0.1
|
| 95 |
|
| 96 |
with torch.no_grad():
|
| 97 |
outputs = model.generate(
|
| 98 |
**inputs,
|
| 99 |
max_new_tokens=5000,
|
| 100 |
temperature=temperature,
|
| 101 |
-
do_sample=
|
| 102 |
top_p=0.9,
|
| 103 |
pad_token_id=tokenizer.eos_token_id,
|
| 104 |
-
repetition_penalty=1.1
|
| 105 |
)
|
| 106 |
-
|
| 107 |
generated_tokens = outputs[0][input_length:]
|
| 108 |
response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
|
| 109 |
|
| 110 |
-
# Fix XML start
|
| 111 |
if not response.startswith('<argument>'):
|
| 112 |
arg_start = response.find('<argument>')
|
| 113 |
if arg_start != -1:
|
| 114 |
response = response[arg_start:]
|
| 115 |
-
|
| 116 |
-
return response
|
| 117 |
|
|
|
|
| 118 |
|
| 119 |
-
# Gradio
|
| 120 |
demo = gr.Interface(
|
| 121 |
fn=extract_arguments,
|
| 122 |
inputs=[
|
| 123 |
gr.Textbox(label="Input Text", placeholder="Enter newspaper text here...", lines=10),
|
| 124 |
gr.Slider(minimum=0.01, maximum=0.3, value=0.1, step=0.01, label="Temperature (lower = more consistent)")
|
| 125 |
],
|
| 126 |
-
outputs=[
|
| 127 |
-
gr.Textbox(label="Raw XML Output", lines=8)
|
| 128 |
-
],
|
| 129 |
title="Newspaper Argumentative Unit Extractor",
|
| 130 |
-
description="Extract argumentative units from news sources"
|
| 131 |
-
examples=[
|
| 132 |
-
["Reggio, January 8. Frequent shocks of earthquake were felt here dur ing the night, accompanied at times by loud subter ranean reports. A few buildings that had not been completely destroyed were further damaged. The work of reconstructing the railway is being pushed forward energetically. News has been received from Brancaleone, Catanzaro, and Palmi of earthquakes by which the inhabitants were alarmed last night", 0.1],
|
| 133 |
-
["Rome, January 20. At the request of the Queen, Dr. Quinco, the Court physician, has left Rome in order to super vise the medical arrangements, and the distribution of clothing and linen which has been prepared at the Quirinal under Her Majesty’s eye, in places on the Calabrian coast and inland where medical help is scarce. Rome, January 20. The controversy as to whether Messina is to be rebuilt on its former lines or finally abandoned is still in full swing. The newspapers state that a mere village will probably represent Messina for many years to come, as even should the rebuilding project eventually be decided upon, the work of clearing away the enormous quantity of debris and constructing new foundations must extend over a lengthy period. Plans for the erection of so-called earthquake-proof houses are engaging the attention of architects, but pressmen who visited the scene shortly after the disaster state that one building of this description was utterly wrecked. The Mu- sella plain, not far distant from Messina, has been selected as a site for the erection of temporary habi tations, after having been inspected by Signor Ca- sana, the Minister for War. The problem of pro viding temporary accommodation becomes daily more pressing, as the majority of the survivors refuse to leave their household goods lying under the ruins to the mercy of plunderers. Both Catania and Pa lermo, however, are crowded with refugees, and great difficulty is experienced in finding shelter for them. General Mazza, the provisional commandant at Messina, continues to send favourable reports regarding the health of the troops and survivors. Conditions at Reggio remain the same, but no fur ther rescues have been reported from there since Saturday. Milan, January 19. Signor Barzini, writing £n the Corriere de la Sera, accuses the authorities at Messina of inactivity, want of energy, and pedantry.", 0.1]
|
| 134 |
-
]
|
| 135 |
)
|
| 136 |
|
| 137 |
-
|
| 138 |
if __name__ == "__main__":
|
| 139 |
demo.launch()
|
|
|
|
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import spaces

# Base model and LoRA adapter for newspaper argument mining.
# FIX: the official Llama 3.3 Hub repo id carries no "Meta-" prefix
# (only the 3.1 generation did, e.g. "meta-llama/Meta-Llama-3.1-70B-Instruct");
# "meta-llama/Meta-Llama-3.3-70B-Instruct" does not exist, so from_pretrained
# would fail with a repo-not-found error.
BASE_MODEL = "meta-llama/Llama-3.3-70B-Instruct"  # official base model
ADAPTER = "oberbics/llama-3.3-70B-adapter-newspaper-argument-mining"

# NOTE(review): this is a placeholder. The previous revision carried the real
# extraction prompt (the 4-tag XML output format <argument>/<claim>/
# <explanation>/<human_verification_needed>, extraction rules, and few-shot
# examples). It must be restored here, otherwise the app cannot produce the
# structured output the UI advertises.
SYSTEM_PROMPT = """... your system prompt ..."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
# Llama models ship without a pad token; reuse EOS so batched/padded
# tokenization does not fail.
tokenizer.pad_token = tokenizer.eos_token

print("Loading base model...")
# 4-bit NF4 quantization (with double quantization) so the 70B base model
# fits into the available GPU memory; compute runs in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)

print("Loading adapter...")
# Attach the fine-tuned LoRA adapter on top of the quantized base model.
model = PeftModel.from_pretrained(base_model, ADAPTER)

# (Optional) merge LoRA weights into the base model for faster inference
# model = model.merge_and_unload()

print("Model + adapter loaded successfully!")
| 43 |
|
| 44 |
@spaces.GPU
def extract_arguments(text, temperature=0.1):
    """Extract argumentative units from historical newspaper text.

    Args:
        text: The article text to analyze.
        temperature: Sampling temperature; coerced to float and clamped
            to [0.01, 0.3].

    Returns:
        A single string: either a user-facing message for empty input, or
        the model's raw XML response, trimmed to start at the first
        <argument> tag when one is present.
    """
    # FIX: the Gradio interface declares exactly one output textbox, so every
    # path must return one string. The previous 2-tuple return ("", "msg")
    # did not match the single-output interface.
    if not text or not text.strip():
        return "Please enter some text to analyze."

    # Hand-assembled Llama-3 chat template: system prompt, user turn with the
    # article, then the assistant header the model continues from.
    prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
{SYSTEM_PROMPT}<|eot_id|>
<|start_header_id|>user<|end_header_id|>
Extract arguments from historical text.
{text}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>"""

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True,
                       max_length=10048).to(model.device)
    input_length = inputs['input_ids'].shape[1]

    # Coerce the slider value defensively (the UI may hand over a string);
    # fall back to the default on junk, then clamp to the supported range.
    try:
        temperature = float(temperature) if temperature else 0.1
    except (TypeError, ValueError):
        temperature = 0.1
    temperature = max(0.01, min(temperature, 0.3))

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=5000,
            temperature=temperature,
            do_sample=temperature > 0.01,  # greedy decoding at the floor value
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
            repetition_penalty=1.1,
        )

    # Decode only the newly generated continuation, not the echoed prompt.
    generated_tokens = outputs[0][input_length:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    # Drop any preamble the model emitted before the first <argument> tag.
    if not response.startswith('<argument>'):
        arg_start = response.find('<argument>')
        if arg_start != -1:
            response = response[arg_start:]

    return response
|
| 82 |
|
| 83 |
+
# Gradio UI: one text input plus a temperature slider, one raw-XML output.
demo = gr.Interface(
    fn=extract_arguments,
    inputs=[
        gr.Textbox(label="Input Text",
                   placeholder="Enter newspaper text here...",
                   lines=10),
        gr.Slider(minimum=0.01, maximum=0.3, value=0.1, step=0.01,
                  label="Temperature (lower = more consistent)"),
    ],
    outputs=[gr.Textbox(label="Raw XML Output", lines=8)],
    title="Newspaper Argumentative Unit Extractor",
    description="Extract argumentative units from news sources",
)
|
| 94 |
|
|
|
|
| 95 |
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|