Argument-Mining

Sleeping

App Files Files Community

oberbics committed on Sep 4, 2025

Commit

32b13fe

verified ·

1 Parent(s): d13be49

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -10

app.py CHANGED Viewed

@@ -9,15 +9,30 @@ MODEL_ID = "oberbics/newspaper-argument-mining-V1"
 SYSTEM_PROMPT = """You are an expert at analyzing German historical texts.
 OUTPUT FORMAT - EXACTLY these 4 XML tags and NOTHING else:
-<argument>Main argument text OR NA</argument>
-<claim>Core claim in one sentence OR NA</claim>
-<explanation>Why this is an argument OR NA</explanation>
-<human_verification_needed>True OR False OR NA</human_verification_needed>
 RULES:
 - ONLY output these 4 XML tags
-- If no argument exists, use NA for all fields
-- Extract complete argumentative passages, not fragments"""
 print("Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
@@ -40,6 +55,7 @@ model = AutoModelForCausalLM.from_pretrained(
 )
 print("Model loaded successfully!")
 def extract_arguments(text, temperature=0.1):
     if not text or not text.strip():
         return "", "Please enter some text to analyze."
@@ -47,7 +63,7 @@ def extract_arguments(text, temperature=0.1):
     prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
 {SYSTEM_PROMPT}<|eot_id|>
 <|start_header_id|>user<|end_header_id|>
-Extract arguments from this German historical text:
 {text}<|eot_id|>
 <|start_header_id|>assistant<|end_header_id|>"""
@@ -57,7 +73,7 @@ Extract arguments from this German historical text:
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
-            max_new_tokens=500,
             temperature=temperature,
             do_sample=True if temperature > 0.01 else False,
             top_p=0.9,
@@ -77,7 +93,7 @@ Extract arguments from this German historical text:
     formatted = format_output(response)
     return response, formatted
-def format_output(xml_response):
     def extract_field(field_name):
         pattern = f'<{field_name}>(.*?)</{field_name}>'
         match = re.search(pattern, xml_response, re.DOTALL)
@@ -101,7 +117,7 @@ def format_output(xml_response):
     else:
         return """❌ **No Argument Found**
-The text does not contain an argumentative unit."""
 # Gradio interface
 demo = gr.Interface(

 SYSTEM_PROMPT = """You are an expert at analyzing German historical texts.
 OUTPUT FORMAT - EXACTLY these 4 XML tags and NOTHING else:
+<argument>Main argument text OR "NA"</argument>
+<claim>Core claim in one sentence OR "NA"</claim>
+<explanation>Why this is an argument OR "NA"</explanation>
+<human_verification_needed>True OR False</human_verification_needed>
+EXAMPLE WITH ARGUMENT:
+<argument>Es sind furchtbare Bilder, die sich dabei entrollen. Unter den Trümmern des einen Hause», so erzählt Luigt Barsint im Corrtcre della sera, findet man die Leichen von Unglück lichen, die in anderen Häusern gewohnt baben und die in der Ber- Wirrung de» schrcck.ichen Augenblickes instinktiv bet Fremden Hülfe und Unterschlupf suchten. Niemand erkennt jetzt diese armen Ein dringlinge, ihre Leichen werden nicht reklamiert, und man trägt sie hinunter an de» Strand, wo sie in langer Reihe einer neben den anderen hingebettet weiden, in denselben Tüchern und Decken, in denen sie tbren Tod gesunden.</argument>
+<claim>The earthquake's chaos led to unidentified victims dying in unfamiliar places.</claim>
+<explanation>Describes how people fled to other houses seeking help during the disaster, died there, and now cannot be identified or claimed by relatives. Shows cause (panic/confusion) and effect (anonymous deaths).</explanation>
+<human_verification_needed>False</human_verification_needed>
+EXAMPLE WITHOUT ARGUMENT:
+<argument>NA</argument>
+<claim>NA</claim>
+<explanation>NA</explanation>
+<human_verification_needed>FALSE</human_verification_needed>
 RULES:
 - ONLY output these 4 XML tags
+- Extract only original text without changes or use NA when you did not find an argument
+- Extract full passages/units of argument (should be more than one sentence)
+- In cases of uncertainty or ambiguity, say human_verification_needed TRUE
+- If no argument exists, use NA for all fields except <human_verification_needed>FALSE or TRUE</human_verification_needed>
+- More than one argumentative unit possible for one article, one unit has one clear claim and all the xml structures"""
 print("Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 )
 print("Model loaded successfully!")
+@spaces.GPU
 def extract_arguments(text, temperature=0.1):
     if not text or not text.strip():
         return "", "Please enter some text to analyze."
     prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
 {SYSTEM_PROMPT}<|eot_id|>
 <|start_header_id|>user<|end_header_id|>
+Extract arguments from historical text. Arguments in historical texts are often implicit (only have a premise and no conclusion). Pay attention to find premises:
 {text}<|eot_id|>
 <|start_header_id|>assistant<|end_header_id|>"""
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
+            max_new_tokens=5000,
             temperature=temperature,
             do_sample=True if temperature > 0.01 else False,
             top_p=0.9,
     formatted = format_output(response)
     return response, formatted
+'''def format_output(xml_response):
     def extract_field(field_name):
         pattern = f'<{field_name}>(.*?)</{field_name}>'
         match = re.search(pattern, xml_response, re.DOTALL)
     else:
         return """❌ **No Argument Found**
+The text does not contain an argumentative unit."""'''
 # Gradio interface
 demo = gr.Interface(