Argument-Mining

Sleeping

App Files Files Community

oberbics committed on Sep 3, 2025

Commit

f06ced6

verified ·

1 Parent(s): 69ab256

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -39

app.py CHANGED Viewed

@@ -1,14 +1,11 @@
-# app.py - Fixed for Hugging Face Spaces with lazy loading
-import spaces
 import gradio as gr
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import re
 # Model configuration
 MODEL_ID = "oberbics/llama-3.1-model-newspaper-arguments-V1"
-# System prompt
 SYSTEM_PROMPT = """You are an expert at analyzing German historical texts.
 OUTPUT FORMAT - EXACTLY these 4 XML tags and NOTHING else:
@@ -22,7 +19,6 @@ RULES:
 - If no argument exists, use NA for all fields
 - Extract complete argumentative passages, not fragments"""
-# Initialize model and tokenizer at module level (like the working example)
 print("Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 tokenizer.pad_token = tokenizer.eos_token
@@ -44,14 +40,10 @@ model = AutoModelForCausalLM.from_pretrained(
 )
 print("Model loaded successfully!")
-@spaces.GPU(duration=120)  # Request GPU for 120 seconds per inference
 def extract_arguments(text, temperature=0.1):
-    """Extract argumentative units from text"""
     if not text or not text.strip():
         return "", "Please enter some text to analyze."
-    # Build prompt
     prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
 {SYSTEM_PROMPT}<|eot_id|>
 <|start_header_id|>user<|end_header_id|>
@@ -59,9 +51,7 @@ Extract arguments from this German historical text:
 {text}<|eot_id|>
 <|start_header_id|>assistant<|end_header_id|>"""
-    # Tokenize and generate
     inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to(model.device)
     input_length = inputs['input_ids'].shape[1]
     with torch.no_grad():
@@ -75,24 +65,19 @@ Extract arguments from this German historical text:
             repetition_penalty=1.1
         )
-    # Decode response
     generated_tokens = outputs[0][input_length:]
     response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
-    # Ensure proper XML structure
     if not response.startswith('<argument>'):
         arg_start = response.find('<argument>')
         if arg_start != -1:
             response = response[arg_start:]
-    # Parse and format
     formatted = format_output(response)
     return response, formatted
 def format_output(xml_response):
-    """Format XML response for display"""
     def extract_field(field_name):
         pattern = f'<{field_name}>(.*?)</{field_name}>'
         match = re.search(pattern, xml_response, re.DOTALL)
@@ -103,10 +88,8 @@ def format_output(xml_response):
     explanation = extract_field('explanation')
     verification = extract_field('human_verification_needed')
-    has_argument = argument != 'NA' and argument != 'ERROR'
-    if has_argument:
-        result = f"""✅ **Argument Found**
 **Argument:** {argument}
@@ -116,28 +99,16 @@ def format_output(xml_response):
 **Verification Needed:** {verification}"""
     else:
-        result = """❌ **No Argument Found**
 The text does not contain an argumentative unit."""
-    return result
-# Create simple interface (following the working example pattern)
 demo = gr.Interface(
     fn=extract_arguments,
     inputs=[
-        gr.Textbox(
-            label="Input Text",
-            placeholder="Enter German historical newspaper text here...",
-            lines=10
-        ),
-        gr.Slider(
-            minimum=0.01,
-            maximum=0.3,
-            value=0.1,
-            step=0.01,
-            label="Temperature (lower = more consistent)"
-        )
     ],
     outputs=[
         gr.Textbox(label="Raw XML Output", lines=8),
@@ -153,4 +124,4 @@ demo = gr.Interface(
 )
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 import torch
 import re
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 # Model configuration
 MODEL_ID = "oberbics/llama-3.1-model-newspaper-arguments-V1"
 SYSTEM_PROMPT = """You are an expert at analyzing German historical texts.
 OUTPUT FORMAT - EXACTLY these 4 XML tags and NOTHING else:
 - If no argument exists, use NA for all fields
 - Extract complete argumentative passages, not fragments"""
 print("Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 tokenizer.pad_token = tokenizer.eos_token
 )
 print("Model loaded successfully!")
 def extract_arguments(text, temperature=0.1):
     if not text or not text.strip():
         return "", "Please enter some text to analyze."
     prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
 {SYSTEM_PROMPT}<|eot_id|>
 <|start_header_id|>user<|end_header_id|>
 {text}<|eot_id|>
 <|start_header_id|>assistant<|end_header_id|>"""
     inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to(model.device)
     input_length = inputs['input_ids'].shape[1]
     with torch.no_grad():
             repetition_penalty=1.1
         )
     generated_tokens = outputs[0][input_length:]
     response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
+    # Fix XML start
     if not response.startswith('<argument>'):
         arg_start = response.find('<argument>')
         if arg_start != -1:
             response = response[arg_start:]
     formatted = format_output(response)
     return response, formatted
 def format_output(xml_response):
     def extract_field(field_name):
         pattern = f'<{field_name}>(.*?)</{field_name}>'
         match = re.search(pattern, xml_response, re.DOTALL)
     explanation = extract_field('explanation')
     verification = extract_field('human_verification_needed')
+    if argument != 'NA' and argument != 'ERROR':
+        return f"""✅ **Argument Found**
 **Argument:** {argument}
 **Verification Needed:** {verification}"""
     else:
+        return """❌ **No Argument Found**
 The text does not contain an argumentative unit."""
+# Gradio interface
 demo = gr.Interface(
     fn=extract_arguments,
     inputs=[
+        gr.Textbox(label="Input Text", placeholder="Enter German historical newspaper text here...", lines=10),
+        gr.Slider(minimum=0.01, maximum=0.3, value=0.1, step=0.01, label="Temperature (lower = more consistent)")
     ],
     outputs=[
         gr.Textbox(label="Raw XML Output", lines=8),
 )
 if __name__ == "__main__":
+    demo.launch()