oberbics committed on
Commit
110e9ac
·
verified ·
1 Parent(s): 78595fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -217
app.py CHANGED
@@ -4,29 +4,12 @@ os.environ["OMP_NUM_THREADS"] = "1"
4
  import gradio as gr
5
  import torch
6
  import re
7
- import json
8
- import datetime
9
- import logging
10
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
11
  import spaces
12
 
13
- # Configure logging
14
- logging.basicConfig(
15
- level=logging.INFO,
16
- format='%(asctime)s - %(levelname)s - %(message)s',
17
- handlers=[
18
- logging.FileHandler('argument_extraction.log'),
19
- logging.StreamHandler()
20
- ]
21
- )
22
- logger = logging.getLogger(__name__)
23
-
24
  # Model configuration
25
  MODEL_ID = "oberbics/newspaper-argument-mining-V1"
26
 
27
- # Add minimum length threshold for arguments
28
- MIN_ARGUMENT_LENGTH = 50 # Adjust this value as needed
29
-
30
  SYSTEM_PROMPT = """You are an expert at analyzing historical texts and you hate to summarize
31
 
32
  OUTPUT FORMAT - EXACTLY these 4 XML tags and NOTHING else:
@@ -58,13 +41,10 @@ RULES:
58
  - More than one argumentative unit possible for one aticle, one unit has one clear clame and all the xml structures"""
59
 
60
  print("Loading tokenizer...")
61
- logger.info("Starting tokenizer loading")
62
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
63
  tokenizer.pad_token = tokenizer.eos_token
64
- logger.info("Tokenizer loaded successfully")
65
 
66
  print("Loading model...")
67
- logger.info("Starting model loading")
68
  bnb_config = BitsAndBytesConfig(
69
  load_in_4bit=True,
70
  bnb_4bit_quant_type="nf4",
@@ -80,216 +60,77 @@ model = AutoModelForCausalLM.from_pretrained(
80
  trust_remote_code=True
81
  )
82
  print("Model loaded successfully!")
83
- logger.info("Model loaded successfully")
84
 
85
- def parse_and_filter_arguments(response, min_length=MIN_ARGUMENT_LENGTH):
86
- """Parse XML response and filter out arguments that are too short"""
87
- try:
88
- # Extract argument text using regex
89
- argument_match = re.search(r'<argument>(.*?)</argument>', response, re.DOTALL)
90
-
91
- if argument_match:
92
- argument_text = argument_match.group(1).strip()
93
-
94
- # Check if argument is meaningful and long enough
95
- if argument_text and argument_text != "NA" and len(argument_text) < min_length:
96
- logger.info(f"Argument filtered out due to length: {len(argument_text)} chars (min: {min_length})")
97
-
98
- # Replace with NA format
99
- filtered_response = """<argument>NA</argument>
100
- <claim>NA</claim>
101
- <explanation>NA</explanation>
102
- <human_verification_needed>False</human_verification_needed>"""
103
- return filtered_response, True # True indicates it was filtered
104
-
105
- return response, False # False indicates no filtering
106
-
107
- except Exception as e:
108
- logger.error(f"Error parsing arguments: {e}")
109
- return response, False
110
 
111
- def log_interaction(input_text, temperature, output, processing_time, error=None, filtered=False):
112
- """Log each interaction to JSON file for analysis"""
113
- log_entry = {
114
- "timestamp": datetime.datetime.now().isoformat(),
115
- "input_length": len(input_text) if input_text else 0,
116
- "input_preview": input_text[:100] if input_text else "",
117
- "temperature": temperature,
118
- "output_length": len(output) if output else 0,
119
- "processing_time_seconds": processing_time,
120
- "has_error": error is not None,
121
- "error_message": str(error) if error else None,
122
- "output_preview": output[:200] if output else "",
123
- "filtered_for_length": filtered
124
- }
125
-
126
- # Save to JSON file
127
- try:
128
- with open('interaction_logs.json', 'a') as f:
129
- f.write(json.dumps(log_entry) + '\n')
130
- except Exception as e:
131
- logger.error(f"Failed to save interaction log: {e}")
132
 
133
  @spaces.GPU
134
- def extract_arguments(text, temperature=0.1, min_arg_length=MIN_ARGUMENT_LENGTH):
135
- start_time = datetime.datetime.now()
136
- logger.info(f"Processing request - Input length: {len(text) if text else 0}, Temperature: {temperature}, Min argument length: {min_arg_length}")
137
-
138
  if not text or not text.strip():
139
- error_msg = "Please enter some text to analyze."
140
- logger.warning("Empty input received")
141
- log_interaction(text, temperature, "", 0, error_msg)
142
- return "", error_msg
143
 
144
- try:
145
- prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
146
  {SYSTEM_PROMPT}<|eot_id|>
147
  <|start_header_id|>user<|end_header_id|>
148
  Extract arguments from historical text.
149
  {text}<|eot_id|>
150
  <|start_header_id|>assistant<|end_header_id|>"""
151
-
152
- inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=5048).to(model.device)
153
- input_length = inputs['input_ids'].shape[1]
154
-
155
- try:
156
- if temperature is None:
157
- temperature = 0.1
158
- else:
159
- temperature = float(temperature)
160
-
161
- if temperature < 0.01:
162
- temperature = 0.01
163
- elif temperature > 0.3:
164
- temperature = 0.3
165
- except:
166
- temperature = 0.1
167
-
168
- logger.info(f"Starting model generation with {input_length} input tokens")
169
-
170
- with torch.no_grad():
171
- outputs = model.generate(
172
- **inputs,
173
- max_new_tokens=5000,
174
- temperature=temperature,
175
- do_sample=True if temperature > 0.01 else False,
176
- top_p=0.9,
177
- pad_token_id=tokenizer.eos_token_id,
178
- repetition_penalty=1.1
179
- )
180
-
181
- generated_tokens = outputs[0][input_length:]
182
- response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
183
-
184
- # Fix XML start
185
- if not response.startswith('<argument>'):
186
- arg_start = response.find('<argument>')
187
- if arg_start != -1:
188
- response = response[arg_start:]
189
-
190
- # Filter out short arguments
191
- filtered_response, was_filtered = parse_and_filter_arguments(response, min_arg_length)
192
-
193
- processing_time = (datetime.datetime.now() - start_time).total_seconds()
194
- logger.info(f"Processing completed in {processing_time:.2f} seconds - Output length: {len(filtered_response)} - Filtered: {was_filtered}")
195
-
196
- # Log successful interaction
197
- log_interaction(text, temperature, filtered_response, processing_time, filtered=was_filtered)
198
-
199
- return filtered_response
200
-
201
- except Exception as e:
202
- processing_time = (datetime.datetime.now() - start_time).total_seconds()
203
- error_msg = f"Error during processing: {str(e)}"
204
- logger.error(f"Processing failed after {processing_time:.2f} seconds: {e}")
205
-
206
- # Log failed interaction
207
- log_interaction(text, temperature, "", processing_time, e)
208
-
209
- return error_msg
210
-
211
- def get_logs():
212
- """Function to view recent logs"""
213
- try:
214
- with open('interaction_logs.json', 'r') as f:
215
- lines = f.readlines()
216
- recent_logs = lines[-10:] # Last 10 interactions
217
-
218
- log_summary = []
219
- for line in recent_logs:
220
- entry = json.loads(line)
221
- confidence_info = f", Confidence: {entry['confidence_score']:.3f}" if entry.get('confidence_score') else ""
222
- filtered_info = " [FILTERED]" if entry.get('filtered_for_length') else ""
223
- rejected_info = " [REJECTED]" if entry.get('rejected') else ""
224
- summary = f"[{entry['timestamp']}] Input: {entry['input_length']} chars, Output: {entry['output_length']} chars, Time: {entry['processing_time_seconds']:.2f}s{confidence_info}{filtered_info}{rejected_info}"
225
- if entry['has_error']:
226
- summary += f" ERROR: {entry['error_message']}"
227
- log_summary.append(summary)
228
-
229
- return "\n".join(log_summary)
230
- except Exception as e:
231
- return f"Error reading logs: {e}"
232
-
233
- # Gradio interface with logging viewer and length control
234
- with gr.Blocks(title="Newspaper Argumentative Unit Extractor") as demo:
235
- gr.Markdown("# Newspaper Argumentative Unit Extractor")
236
- gr.Markdown("Extract argumentative units from news sources (filters out arguments shorter than specified length)")
237
 
238
- with gr.Tab("Extract Arguments"):
239
- with gr.Row():
240
- with gr.Column():
241
- input_text = gr.Textbox(
242
- label="Input Text",
243
- placeholder="Enter newspaper text here...",
244
- lines=10
245
- )
246
- temperature = gr.Slider(
247
- minimum=0.01,
248
- maximum=0.3,
249
- value=0.1,
250
- step=0.01,
251
- label="Temperature (lower = more consistent)"
252
- )
253
- min_length = gr.Slider(
254
- minimum=10,
255
- maximum=200,
256
- value=MIN_ARGUMENT_LENGTH,
257
- step=5,
258
- label="Minimum Argument Length (characters)"
259
- )
260
- extract_btn = gr.Button("Extract Arguments", variant="primary")
261
-
262
- with gr.Column():
263
- output_text = gr.Textbox(
264
- label="Raw XML Output",
265
- lines=8
266
- )
267
-
268
- extract_btn.click(
269
- fn=extract_arguments,
270
- inputs=[input_text, temperature, min_length],
271
- outputs=[output_text]
272
- )
273
-
274
- # Examples
275
- gr.Examples(
276
- examples=[
277
- ["Reggio, January 8. Frequent shocks of earthquake were felt here dur ing the night, accompanied at times by loud subter ranean reports. A few buildings that had not been completely destroyed were further damaged. The work of reconstructing the railway is being pushed forward energetically. News has been received from Brancaleone, Catanzaro, and Palmi of earthquakes by which the inhabitants were alarmed last night", 0.1, 50],
278
- ],
279
- inputs=[input_text, temperature, min_length],
280
- outputs=[output_text],
281
- fn=extract_arguments
282
  )
283
 
284
- with gr.Tab("Logs"):
285
- gr.Markdown("## Recent Activity Logs")
286
- log_display = gr.Textbox(
287
- label="Recent Interactions",
288
- lines=15,
289
- value=get_logs()
290
- )
291
- refresh_btn = gr.Button("Refresh Logs")
292
- refresh_btn.click(fn=get_logs, outputs=[log_display])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293
 
294
  if __name__ == "__main__":
295
- demo.launch()
 
4
  import gradio as gr
5
  import torch
6
  import re
 
 
 
7
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
8
  import spaces
9
 
 
 
 
 
 
 
 
 
 
 
 
10
  # Model configuration
11
  MODEL_ID = "oberbics/newspaper-argument-mining-V1"
12
 
 
 
 
13
  SYSTEM_PROMPT = """You are an expert at analyzing historical texts and you hate to summarize
14
 
15
  OUTPUT FORMAT - EXACTLY these 4 XML tags and NOTHING else:
 
41
  - More than one argumentative unit possible for one aticle, one unit has one clear clame and all the xml structures"""
42
 
43
  print("Loading tokenizer...")
 
44
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
45
  tokenizer.pad_token = tokenizer.eos_token
 
46
 
47
  print("Loading model...")
 
48
  bnb_config = BitsAndBytesConfig(
49
  load_in_4bit=True,
50
  bnb_4bit_quant_type="nf4",
 
60
  trust_remote_code=True
61
  )
62
  print("Model loaded successfully!")
 
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  @spaces.GPU
68
+ def extract_arguments(text, temperature=0.1):
 
 
 
69
  if not text or not text.strip():
70
+ return "", "Please enter some text to analyze."
 
 
 
71
 
72
+ prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
 
73
  {SYSTEM_PROMPT}<|eot_id|>
74
  <|start_header_id|>user<|end_header_id|>
75
  Extract arguments from historical text.
76
  {text}<|eot_id|>
77
  <|start_header_id|>assistant<|end_header_id|>"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
+ inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=5048).to(model.device)
80
+ input_length = inputs['input_ids'].shape[1]
81
+ try:
82
+ if temperature is None:
83
+ temperature = 0.1
84
+ else:
85
+ temperature = float(temperature)
86
+
87
+ if temperature < 0.01:
88
+ temperature = 0.01
89
+ elif temperature > 0.3:
90
+ temperature = 0.3
91
+ except:
92
+ temperature = 0.1
93
+
94
+ with torch.no_grad():
95
+ outputs = model.generate(
96
+ **inputs,
97
+ max_new_tokens=5000,
98
+ temperature=temperature,
99
+ do_sample=True if temperature > 0.01 else False,
100
+ top_p=0.9,
101
+ pad_token_id=tokenizer.eos_token_id,
102
+ repetition_penalty=1.1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  )
104
 
105
+ generated_tokens = outputs[0][input_length:]
106
+ response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
107
+
108
+ # Fix XML start
109
+ if not response.startswith('<argument>'):
110
+ arg_start = response.find('<argument>')
111
+ if arg_start != -1:
112
+ response = response[arg_start:]
113
+
114
+ return response
115
+
116
+
117
+ # Gradio interface
118
+ demo = gr.Interface(
119
+ fn=extract_arguments,
120
+ inputs=[
121
+ gr.Textbox(label="Input Text", placeholder="Enter newspaper text here...", lines=10),
122
+ gr.Slider(minimum=0.01, maximum=0.3, value=0.1, step=0.01, label="Temperature (lower = more consistent)")
123
+ ],
124
+ outputs=[
125
+ gr.Textbox(label="Raw XML Output", lines=8)
126
+ ],
127
+ title="Newspaper Argumentative Unit Extractor",
128
+ description="Extract argumentative units from news sources",
129
+ examples=[
130
+ ["Reggio, January 8. Frequent shocks of earthquake were felt here dur ing the night, accompanied at times by loud subter ranean reports. A few buildings that had not been completely destroyed were further damaged. The work of reconstructing the railway is being pushed forward energetically. News has been received from Brancaleone, Catanzaro, and Palmi of earthquakes by which the inhabitants were alarmed last night", 0.1],
131
+ ["The bourses and theatres are closed. In every quarter help committees have been estab lished. A central committee has been organised at Rome for the purpose of privately and publicly collecting donations, and organising relief expedi tions to the afflicted districts. The Duke of Aosta has accepted the presidency. From all parts of the globe come telegrams of sympathy. The entire press has founded relief funds. Every Ambassador and Minister in Rome personally visited the Ministry of the Exterior yesterday morning, and expressed sympathy on behalf of their respective countries. Doctors, firemen, and municipal guards have been despatched to Messina and Calabria from many Italian towns. The Lombard Bank of Milan has already distributed 250,000 lire to sufferers from the earthquake, and the city of Milan has sent 25 firemen to Messina. A curious result of the earthquake is that the craters of Aetna, Vesuvius, and Stromboli ceased their activity immediately after the shock. It is reported from Malta that the British war- ships “Exmouth,” “Euryalus,” “Minerva,” and “Sutlej” have left for Messina. The French Government has sent two armoured ships and three destroyers to Messina. President Fallieres, Premier Clemenceau, Minister Pichon, and the Presidents of the Senate and Chamber have all sent messages of sympathy to the Italian Government. Palermo, December 30. Yesterday evening the first official telegraphic des- patches from the prefect of Messina reached here, They state that the catastrophe is beyond human description. Many thousands of people are known to have perished. It is impossible, says the prefect, to accurately relate the frightful scenes witnessed. The help already proffered and accepted is insuffi cient for the purpose. There is pressing need of extraordinary measures of help, and provisions are in great demand. At the time of wiring the fires in many parts of the ruined city have not been got under control, and are spreading in many directions. 
Catania, December 30. A survivor from the catastrophe at Messina who has arrived here says: “It is impossible to describe the appalling scene. The city has been transformed into a vast heap of ruins. Almost all the inhabitants were killed; only a few thousands escaped death. There is need of doctors, tents, clothing, and pro visions for the survivors, who, deprived of all ne cessaries, are exposed to the inclemencies of the winter weather. There is need of fire engines to cope with the flames that are raging among the ruins. Messina appears as if it had been swept away by the earthquake. The railway station has collapsed. Railway carriages have been destroyed. Almost all the railway employes are dead. The streets are no longer recognisable; they look like enormous fissures in a distant and extensive heap of ruins. The Uni versity, the Post and Telegraph Office, and all the other public buildings have disappeared. The gas mains are entirely destroyed. For hours after the catastrophe the town was without any help, as the authorities, the garrison, the doctors, and apothe* * caries,—in short, all classes of the population, wjere buried under the ruins.” Three more trains and** a steamer have left Messina with vjarKled and gitives. Bremen, In cembcr 30, Information has been recei\ o, i rom the Rprt authorities at Naples that the ! ‘ -house in the Straits of Messina has been destrc* - ' It seems doubtful whether the navigation of ■ < l traits will be pos sible without risk. The N r.h German Lloyd has therefore ordered all its ship commanders to avoid the Straits. All communicauon with Sicily is inter rupted. Rome, December 30. Newspaper reports from Catanzaro state that the prefect of. Reggio, who was believed to have pe rished, has arrived there and says that he managed to escape from the prefecture when the greater part of the building had fallen in. The surrounding streets and the centre of the town down to the harbour have been totally destroyed. 
Only the small villas clustering in the hills surrounding the town and on the Promenade of Reggio and Campi are intact. The castle, the cathedral, and the Lyceum all collapsed, and practically every student in the Ly ceum met his or her death. The prefect adds that he believes the Bishop to be dead. The barracks fell in, burying hundreds of soldiers. Reports as to the fate of the council house are contradictory. All the fugitives from Reggio describe the disaster as frightful, and estimate the number of dead as (Continued on page 3)", 0.1]
132
+ ]
133
+ )
134
 
135
  if __name__ == "__main__":
136
+ demo.launch()