Update app.py

app.py CHANGED
@@ -4,9 +4,23 @@ os.environ["OMP_NUM_THREADS"] = "1"
 import gradio as gr
 import torch
 import re
+import json
+import datetime
+import logging
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import spaces
 
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.FileHandler('argument_extraction.log'),
+        logging.StreamHandler()
+    ]
+)
+logger = logging.getLogger(__name__)
+
 # Model configuration
 MODEL_ID = "oberbics/newspaper-argument-mining-V1"
 
@@ -41,10 +55,13 @@ RULES:
 - More than one argumentative unit possible for one aticle, one unit has one clear clame and all the xml structures"""
 
 print("Loading tokenizer...")
+logger.info("Starting tokenizer loading")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 tokenizer.pad_token = tokenizer.eos_token
+logger.info("Tokenizer loaded successfully")
 
 print("Loading model...")
+logger.info("Starting model loading")
 bnb_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_quant_type="nf4",
@@ -60,102 +77,39 @@ model = AutoModelForCausalLM.from_pretrained(
     trust_remote_code=True
 )
 print("Model loaded successfully!")
-
-
-def …
-    """…
-    …
-            'claim': claim,
-            'explanation': explanation,
-            'human_verification_needed': verification.lower() == 'true',
-            'raw_verification': verification
-        })
-
-    return units
-
-
-def calculate_confidence_score(unit, position):
-    """Calculate confidence based on model's natural ordering + basic quality checks"""
-
-    # Base confidence from position (model's implicit ranking)
-    # First argument = highest confidence, then declining
-    position_confidence = max(0.2, 0.95 - (position * 0.15))  # 0.95, 0.80, 0.65, 0.50, 0.35, 0.20...
-
-    # Only major adjustments for obvious quality issues
-    if unit['argument'] == 'NA':
-        return 0.0
-
-    # …
-    …
-        position_confidence -= 0.2
-
-    return max(0.0, min(1.0, position_confidence))
-
-
-def filter_high_confidence_arguments(units, confidence_threshold=0.6):
-    """Filter argumentative units by confidence score"""
-    scored_units = []
-
-    for unit in units:
-        confidence = calculate_confidence_score(unit)
-        unit['confidence_score'] = confidence
-        scored_units.append(unit)
-
-    # Sort by confidence (highest first)
-    scored_units.sort(key=lambda x: x['confidence_score'], reverse=True)
-
-    # Filter by threshold
-    high_confidence_units = [unit for unit in scored_units if unit['confidence_score'] >= confidence_threshold]
-
-    return high_confidence_units, scored_units
-
-
-def format_filtered_output(high_confidence_units, show_scores=True, debug=False):
-    """Format the high-confidence units for display"""
-    if not high_confidence_units:
-        return "No high-confidence arguments found."
-
-    output = []
-    for i, unit in enumerate(high_confidence_units, 1):
-        if show_scores:
-            output.append(f"=== ARGUMENT {i} (Confidence: {unit['confidence_score']:.3f}) ===")
-        else:
-            output.append(f"=== ARGUMENT {i} ===")
-
-        if debug:
-            # Show lengths and verification status for debugging
-            arg_len = len(unit['argument']) if unit['argument'] != 'NA' else 0
-            claim_len = len(unit['claim']) if unit['claim'] != 'NA' else 0
-            exp_len = len(unit['explanation']) if unit['explanation'] != 'NA' else 0
-            output.append(f"[DEBUG: arg_len={arg_len}, claim_len={claim_len}, exp_len={exp_len}, verification={unit['human_verification_needed']}]")
-
-        output.append(f"<argument>{unit['argument']}</argument>")
-        output.append(f"<claim>{unit['claim']}</claim>")
-        output.append(f"<explanation>{unit['explanation']}</explanation>")
-        output.append(f"<human_verification_needed>{unit['raw_verification']}</human_verification_needed>")
-        output.append("")
-
-    return "\n".join(output)
+logger.info("Model loaded successfully")
+
+def log_interaction(input_text, temperature, output, processing_time, error=None):
+    """Log each interaction to JSON file for analysis"""
+    log_entry = {
+        "timestamp": datetime.datetime.now().isoformat(),
+        "input_length": len(input_text) if input_text else 0,
+        "input_preview": input_text[:100] if input_text else "",
+        "temperature": temperature,
+        "output_length": len(output) if output else 0,
+        "processing_time_seconds": processing_time,
+        "has_error": error is not None,
+        "error_message": str(error) if error else None,
+        "output_preview": output[:200] if output else ""
+    }
+
+    # Save to JSON file
+    try:
+        with open('interaction_logs.json', 'a') as f:
+            f.write(json.dumps(log_entry) + '\n')
+    except Exception as e:
+        logger.error(f"Failed to save interaction log: {e}")
 
 @spaces.GPU
-def extract_arguments(text, temperature=0.1…
+def extract_arguments(text, temperature=0.1):
+    start_time = datetime.datetime.now()
+    logger.info(f"Processing request - Input length: {len(text) if text else 0}, Temperature: {temperature}")
+
     if not text or not text.strip():
-        …
+        error_msg = "Please enter some text to analyze."
+        logger.warning("Empty input received")
+        log_interaction(text, temperature, "", 0, error_msg)
+        return "", error_msg
 
     try:
         prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
@@ -181,7 +135,7 @@ Extract arguments from historical text.
     except:
         temperature = 0.1
 
-
+    logger.info(f"Starting model generation with {input_length} input tokens")
 
     with torch.no_grad():
        outputs = model.generate(
@@ -197,78 +151,103 @@ Extract arguments from historical text.
         generated_tokens = outputs[0][input_length:]
         response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
 
-        print(f"DEBUG: Raw response length: {len(response)}")
-        print(f"DEBUG: Response starts with: {response[:100]}")
-
         # Fix XML start
         if not response.startswith('<argument>'):
             arg_start = response.find('<argument>')
             if arg_start != -1:
                 response = response[arg_start:]
-                print(f"DEBUG: Fixed response start, new length: {len(response)}")
 
-        …
-            return response, f"Raw output returned (no parseable argumentative units found)"
-
-        …
-            print(f"DEBUG: Error in confidence filtering: {str(e)}")
-            return response, f"Error in confidence filtering: {str(e)}"
-
-        …
-        all_output = []
-        for i, unit in enumerate(all_units, 1):
-            status = "✓ HIGH CONFIDENCE" if unit['confidence_score'] >= confidence_threshold else "⚠ LOW CONFIDENCE"
-            all_output.append(f"=== ARGUMENT {i} - {status} (Score: {unit['confidence_score']:.3f}) ===")
-            all_output.append(f"<argument>{unit['argument']}</argument>")
-            all_output.append(f"<claim>{unit['claim']}</claim>")
-            all_output.append(f"<explanation>{unit['explanation']}</explanation>")
-            all_output.append(f"<human_verification_needed>{unit['raw_verification']}</human_verification_needed>")
-            all_output.append("")
-
-        return "\n".join(all_output), f"Found {len(units)} total units, {len(high_confidence_units)} high-confidence"
-        …
+        processing_time = (datetime.datetime.now() - start_time).total_seconds()
+        logger.info(f"Processing completed in {processing_time:.2f} seconds - Output length: {len(response)}")
+
+        # Log successful interaction
+        log_interaction(text, temperature, response, processing_time)
+
+        return response
 
     except Exception as e:
-        …
-        print(f"DEBUG: {error_msg}")
-        return error_msg, "Processing failed - check console for details"
+        processing_time = (datetime.datetime.now() - start_time).total_seconds()
+        error_msg = f"Error during processing: {str(e)}"
+        logger.error(f"Processing failed after {processing_time:.2f} seconds: {e}")
+
+        # Log failed interaction
+        log_interaction(text, temperature, "", processing_time, e)
+
+        return error_msg
+
+def get_logs():
+    """Function to view recent logs"""
+    try:
+        with open('interaction_logs.json', 'r') as f:
+            lines = f.readlines()
+        recent_logs = lines[-10:]  # Last 10 interactions
+
+        log_summary = []
+        for line in recent_logs:
+            entry = json.loads(line)
+            confidence_info = f", Confidence: {entry['confidence_score']:.3f}" if entry.get('confidence_score') else ""
+            summary = f"[{entry['timestamp']}] Input: {entry['input_length']} chars, Output: {entry['output_length']} chars, Time: {entry['processing_time_seconds']:.2f}s{confidence_info}"
+            if entry['has_error']:
+                summary += f" ERROR: {entry['error_message']}"
+            log_summary.append(summary)
+
+        return "\n".join(log_summary)
+    except Exception as e:
+        return f"Error reading logs: {e}"
 
-    …
-    gr.…
-    )
+# Gradio interface with logging viewer
+with gr.Blocks(title="Newspaper Argumentative Unit Extractor") as demo:
+    gr.Markdown("# Newspaper Argumentative Unit Extractor")
+    gr.Markdown("Extract argumentative units from news sources")
+
+    with gr.Tab("Extract Arguments"):
+        with gr.Row():
+            with gr.Column():
+                input_text = gr.Textbox(
+                    label="Input Text",
+                    placeholder="Enter newspaper text here...",
+                    lines=10
+                )
+                temperature = gr.Slider(
+                    minimum=0.01,
+                    maximum=0.3,
+                    value=0.1,
+                    step=0.01,
+                    label="Temperature (lower = more consistent)"
+                )
+                extract_btn = gr.Button("Extract Arguments", variant="primary")
+
+            with gr.Column():
+                output_text = gr.Textbox(
+                    label="Raw XML Output",
+                    lines=8
+                )
+
+        extract_btn.click(
+            fn=extract_arguments,
+            inputs=[input_text, temperature],
+            outputs=[output_text]
+        )
+
+        # Examples
+        gr.Examples(
+            examples=[
+                ["Reggio, January 8. Frequent shocks of earthquake were felt here dur ing the night, accompanied at times by loud subter ranean reports. A few buildings that had not been completely destroyed were further damaged. The work of reconstructing the railway is being pushed forward energetically. News has been received from Brancaleone, Catanzaro, and Palmi of earthquakes by which the inhabitants were alarmed last night", 0.1],
+            ],
+            inputs=[input_text, temperature],
+            outputs=[output_text],
+            fn=extract_arguments
+        )
+
+    with gr.Tab("Logs"):
+        gr.Markdown("## Recent Activity Logs")
+        log_display = gr.Textbox(
+            label="Recent Interactions",
+            lines=15,
+            value=get_logs()
+        )
+        refresh_btn = gr.Button("Refresh Logs")
+        refresh_btn.click(fn=get_logs, outputs=[log_display])
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()