oberbics committed on
Commit
8ccc09c
·
verified ·
1 Parent(s): 9e63dbb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +131 -152
app.py CHANGED
@@ -4,9 +4,23 @@ os.environ["OMP_NUM_THREADS"] = "1"
4
  import gradio as gr
5
  import torch
6
  import re
 
 
 
7
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
8
  import spaces
9
 
 
 
 
 
 
 
 
 
 
 
 
10
  # Model configuration
11
  MODEL_ID = "oberbics/newspaper-argument-mining-V1"
12
 
@@ -41,10 +55,13 @@ RULES:
41
  - More than one argumentative unit possible for one aticle, one unit has one clear clame and all the xml structures"""
42
 
43
  print("Loading tokenizer...")
 
44
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
45
  tokenizer.pad_token = tokenizer.eos_token
 
46
 
47
  print("Loading model...")
 
48
  bnb_config = BitsAndBytesConfig(
49
  load_in_4bit=True,
50
  bnb_4bit_quant_type="nf4",
@@ -60,102 +77,39 @@ model = AutoModelForCausalLM.from_pretrained(
60
  trust_remote_code=True
61
  )
62
  print("Model loaded successfully!")
63
-
64
-
65
def parse_argumentative_units(response):
    """Parse every argumentative unit found in the model's raw response.

    A unit is a run of four XML-ish tags in fixed order:
    <argument>...</argument> <claim>...</claim> <explanation>...</explanation>
    <human_verification_needed>...</human_verification_needed>
    (tags matched case-insensitively, bodies may span lines).

    Returns a list of dicts with keys 'argument', 'claim', 'explanation',
    'human_verification_needed' (bool: verification text equals "true",
    case-insensitive) and 'raw_verification' (the stripped original text).
    An input with no complete unit yields an empty list.
    """
    pattern = (
        r'<argument>(?P<argument>.*?)</argument>\s*'
        r'<claim>(?P<claim>.*?)</claim>\s*'
        r'<explanation>(?P<explanation>.*?)</explanation>\s*'
        r'<human_verification_needed>(?P<verification>.*?)</human_verification_needed>'
    )
    units = []
    for match in re.finditer(pattern, response, re.DOTALL | re.IGNORECASE):
        verification = match.group('verification').strip()
        units.append({
            'argument': match.group('argument').strip(),
            'claim': match.group('claim').strip(),
            'explanation': match.group('explanation').strip(),
            # Only the literal (case-insensitive) "true" counts as needing review.
            'human_verification_needed': verification.lower() == 'true',
            'raw_verification': verification,
        })
    return units
85
-
86
-
87
def calculate_confidence_score(unit, position):
    """Score one argumentative unit in [0.0, 1.0].

    The base score comes from the unit's position in the model output
    (position 0 -> 0.95, each later position -0.15, floored at 0.2),
    treating the model's emission order as an implicit ranking.
    A unit whose argument is the literal 'NA' scores 0.0 outright.
    Small penalties: -0.1 if the model itself flagged the unit for human
    verification, -0.2 if the argument text is under 30 characters
    (likely incomplete). The result is clamped to [0.0, 1.0].
    """
    # 'NA' means the model produced no real argument — no confidence at all.
    if unit['argument'] == 'NA':
        return 0.0

    # Position-based base: 0.95, 0.80, 0.65, ... floored at 0.20.
    score = max(0.2, 0.95 - (position * 0.15))

    if unit['human_verification_needed']:
        score -= 0.1
    if len(unit['argument']) < 30:
        score -= 0.2

    return max(0.0, min(1.0, score))
107
-
108
-
109
def filter_high_confidence_arguments(units, confidence_threshold=0.6):
    """Score units, sort them by confidence, and split out the confident ones.

    Parameters:
        units: list of unit dicts from parse_argumentative_units; each unit
            is mutated in place to gain a 'confidence_score' key.
        confidence_threshold: minimum score to count as high-confidence.

    Returns:
        (high_confidence_units, scored_units) — both sorted by score,
        highest first; the first list is the subset meeting the threshold.
    """
    scored_units = []
    for position, unit in enumerate(units):
        # BUG FIX: calculate_confidence_score requires the unit's position
        # (emission order drives the base score); the original call passed
        # only the unit and raised TypeError on every invocation.
        unit['confidence_score'] = calculate_confidence_score(unit, position)
        scored_units.append(unit)

    # Sort by confidence (highest first)
    scored_units.sort(key=lambda x: x['confidence_score'], reverse=True)

    # Keep only the units at or above the threshold
    high_confidence_units = [unit for unit in scored_units
                             if unit['confidence_score'] >= confidence_threshold]

    return high_confidence_units, scored_units
125
-
126
-
127
def format_filtered_output(high_confidence_units, show_scores=True, debug=False):
    """Render high-confidence units as the display text shown in the UI.

    Each unit becomes a header line (optionally with its confidence score),
    an optional [DEBUG: ...] line with field lengths and verification flag,
    the four XML-style tag lines, and a trailing blank line. An empty list
    yields the fixed message "No high-confidence arguments found.".
    """
    if not high_confidence_units:
        return "No high-confidence arguments found."

    lines = []
    for idx, unit in enumerate(high_confidence_units, 1):
        if show_scores:
            lines.append(f"=== ARGUMENT {idx} (Confidence: {unit['confidence_score']:.3f}) ===")
        else:
            lines.append(f"=== ARGUMENT {idx} ===")

        if debug:
            # Field lengths help diagnose truncated/missing tags; 'NA' counts as 0.
            arg_len = 0 if unit['argument'] == 'NA' else len(unit['argument'])
            claim_len = 0 if unit['claim'] == 'NA' else len(unit['claim'])
            exp_len = 0 if unit['explanation'] == 'NA' else len(unit['explanation'])
            lines.append(f"[DEBUG: arg_len={arg_len}, claim_len={claim_len}, exp_len={exp_len}, verification={unit['human_verification_needed']}]")

        lines.append(f"<argument>{unit['argument']}</argument>")
        lines.append(f"<claim>{unit['claim']}</claim>")
        lines.append(f"<explanation>{unit['explanation']}</explanation>")
        lines.append(f"<human_verification_needed>{unit['raw_verification']}</human_verification_needed>")
        lines.append("")

    return "\n".join(lines)
153
-
154
 
155
  @spaces.GPU
156
- def extract_arguments(text, temperature=0.1, confidence_threshold=0.6, show_all=False):
 
 
 
157
  if not text or not text.strip():
158
- return "", "Please enter some text to analyze."
 
 
 
159
 
160
  try:
161
  prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
@@ -181,7 +135,7 @@ Extract arguments from historical text.
181
  except:
182
  temperature = 0.1
183
 
184
- print(f"DEBUG: Generating with temperature {temperature}")
185
 
186
  with torch.no_grad():
187
  outputs = model.generate(
@@ -197,78 +151,103 @@ Extract arguments from historical text.
197
  generated_tokens = outputs[0][input_length:]
198
  response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
199
 
200
- print(f"DEBUG: Raw response length: {len(response)}")
201
- print(f"DEBUG: Response starts with: {response[:100]}")
202
-
203
  # Fix XML start
204
  if not response.startswith('<argument>'):
205
  arg_start = response.find('<argument>')
206
  if arg_start != -1:
207
  response = response[arg_start:]
208
- print(f"DEBUG: Fixed response start, new length: {len(response)}")
209
 
210
- # Parse and filter arguments
211
- units = parse_argumentative_units(response)
212
 
213
- if not units:
214
- print("DEBUG: No units found, returning raw response")
215
- return response, f"Raw output returned (no parseable argumentative units found)"
216
 
217
- print(f"DEBUG: Found {len(units)} units, applying confidence filtering")
218
 
219
- try:
220
- high_confidence_units, all_units = filter_high_confidence_arguments(units, confidence_threshold)
221
- print(f"DEBUG: {len(high_confidence_units)} high confidence units")
222
- except Exception as e:
223
- print(f"DEBUG: Error in confidence filtering: {str(e)}")
224
- return response, f"Error in confidence filtering: {str(e)}"
225
 
226
- if show_all:
227
- # Show all units with confidence scores
228
- all_output = []
229
- for i, unit in enumerate(all_units, 1):
230
- status = "✓ HIGH CONFIDENCE" if unit['confidence_score'] >= confidence_threshold else "⚠ LOW CONFIDENCE"
231
- all_output.append(f"=== ARGUMENT {i} - {status} (Score: {unit['confidence_score']:.3f}) ===")
232
- all_output.append(f"<argument>{unit['argument']}</argument>")
233
- all_output.append(f"<claim>{unit['claim']}</claim>")
234
- all_output.append(f"<explanation>{unit['explanation']}</explanation>")
235
- all_output.append(f"<human_verification_needed>{unit['raw_verification']}</human_verification_needed>")
236
- all_output.append("")
237
-
238
- return "\n".join(all_output), f"Found {len(units)} total units, {len(high_confidence_units)} high-confidence"
239
 
240
- else:
241
- # Show only high-confidence units
242
- filtered_output = format_filtered_output(high_confidence_units, show_scores=True)
243
- return filtered_output, f"Showing {len(high_confidence_units)}/{len(units)} high-confidence arguments (threshold: {confidence_threshold})"
 
 
 
 
 
 
 
 
 
 
 
 
 
244
 
 
245
  except Exception as e:
246
- error_msg = f"Error during processing: {str(e)}"
247
- print(f"DEBUG: {error_msg}")
248
- return error_msg, "Processing failed - check console for details"
249
 
250
-
251
- # Gradio interface
252
- demo = gr.Interface(
253
- fn=extract_arguments,
254
- inputs=[
255
- gr.Textbox(label="Input Text", placeholder="Enter newspaper text here...", lines=10),
256
- gr.Slider(minimum=0.01, maximum=0.3, value=0.1, step=0.01, label="Temperature (lower = more consistent)"),
257
- gr.Slider(minimum=0.3, maximum=0.9, value=0.6, step=0.05, label="Confidence Threshold (higher = more selective)"),
258
- gr.Checkbox(label="Show All Arguments (including low confidence)", value=False)
259
- ],
260
- outputs=[
261
- gr.Textbox(label="Filtered Arguments", lines=12),
262
- gr.Textbox(label="Summary", lines=1)
263
- ],
264
- title="Newspaper Argumentative Unit Extractor with Confidence Filtering",
265
- description="Extract and filter argumentative units from news sources based on confidence scores",
266
- examples=[
267
- ["Reggio, January 8. Frequent shocks of earthquake were felt here dur ing the night, accompanied at times by loud subter ranean reports. A few buildings that had not been completely destroyed were further damaged. The work of reconstructing the railway is being pushed forward energetically. News has been received from Brancaleone, Catanzaro, and Palmi of earthquakes by which the inhabitants were alarmed last night", 0.1, 0.6, False],
268
- ["The bourses and theatres are closed. In every quarter help committees have been estab lished. A central committee has been organised at Rome for the purpose of privately and publicly collecting donations, and organising relief expedi tions to the afflicted districts. The Duke of Aosta has accepted the presidency. From all parts of the globe come telegrams of sympathy. The entire press has founded relief funds. Every Ambassador and Minister in Rome personally visited the Ministry of the Exterior yesterday morning, and expressed sympathy on behalf of their respective countries. Doctors, firemen, and municipal guards have been despatched to Messina and Calabria from many Italian towns. The Lombard Bank of Milan has already distributed 250,000 lire to sufferers from the earthquake, and the city of Milan has sent 25 firemen to Messina. A curious result of the earthquake is that the craters of Aetna, Vesuvius, and Stromboli ceased their activity immediately after the shock. It is reported from Malta that the British war- ships \"Exmouth,\" \"Euryalus,\" \"Minerva,\" and \"Sutlej\" have left for Messina. The French Government has sent two armoured ships and three destroyers to Messina. President Fallieres, Premier Clemenceau, Minister Pichon, and the Presidents of the Senate and Chamber have all sent messages of sympathy to the Italian Government. Palermo, December 30. Yesterday evening the first official telegraphic des- patches from the prefect of Messina reached here, They state that the catastrophe is beyond human description. Many thousands of people are known to have perished. It is impossible, says the prefect, to accurately relate the frightful scenes witnessed. The help already proffered and accepted is insuffi cient for the purpose. There is pressing need of extraordinary measures of help, and provisions are in great demand. At the time of wiring the fires in many parts of the ruined city have not been got under control, and are spreading in many directions. 
Catania, December 30. A survivor from the catastrophe at Messina who has arrived here says: \"It is impossible to describe the appalling scene. The city has been transformed into a vast heap of ruins. Almost all the inhabitants were killed; only a few thousands escaped death. There is need of doctors, tents, clothing, and pro visions for the survivors, who, deprived of all ne cessaries, are exposed to the inclemencies of the winter weather. There is need of fire engines to cope with the flames that are raging among the ruins. Messina appears as if it had been swept away by the earthquake. The railway station has collapsed. Railway carriages have been destroyed. Almost all the railway employes are dead. The streets are no longer recognisable; they look like enormous fissures in a distant and extensive heap of ruins. The Uni versity, the Post and Telegraph Office, and all the other public buildings have disappeared. The gas mains are entirely destroyed. For hours after the catastrophe the town was without any help, as the authorities, the garrison, the doctors, and apothe* * caries,—in short, all classes of the population, wjere buried under the ruins.\" Three more trains and** a steamer have left Messina with vjarKled and gitives. Bremen, In cembcr 30, Information has been recei\ o, i rom the Rprt authorities at Naples that the ! ' -house in the Straits of Messina has been destrc* - ' It seems doubtful whether the navigation of ■ < l traits will be pos sible without risk. The N r.h German Lloyd has therefore ordered all its ship commanders to avoid the Straits. All communicauon with Sicily is inter rupted. Rome, December 30. Newspaper reports from Catanzaro state that the prefect of. Reggio, who was believed to have pe rished, has arrived there and says that he managed to escape from the prefecture when the greater part of the building had fallen in. The surrounding streets and the centre of the town down to the harbour have been totally destroyed. 
Only the small villas clustering in the hills surrounding the town and on the Promenade of Reggio and Campi are intact. The castle, the cathedral, and the Lyceum all collapsed, and practically every student in the Ly ceum met his or her death. The prefect adds that he believes the Bishop to be dead. The barracks fell in, burying hundreds of soldiers. Reports as to the fate of the council house are contradictory. All the fugitives from Reggio describe the disaster as frightful, and estimate the number of dead as (Continued on page 3)", 0.1, 0.6, False]
269
- ]
270
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
 
272
  if __name__ == "__main__":
273
- demo.launch()
274
-
 
4
  import gradio as gr
5
  import torch
6
  import re
7
+ import json
8
+ import datetime
9
+ import logging
10
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
11
  import spaces
12
 
13
+ # Configure logging
14
+ logging.basicConfig(
15
+ level=logging.INFO,
16
+ format='%(asctime)s - %(levelname)s - %(message)s',
17
+ handlers=[
18
+ logging.FileHandler('argument_extraction.log'),
19
+ logging.StreamHandler()
20
+ ]
21
+ )
22
+ logger = logging.getLogger(__name__)
23
+
24
  # Model configuration
25
  MODEL_ID = "oberbics/newspaper-argument-mining-V1"
26
 
 
55
  - More than one argumentative unit possible for one aticle, one unit has one clear clame and all the xml structures"""
56
 
57
  print("Loading tokenizer...")
58
+ logger.info("Starting tokenizer loading")
59
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
60
  tokenizer.pad_token = tokenizer.eos_token
61
+ logger.info("Tokenizer loaded successfully")
62
 
63
  print("Loading model...")
64
+ logger.info("Starting model loading")
65
  bnb_config = BitsAndBytesConfig(
66
  load_in_4bit=True,
67
  bnb_4bit_quant_type="nf4",
 
77
  trust_remote_code=True
78
  )
79
  print("Model loaded successfully!")
80
+ logger.info("Model loaded successfully")
81
+
82
def log_interaction(input_text, temperature, output, processing_time, error=None):
    """Append one interaction record to interaction_logs.json (JSON Lines).

    Parameters:
        input_text: the user's raw input (may be None/empty).
        temperature: sampling temperature used for generation.
        output: the model's response text (may be empty on failure).
        processing_time: wall-clock seconds the request took.
        error: optional exception/message; its str() is stored when present.

    Best-effort: failure to write the log is reported via the module logger
    and never propagates to the caller.
    """
    log_entry = {
        # NOTE(review): naive local time; consider datetime.now(UTC) — confirm
        # downstream consumers before changing the format.
        "timestamp": datetime.datetime.now().isoformat(),
        "input_length": len(input_text) if input_text else 0,
        "input_preview": input_text[:100] if input_text else "",
        "temperature": temperature,
        "output_length": len(output) if output else 0,
        "processing_time_seconds": processing_time,
        "has_error": error is not None,
        "error_message": str(error) if error else None,
        "output_preview": output[:200] if output else ""
    }

    # Append as one JSON object per line; explicit UTF-8 so previews with
    # non-ASCII text do not depend on the platform's locale encoding.
    try:
        with open('interaction_logs.json', 'a', encoding='utf-8') as f:
            f.write(json.dumps(log_entry) + '\n')
    except Exception as e:
        logger.error(f"Failed to save interaction log: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
  @spaces.GPU
104
+ def extract_arguments(text, temperature=0.1):
105
+ start_time = datetime.datetime.now()
106
+ logger.info(f"Processing request - Input length: {len(text) if text else 0}, Temperature: {temperature}")
107
+
108
  if not text or not text.strip():
109
+ error_msg = "Please enter some text to analyze."
110
+ logger.warning("Empty input received")
111
+ log_interaction(text, temperature, "", 0, error_msg)
112
+ return "", error_msg
113
 
114
  try:
115
  prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
 
135
  except:
136
  temperature = 0.1
137
 
138
+ logger.info(f"Starting model generation with {input_length} input tokens")
139
 
140
  with torch.no_grad():
141
  outputs = model.generate(
 
151
  generated_tokens = outputs[0][input_length:]
152
  response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
153
 
 
 
 
154
  # Fix XML start
155
  if not response.startswith('<argument>'):
156
  arg_start = response.find('<argument>')
157
  if arg_start != -1:
158
  response = response[arg_start:]
 
159
 
160
+ processing_time = (datetime.datetime.now() - start_time).total_seconds()
161
+ logger.info(f"Processing completed in {processing_time:.2f} seconds - Output length: {len(response)}")
162
 
163
+ # Log successful interaction
164
+ log_interaction(text, temperature, response, processing_time)
 
165
 
166
+ return response
167
 
168
+ except Exception as e:
169
+ processing_time = (datetime.datetime.now() - start_time).total_seconds()
170
+ error_msg = f"Error during processing: {str(e)}"
171
+ logger.error(f"Processing failed after {processing_time:.2f} seconds: {e}")
 
 
172
 
173
+ # Log failed interaction
174
+ log_interaction(text, temperature, "", processing_time, e)
 
 
 
 
 
 
 
 
 
 
 
175
 
176
+ return error_msg
177
+
178
def get_logs():
    """Return a plain-text summary of the 10 most recent logged interactions.

    Reads the newline-delimited JSON written by log_interaction from
    'interaction_logs.json' and formats one summary line per entry.
    Never raises: a missing file yields a friendly "nothing yet" message,
    a corrupt line is skipped, and any other failure is reported as text,
    so the Gradio log viewer always has something to display.
    """
    try:
        with open('interaction_logs.json', 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except FileNotFoundError:
        # First run: nothing has been logged yet — not an error.
        return "No interactions logged yet."
    except Exception as e:
        return f"Error reading logs: {e}"

    log_summary = []
    for line in lines[-10:]:  # only the 10 most recent interactions
        try:
            entry = json.loads(line)
        except json.JSONDecodeError:
            # A partially written / corrupt line should not kill the whole view.
            continue
        # NOTE(review): log_interaction never writes 'confidence_score'; this
        # lookup is kept for compatibility with other/older log producers.
        confidence_info = f", Confidence: {entry['confidence_score']:.3f}" if entry.get('confidence_score') else ""
        summary = f"[{entry['timestamp']}] Input: {entry['input_length']} chars, Output: {entry['output_length']} chars, Time: {entry['processing_time_seconds']:.2f}s{confidence_info}"
        if entry.get('has_error'):
            summary += f" ERROR: {entry['error_message']}"
        log_summary.append(summary)

    return "\n".join(log_summary)
 
 
197
 
198
# Gradio interface with logging viewer
with gr.Blocks(title="Newspaper Argumentative Unit Extractor") as demo:
    gr.Markdown("# Newspaper Argumentative Unit Extractor")
    gr.Markdown("Extract argumentative units from news sources")

    # Main tab: text in, raw XML argument units out.
    with gr.Tab("Extract Arguments"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(label="Input Text", placeholder="Enter newspaper text here...", lines=10)
                temp_slider = gr.Slider(minimum=0.01, maximum=0.3, value=0.1, step=0.01, label="Temperature (lower = more consistent)")
                run_btn = gr.Button("Extract Arguments", variant="primary")
            with gr.Column():
                xml_output = gr.Textbox(label="Raw XML Output", lines=8)

        run_btn.click(fn=extract_arguments, inputs=[text_input, temp_slider], outputs=[xml_output])

        # Clickable example; selecting it runs extract_arguments directly.
        gr.Examples(
            examples=[["Reggio, January 8. Frequent shocks of earthquake were felt here dur ing the night, accompanied at times by loud subter ranean reports. A few buildings that had not been completely destroyed were further damaged. The work of reconstructing the railway is being pushed forward energetically. News has been received from Brancaleone, Catanzaro, and Palmi of earthquakes by which the inhabitants were alarmed last night", 0.1]],
            inputs=[text_input, temp_slider],
            outputs=[xml_output],
            fn=extract_arguments,
        )

    # Secondary tab: read-only view over interaction_logs.json.
    with gr.Tab("Logs"):
        gr.Markdown("## Recent Activity Logs")
        logs_box = gr.Textbox(label="Recent Interactions", lines=15, value=get_logs())
        reload_btn = gr.Button("Refresh Logs")
        reload_btn.click(fn=get_logs, outputs=[logs_box])

if __name__ == "__main__":
    demo.launch()