oberbics committed on
Commit
78595fd
·
verified ·
1 Parent(s): f81907c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -14
app.py CHANGED
@@ -24,6 +24,9 @@ logger = logging.getLogger(__name__)
24
  # Model configuration
25
  MODEL_ID = "oberbics/newspaper-argument-mining-V1"
26
 
 
 
 
27
  SYSTEM_PROMPT = """You are an expert at analyzing historical texts and you hate to summarize
28
 
29
  OUTPUT FORMAT - EXACTLY these 4 XML tags and NOTHING else:
@@ -79,7 +82,33 @@ model = AutoModelForCausalLM.from_pretrained(
79
  print("Model loaded successfully!")
80
  logger.info("Model loaded successfully")
81
 
82
- def log_interaction(input_text, temperature, output, processing_time, error=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  """Log each interaction to JSON file for analysis"""
84
  log_entry = {
85
  "timestamp": datetime.datetime.now().isoformat(),
@@ -90,7 +119,8 @@ def log_interaction(input_text, temperature, output, processing_time, error=None
90
  "processing_time_seconds": processing_time,
91
  "has_error": error is not None,
92
  "error_message": str(error) if error else None,
93
- "output_preview": output[:200] if output else ""
 
94
  }
95
 
96
  # Save to JSON file
@@ -101,9 +131,9 @@ def log_interaction(input_text, temperature, output, processing_time, error=None
101
  logger.error(f"Failed to save interaction log: {e}")
102
 
103
  @spaces.GPU
104
- def extract_arguments(text, temperature=0.1):
105
  start_time = datetime.datetime.now()
106
- logger.info(f"Processing request - Input length: {len(text) if text else 0}, Temperature: {temperature}")
107
 
108
  if not text or not text.strip():
109
  error_msg = "Please enter some text to analyze."
@@ -157,13 +187,16 @@ Extract arguments from historical text.
157
  if arg_start != -1:
158
  response = response[arg_start:]
159
 
 
 
 
160
  processing_time = (datetime.datetime.now() - start_time).total_seconds()
161
- logger.info(f"Processing completed in {processing_time:.2f} seconds - Output length: {len(response)}")
162
 
163
  # Log successful interaction
164
- log_interaction(text, temperature, response, processing_time)
165
 
166
- return response
167
 
168
  except Exception as e:
169
  processing_time = (datetime.datetime.now() - start_time).total_seconds()
@@ -186,8 +219,9 @@ def get_logs():
186
  for line in recent_logs:
187
  entry = json.loads(line)
188
  confidence_info = f", Confidence: {entry['confidence_score']:.3f}" if entry.get('confidence_score') else ""
 
189
  rejected_info = " [REJECTED]" if entry.get('rejected') else ""
190
- summary = f"[{entry['timestamp']}] Input: {entry['input_length']} chars, Output: {entry['output_length']} chars, Time: {entry['processing_time_seconds']:.2f}s{confidence_info}{rejected_info}"
191
  if entry['has_error']:
192
  summary += f" ERROR: {entry['error_message']}"
193
  log_summary.append(summary)
@@ -196,10 +230,10 @@ def get_logs():
196
  except Exception as e:
197
  return f"Error reading logs: {e}"
198
 
199
- # Gradio interface with logging viewer
200
  with gr.Blocks(title="Newspaper Argumentative Unit Extractor") as demo:
201
  gr.Markdown("# Newspaper Argumentative Unit Extractor")
202
- gr.Markdown("Extract argumentative units from news sources")
203
 
204
  with gr.Tab("Extract Arguments"):
205
  with gr.Row():
@@ -216,6 +250,13 @@ with gr.Blocks(title="Newspaper Argumentative Unit Extractor") as demo:
216
  step=0.01,
217
  label="Temperature (lower = more consistent)"
218
  )
 
 
 
 
 
 
 
219
  extract_btn = gr.Button("Extract Arguments", variant="primary")
220
 
221
  with gr.Column():
@@ -226,16 +267,16 @@ with gr.Blocks(title="Newspaper Argumentative Unit Extractor") as demo:
226
 
227
  extract_btn.click(
228
  fn=extract_arguments,
229
- inputs=[input_text, temperature],
230
  outputs=[output_text]
231
  )
232
 
233
  # Examples
234
  gr.Examples(
235
  examples=[
236
- ["Reggio, January 8. Frequent shocks of earthquake were felt here dur ing the night, accompanied at times by loud subter ranean reports. A few buildings that had not been completely destroyed were further damaged. The work of reconstructing the railway is being pushed forward energetically. News has been received from Brancaleone, Catanzaro, and Palmi of earthquakes by which the inhabitants were alarmed last night", 0.1],
237
  ],
238
- inputs=[input_text, temperature],
239
  outputs=[output_text],
240
  fn=extract_arguments
241
  )
@@ -251,4 +292,4 @@ with gr.Blocks(title="Newspaper Argumentative Unit Extractor") as demo:
251
  refresh_btn.click(fn=get_logs, outputs=[log_display])
252
 
253
  if __name__ == "__main__":
254
- demo.launch()
 
24
  # Model configuration
25
  MODEL_ID = "oberbics/newspaper-argument-mining-V1"
26
 
27
+ # Add minimum length threshold for arguments
28
+ MIN_ARGUMENT_LENGTH = 50 # Adjust this value as needed
29
+
30
  SYSTEM_PROMPT = """You are an expert at analyzing historical texts and you hate to summarize
31
 
32
  OUTPUT FORMAT - EXACTLY these 4 XML tags and NOTHING else:
 
82
  print("Model loaded successfully!")
83
  logger.info("Model loaded successfully")
84
 
85
def parse_and_filter_arguments(response, min_length=MIN_ARGUMENT_LENGTH):
    """Replace a too-short extracted argument with the all-"NA" response.

    Searches *response* for the first ``<argument>...</argument>`` element
    and measures its stripped text. If that text is non-empty, is not the
    literal ``"NA"``, and is shorter than *min_length* characters, the whole
    response is replaced by the four-tag "NA" template.

    Args:
        response: Model output expected to contain the XML tags.
        min_length: Minimum number of characters the argument text must
            have in order to be kept.

    Returns:
        A ``(text, was_filtered)`` tuple. ``text`` is the "NA" template when
        the argument was filtered and the unchanged *response* otherwise;
        ``was_filtered`` is ``True`` only when the template was substituted.
    """
    # Nothing to parse in a missing or non-string response: hand it back
    # unchanged instead of letting re.search raise and taking the error path.
    if not isinstance(response, str):
        return response, False

    try:
        argument_match = re.search(
            r'<argument>(.*?)</argument>', response, re.DOTALL
        )
        if argument_match is None:
            return response, False

        argument_text = argument_match.group(1).strip()
        too_short = (
            bool(argument_text)
            and argument_text != "NA"
            and len(argument_text) < min_length
        )
        if not too_short:
            return response, False

        logger.info(
            "Argument filtered out due to length: %d chars (min: %s)",
            len(argument_text),
            min_length,
        )

        # Built line by line so that source indentation can never leak
        # into the text returned to the caller.
        filtered_response = "\n".join(
            [
                "<argument>NA</argument>",
                "<claim>NA</claim>",
                "<explanation>NA</explanation>",
                "<human_verification_needed>False</human_verification_needed>",
            ]
        )
        return filtered_response, True  # True indicates it was filtered

    except Exception as e:
        # Best effort: a failure while filtering must not lose the model
        # output, so log it (with traceback) and return the response as-is.
        logger.exception("Error parsing arguments: %s", e)
        return response, False
110
+
111
+ def log_interaction(input_text, temperature, output, processing_time, error=None, filtered=False):
112
  """Log each interaction to JSON file for analysis"""
113
  log_entry = {
114
  "timestamp": datetime.datetime.now().isoformat(),
 
119
  "processing_time_seconds": processing_time,
120
  "has_error": error is not None,
121
  "error_message": str(error) if error else None,
122
+ "output_preview": output[:200] if output else "",
123
+ "filtered_for_length": filtered
124
  }
125
 
126
  # Save to JSON file
 
131
  logger.error(f"Failed to save interaction log: {e}")
132
 
133
  @spaces.GPU
134
+ def extract_arguments(text, temperature=0.1, min_arg_length=MIN_ARGUMENT_LENGTH):
135
  start_time = datetime.datetime.now()
136
+ logger.info(f"Processing request - Input length: {len(text) if text else 0}, Temperature: {temperature}, Min argument length: {min_arg_length}")
137
 
138
  if not text or not text.strip():
139
  error_msg = "Please enter some text to analyze."
 
187
  if arg_start != -1:
188
  response = response[arg_start:]
189
 
190
+ # Filter out short arguments
191
+ filtered_response, was_filtered = parse_and_filter_arguments(response, min_arg_length)
192
+
193
  processing_time = (datetime.datetime.now() - start_time).total_seconds()
194
+ logger.info(f"Processing completed in {processing_time:.2f} seconds - Output length: {len(filtered_response)} - Filtered: {was_filtered}")
195
 
196
  # Log successful interaction
197
+ log_interaction(text, temperature, filtered_response, processing_time, filtered=was_filtered)
198
 
199
+ return filtered_response
200
 
201
  except Exception as e:
202
  processing_time = (datetime.datetime.now() - start_time).total_seconds()
 
219
  for line in recent_logs:
220
  entry = json.loads(line)
221
  confidence_info = f", Confidence: {entry['confidence_score']:.3f}" if entry.get('confidence_score') else ""
222
+ filtered_info = " [FILTERED]" if entry.get('filtered_for_length') else ""
223
  rejected_info = " [REJECTED]" if entry.get('rejected') else ""
224
+ summary = f"[{entry['timestamp']}] Input: {entry['input_length']} chars, Output: {entry['output_length']} chars, Time: {entry['processing_time_seconds']:.2f}s{confidence_info}{filtered_info}{rejected_info}"
225
  if entry['has_error']:
226
  summary += f" ERROR: {entry['error_message']}"
227
  log_summary.append(summary)
 
230
  except Exception as e:
231
  return f"Error reading logs: {e}"
232
 
233
+ # Gradio interface with logging viewer and length control
234
  with gr.Blocks(title="Newspaper Argumentative Unit Extractor") as demo:
235
  gr.Markdown("# Newspaper Argumentative Unit Extractor")
236
+ gr.Markdown("Extract argumentative units from news sources (filters out arguments shorter than specified length)")
237
 
238
  with gr.Tab("Extract Arguments"):
239
  with gr.Row():
 
250
  step=0.01,
251
  label="Temperature (lower = more consistent)"
252
  )
253
+ min_length = gr.Slider(
254
+ minimum=10,
255
+ maximum=200,
256
+ value=MIN_ARGUMENT_LENGTH,
257
+ step=5,
258
+ label="Minimum Argument Length (characters)"
259
+ )
260
  extract_btn = gr.Button("Extract Arguments", variant="primary")
261
 
262
  with gr.Column():
 
267
 
268
  extract_btn.click(
269
  fn=extract_arguments,
270
+ inputs=[input_text, temperature, min_length],
271
  outputs=[output_text]
272
  )
273
 
274
  # Examples
275
  gr.Examples(
276
  examples=[
277
+ ["Reggio, January 8. Frequent shocks of earthquake were felt here dur ing the night, accompanied at times by loud subter ranean reports. A few buildings that had not been completely destroyed were further damaged. The work of reconstructing the railway is being pushed forward energetically. News has been received from Brancaleone, Catanzaro, and Palmi of earthquakes by which the inhabitants were alarmed last night", 0.1, 50],
278
  ],
279
+ inputs=[input_text, temperature, min_length],
280
  outputs=[output_text],
281
  fn=extract_arguments
282
  )
 
292
  refresh_btn.click(fn=get_logs, outputs=[log_display])
293
 
294
  if __name__ == "__main__":
295
+ demo.launch()