Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -24,6 +24,9 @@ logger = logging.getLogger(__name__)
|
|
| 24 |
# Model configuration
|
| 25 |
MODEL_ID = "oberbics/newspaper-argument-mining-V1"
|
| 26 |
|
|
|
|
|
|
|
|
|
|
| 27 |
SYSTEM_PROMPT = """You are an expert at analyzing historical texts and you hate to summarize
|
| 28 |
|
| 29 |
OUTPUT FORMAT - EXACTLY these 4 XML tags and NOTHING else:
|
|
@@ -79,7 +82,33 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
| 79 |
print("Model loaded successfully!")
|
| 80 |
logger.info("Model loaded successfully")
|
| 81 |
|
| 82 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
"""Log each interaction to JSON file for analysis"""
|
| 84 |
log_entry = {
|
| 85 |
"timestamp": datetime.datetime.now().isoformat(),
|
|
@@ -90,7 +119,8 @@ def log_interaction(input_text, temperature, output, processing_time, error=None
|
|
| 90 |
"processing_time_seconds": processing_time,
|
| 91 |
"has_error": error is not None,
|
| 92 |
"error_message": str(error) if error else None,
|
| 93 |
-
"output_preview": output[:200] if output else ""
|
|
|
|
| 94 |
}
|
| 95 |
|
| 96 |
# Save to JSON file
|
|
@@ -101,9 +131,9 @@ def log_interaction(input_text, temperature, output, processing_time, error=None
|
|
| 101 |
logger.error(f"Failed to save interaction log: {e}")
|
| 102 |
|
| 103 |
@spaces.GPU
|
| 104 |
-
def extract_arguments(text, temperature=0.1):
|
| 105 |
start_time = datetime.datetime.now()
|
| 106 |
-
logger.info(f"Processing request - Input length: {len(text) if text else 0}, Temperature: {temperature}")
|
| 107 |
|
| 108 |
if not text or not text.strip():
|
| 109 |
error_msg = "Please enter some text to analyze."
|
|
@@ -157,13 +187,16 @@ Extract arguments from historical text.
|
|
| 157 |
if arg_start != -1:
|
| 158 |
response = response[arg_start:]
|
| 159 |
|
|
|
|
|
|
|
|
|
|
| 160 |
processing_time = (datetime.datetime.now() - start_time).total_seconds()
|
| 161 |
-
logger.info(f"Processing completed in {processing_time:.2f} seconds - Output length: {len(
|
| 162 |
|
| 163 |
# Log successful interaction
|
| 164 |
-
log_interaction(text, temperature,
|
| 165 |
|
| 166 |
-
return
|
| 167 |
|
| 168 |
except Exception as e:
|
| 169 |
processing_time = (datetime.datetime.now() - start_time).total_seconds()
|
|
@@ -186,8 +219,9 @@ def get_logs():
|
|
| 186 |
for line in recent_logs:
|
| 187 |
entry = json.loads(line)
|
| 188 |
confidence_info = f", Confidence: {entry['confidence_score']:.3f}" if entry.get('confidence_score') else ""
|
|
|
|
| 189 |
rejected_info = " [REJECTED]" if entry.get('rejected') else ""
|
| 190 |
-
summary = f"[{entry['timestamp']}] Input: {entry['input_length']} chars, Output: {entry['output_length']} chars, Time: {entry['processing_time_seconds']:.2f}s{confidence_info}{rejected_info}"
|
| 191 |
if entry['has_error']:
|
| 192 |
summary += f" ERROR: {entry['error_message']}"
|
| 193 |
log_summary.append(summary)
|
|
@@ -196,10 +230,10 @@ def get_logs():
|
|
| 196 |
except Exception as e:
|
| 197 |
return f"Error reading logs: {e}"
|
| 198 |
|
| 199 |
-
# Gradio interface with logging viewer
|
| 200 |
with gr.Blocks(title="Newspaper Argumentative Unit Extractor") as demo:
|
| 201 |
gr.Markdown("# Newspaper Argumentative Unit Extractor")
|
| 202 |
-
gr.Markdown("Extract argumentative units from news sources")
|
| 203 |
|
| 204 |
with gr.Tab("Extract Arguments"):
|
| 205 |
with gr.Row():
|
|
@@ -216,6 +250,13 @@ with gr.Blocks(title="Newspaper Argumentative Unit Extractor") as demo:
|
|
| 216 |
step=0.01,
|
| 217 |
label="Temperature (lower = more consistent)"
|
| 218 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
extract_btn = gr.Button("Extract Arguments", variant="primary")
|
| 220 |
|
| 221 |
with gr.Column():
|
|
@@ -226,16 +267,16 @@ with gr.Blocks(title="Newspaper Argumentative Unit Extractor") as demo:
|
|
| 226 |
|
| 227 |
extract_btn.click(
|
| 228 |
fn=extract_arguments,
|
| 229 |
-
inputs=[input_text, temperature],
|
| 230 |
outputs=[output_text]
|
| 231 |
)
|
| 232 |
|
| 233 |
# Examples
|
| 234 |
gr.Examples(
|
| 235 |
examples=[
|
| 236 |
-
["Reggio, January 8. Frequent shocks of earthquake were felt here dur ing the night, accompanied at times by loud subter ranean reports. A few buildings that had not been completely destroyed were further damaged. The work of reconstructing the railway is being pushed forward energetically. News has been received from Brancaleone, Catanzaro, and Palmi of earthquakes by which the inhabitants were alarmed last night", 0.1],
|
| 237 |
],
|
| 238 |
-
inputs=[input_text, temperature],
|
| 239 |
outputs=[output_text],
|
| 240 |
fn=extract_arguments
|
| 241 |
)
|
|
@@ -251,4 +292,4 @@ with gr.Blocks(title="Newspaper Argumentative Unit Extractor") as demo:
|
|
| 251 |
refresh_btn.click(fn=get_logs, outputs=[log_display])
|
| 252 |
|
| 253 |
if __name__ == "__main__":
|
| 254 |
-
demo.launch()
|
|
|
|
| 24 |
# Model configuration
|
| 25 |
MODEL_ID = "oberbics/newspaper-argument-mining-V1"
|
| 26 |
|
| 27 |
+
# Add minimum length threshold for arguments
|
| 28 |
+
MIN_ARGUMENT_LENGTH = 50 # Adjust this value as needed
|
| 29 |
+
|
| 30 |
SYSTEM_PROMPT = """You are an expert at analyzing historical texts and you hate to summarize
|
| 31 |
|
| 32 |
OUTPUT FORMAT - EXACTLY these 4 XML tags and NOTHING else:
|
|
|
|
| 82 |
print("Model loaded successfully!")
|
| 83 |
logger.info("Model loaded successfully")
|
| 84 |
|
| 85 |
+
def parse_and_filter_arguments(response, min_length=MIN_ARGUMENT_LENGTH):
    """Replace a too-short extracted argument with the all-NA placeholder.

    The first ``<argument>...</argument>`` element found in ``response`` is
    inspected. If its stripped text is non-empty, is not the literal ``"NA"``
    and is shorter than ``min_length`` characters, the whole response is
    swapped for a fixed four-tag NA block.

    Args:
        response: Raw model output expected to contain the XML-style tags.
        min_length: Minimum number of characters an argument must have to
            be kept.

    Returns:
        A ``(text, was_filtered)`` tuple. ``text`` is either the NA
        placeholder (``was_filtered`` is True) or the untouched ``response``
        (``was_filtered`` is False). Any exception raised while inspecting
        the response is logged and treated as "not filtered".
    """
    na_placeholder = (
        "<argument>NA</argument>\n"
        "<claim>NA</claim>\n"
        "<explanation>NA</explanation>\n"
        "<human_verification_needed>False</human_verification_needed>"
    )

    try:
        # DOTALL lets the argument body span multiple lines.
        found = re.search(r'<argument>(.*?)</argument>', response, re.DOTALL)
        if found is None:
            return response, False

        body = found.group(1).strip()

        # Empty bodies and explicit "NA" answers are passed through as-is;
        # only real-but-short arguments are replaced.
        too_short = bool(body) and body != "NA" and len(body) < min_length
        if not too_short:
            return response, False

        logger.info(f"Argument filtered out due to length: {len(body)} chars (min: {min_length})")
        return na_placeholder, True

    except Exception as e:
        # Best effort: on any parsing problem hand back the original text.
        logger.error(f"Error parsing arguments: {e}")
        return response, False
|
| 110 |
+
|
| 111 |
+
def log_interaction(input_text, temperature, output, processing_time, error=None, filtered=False):
|
| 112 |
"""Log each interaction to JSON file for analysis"""
|
| 113 |
log_entry = {
|
| 114 |
"timestamp": datetime.datetime.now().isoformat(),
|
|
|
|
| 119 |
"processing_time_seconds": processing_time,
|
| 120 |
"has_error": error is not None,
|
| 121 |
"error_message": str(error) if error else None,
|
| 122 |
+
"output_preview": output[:200] if output else "",
|
| 123 |
+
"filtered_for_length": filtered
|
| 124 |
}
|
| 125 |
|
| 126 |
# Save to JSON file
|
|
|
|
| 131 |
logger.error(f"Failed to save interaction log: {e}")
|
| 132 |
|
| 133 |
@spaces.GPU
|
| 134 |
+
def extract_arguments(text, temperature=0.1, min_arg_length=MIN_ARGUMENT_LENGTH):
|
| 135 |
start_time = datetime.datetime.now()
|
| 136 |
+
logger.info(f"Processing request - Input length: {len(text) if text else 0}, Temperature: {temperature}, Min argument length: {min_arg_length}")
|
| 137 |
|
| 138 |
if not text or not text.strip():
|
| 139 |
error_msg = "Please enter some text to analyze."
|
|
|
|
| 187 |
if arg_start != -1:
|
| 188 |
response = response[arg_start:]
|
| 189 |
|
| 190 |
+
# Filter out short arguments
|
| 191 |
+
filtered_response, was_filtered = parse_and_filter_arguments(response, min_arg_length)
|
| 192 |
+
|
| 193 |
processing_time = (datetime.datetime.now() - start_time).total_seconds()
|
| 194 |
+
logger.info(f"Processing completed in {processing_time:.2f} seconds - Output length: {len(filtered_response)} - Filtered: {was_filtered}")
|
| 195 |
|
| 196 |
# Log successful interaction
|
| 197 |
+
log_interaction(text, temperature, filtered_response, processing_time, filtered=was_filtered)
|
| 198 |
|
| 199 |
+
return filtered_response
|
| 200 |
|
| 201 |
except Exception as e:
|
| 202 |
processing_time = (datetime.datetime.now() - start_time).total_seconds()
|
|
|
|
| 219 |
for line in recent_logs:
|
| 220 |
entry = json.loads(line)
|
| 221 |
confidence_info = f", Confidence: {entry['confidence_score']:.3f}" if entry.get('confidence_score') else ""
|
| 222 |
+
filtered_info = " [FILTERED]" if entry.get('filtered_for_length') else ""
|
| 223 |
rejected_info = " [REJECTED]" if entry.get('rejected') else ""
|
| 224 |
+
summary = f"[{entry['timestamp']}] Input: {entry['input_length']} chars, Output: {entry['output_length']} chars, Time: {entry['processing_time_seconds']:.2f}s{confidence_info}{filtered_info}{rejected_info}"
|
| 225 |
if entry['has_error']:
|
| 226 |
summary += f" ERROR: {entry['error_message']}"
|
| 227 |
log_summary.append(summary)
|
|
|
|
| 230 |
except Exception as e:
|
| 231 |
return f"Error reading logs: {e}"
|
| 232 |
|
| 233 |
+
# Gradio interface with logging viewer and length control
|
| 234 |
with gr.Blocks(title="Newspaper Argumentative Unit Extractor") as demo:
|
| 235 |
gr.Markdown("# Newspaper Argumentative Unit Extractor")
|
| 236 |
+
gr.Markdown("Extract argumentative units from news sources (filters out arguments shorter than specified length)")
|
| 237 |
|
| 238 |
with gr.Tab("Extract Arguments"):
|
| 239 |
with gr.Row():
|
|
|
|
| 250 |
step=0.01,
|
| 251 |
label="Temperature (lower = more consistent)"
|
| 252 |
)
|
| 253 |
+
min_length = gr.Slider(
|
| 254 |
+
minimum=10,
|
| 255 |
+
maximum=200,
|
| 256 |
+
value=MIN_ARGUMENT_LENGTH,
|
| 257 |
+
step=5,
|
| 258 |
+
label="Minimum Argument Length (characters)"
|
| 259 |
+
)
|
| 260 |
extract_btn = gr.Button("Extract Arguments", variant="primary")
|
| 261 |
|
| 262 |
with gr.Column():
|
|
|
|
| 267 |
|
| 268 |
extract_btn.click(
|
| 269 |
fn=extract_arguments,
|
| 270 |
+
inputs=[input_text, temperature, min_length],
|
| 271 |
outputs=[output_text]
|
| 272 |
)
|
| 273 |
|
| 274 |
# Examples
|
| 275 |
gr.Examples(
|
| 276 |
examples=[
|
| 277 |
+
["Reggio, January 8. Frequent shocks of earthquake were felt here dur ing the night, accompanied at times by loud subter ranean reports. A few buildings that had not been completely destroyed were further damaged. The work of reconstructing the railway is being pushed forward energetically. News has been received from Brancaleone, Catanzaro, and Palmi of earthquakes by which the inhabitants were alarmed last night", 0.1, 50],
|
| 278 |
],
|
| 279 |
+
inputs=[input_text, temperature, min_length],
|
| 280 |
outputs=[output_text],
|
| 281 |
fn=extract_arguments
|
| 282 |
)
|
|
|
|
| 292 |
refresh_btn.click(fn=get_logs, outputs=[log_display])
|
| 293 |
|
| 294 |
if __name__ == "__main__":
|
| 295 |
+
demo.launch()
|