Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -21,10 +21,8 @@ except OSError:
|
|
| 21 |
os.system("python -m spacy download en_core_web_sm")
|
| 22 |
nlp = spacy.load("en_core_web_sm")
|
| 23 |
|
| 24 |
-
# Global variables
|
| 25 |
generated_summaries = {}
|
| 26 |
|
| 27 |
-
# Regulatory data
|
| 28 |
RISK_KEYWORDS = [
|
| 29 |
"penalty", "breach", "liability", "default", "hidden obligations",
|
| 30 |
"indemnity", "terms of service", "non-compliance", "force majeure"
|
|
@@ -207,23 +205,29 @@ Please find the attached PDF summary for your review.
|
|
| 207 |
except Exception as e:
|
| 208 |
return f"Failed to send email: {str(e)}"
|
| 209 |
|
| 210 |
-
def
|
| 211 |
-
if not
|
| 212 |
return "Please upload files and select tasks."
|
| 213 |
|
| 214 |
results = {}
|
| 215 |
all_text_chunks = {}
|
| 216 |
|
| 217 |
-
for
|
| 218 |
-
file_name = os.path.basename(
|
| 219 |
file_extension = os.path.splitext(file_name)[1].lower()
|
| 220 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
if file_extension == '.pdf':
|
| 222 |
-
text = extract_text_from_pdf(
|
| 223 |
elif file_extension == '.txt':
|
| 224 |
-
text = extract_text_from_txt(
|
| 225 |
else:
|
| 226 |
-
results[file_name] = "Unsupported file format"
|
|
|
|
| 227 |
continue
|
| 228 |
|
| 229 |
text_chunks = split_text_into_chunks(text)
|
|
@@ -243,6 +247,9 @@ def process_files(files, tasks):
|
|
| 243 |
results[file_name] = file_results
|
| 244 |
generated_summaries[file_name] = file_results
|
| 245 |
|
|
|
|
|
|
|
|
|
|
| 246 |
return results, all_text_chunks
|
| 247 |
|
| 248 |
def display_results(results):
|
|
@@ -282,6 +289,38 @@ def answer_questions(file_name, question):
|
|
| 282 |
else:
|
| 283 |
return "No summary available for the selected file."
|
| 284 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
with gr.Blocks(title="AI-Driven Legal Document Analysis") as app:
|
| 286 |
gr.Markdown("""
|
| 287 |
# 📜 Advanced AI-Driven Legal Document Summarization and Risk Assessment
|
|
@@ -298,7 +337,8 @@ with gr.Blocks(title="AI-Driven Legal Document Analysis") as app:
|
|
| 298 |
with gr.Tab("Process Documents"):
|
| 299 |
with gr.Row():
|
| 300 |
with gr.Column():
|
| 301 |
-
|
|
|
|
| 302 |
task_checkboxes = gr.CheckboxGroup(
|
| 303 |
["Summarize", "Extract Clauses", "Risk Detection", "Regulatory Update Tracker"],
|
| 304 |
label="Choose Tasks"
|
|
@@ -312,7 +352,7 @@ with gr.Blocks(title="AI-Driven Legal Document Analysis") as app:
|
|
| 312 |
with gr.Tab("Email Summary"):
|
| 313 |
with gr.Row():
|
| 314 |
with gr.Column():
|
| 315 |
-
file_task_dropdown = gr.Dropdown(label="Select a Task Summary to Send"
|
| 316 |
receiver_email = gr.Textbox(label="Receiver Email")
|
| 317 |
email_subject = gr.Textbox(label="Email Subject", value="Legal Document Summary")
|
| 318 |
sender_email = gr.Textbox(label="Sender Email (Gmail)", value="your_email@gmail.com")
|
|
@@ -325,49 +365,16 @@ with gr.Blocks(title="AI-Driven Legal Document Analysis") as app:
|
|
| 325 |
with gr.Tab("Ask Questions"):
|
| 326 |
with gr.Row():
|
| 327 |
with gr.Column():
|
| 328 |
-
file_dropdown = gr.Dropdown(label="Select a File Summary"
|
| 329 |
question = gr.Textbox(label="Ask a Question")
|
| 330 |
ask_btn = gr.Button("Get Answer")
|
| 331 |
|
| 332 |
with gr.Column():
|
| 333 |
answer = gr.Textbox(label="Answer")
|
| 334 |
|
| 335 |
-
def
|
| 336 |
-
|
| 337 |
-
return None, "Please upload files and select tasks."
|
| 338 |
-
|
| 339 |
-
# Create progress visualization
|
| 340 |
-
progress_data = []
|
| 341 |
-
fig, ax = plt.subplots(figsize=(8, 4))
|
| 342 |
-
ax.set_title("Processing Progress")
|
| 343 |
-
ax.set_xlabel("Steps")
|
| 344 |
-
ax.set_ylabel("Progress (%)")
|
| 345 |
-
ax.grid(True)
|
| 346 |
-
|
| 347 |
-
total_steps = len(files) * len(tasks)
|
| 348 |
-
current_step = 0
|
| 349 |
-
|
| 350 |
-
results, all_text_chunks = process_files(files, tasks)
|
| 351 |
|
| 352 |
-
file_task_choices = [f"{file_name} - {task}" for file_name, file_results in results.items() for task in file_results.keys()]
|
| 353 |
-
file_choices = list(results.keys())
|
| 354 |
-
|
| 355 |
-
# For each step, update progress
|
| 356 |
-
for i in range(total_steps):
|
| 357 |
-
current_step += 1
|
| 358 |
-
progress = (current_step / total_steps) * 100
|
| 359 |
-
progress_data.append(progress)
|
| 360 |
-
|
| 361 |
-
ax.clear()
|
| 362 |
-
ax.plot(progress_data, color="blue", marker="o")
|
| 363 |
-
ax.set_title("Processing Progress")
|
| 364 |
-
ax.set_xlabel("Steps")
|
| 365 |
-
ax.set_ylabel("Progress (%)")
|
| 366 |
-
ax.set_ylim(0, 100)
|
| 367 |
-
ax.grid(True)
|
| 368 |
-
|
| 369 |
-
yield fig, display_results(results), gr.Dropdown.update(choices=file_task_choices), gr.Dropdown.update(choices=file_choices)
|
| 370 |
-
|
| 371 |
process_btn.click(
|
| 372 |
process_with_progress,
|
| 373 |
inputs=[files, task_checkboxes],
|
|
|
|
| 21 |
os.system("python -m spacy download en_core_web_sm")
|
| 22 |
nlp = spacy.load("en_core_web_sm")
|
| 23 |
|
|
|
|
| 24 |
generated_summaries = {}
|
| 25 |
|
|
|
|
| 26 |
RISK_KEYWORDS = [
|
| 27 |
"penalty", "breach", "liability", "default", "hidden obligations",
|
| 28 |
"indemnity", "terms of service", "non-compliance", "force majeure"
|
|
|
|
| 205 |
except Exception as e:
|
| 206 |
return f"Failed to send email: {str(e)}"
|
| 207 |
|
| 208 |
+
def process_uploaded_files(file_obj_list, tasks):
|
| 209 |
+
if not file_obj_list or not tasks:
|
| 210 |
return "Please upload files and select tasks."
|
| 211 |
|
| 212 |
results = {}
|
| 213 |
all_text_chunks = {}
|
| 214 |
|
| 215 |
+
for file_obj in file_obj_list:
|
| 216 |
+
file_name = os.path.basename(file_obj.name)
|
| 217 |
file_extension = os.path.splitext(file_name)[1].lower()
|
| 218 |
|
| 219 |
+
# Create a temporary file
|
| 220 |
+
temp_file_path = f"temp_{file_name}"
|
| 221 |
+
with open(temp_file_path, "wb") as temp_file:
|
| 222 |
+
temp_file.write(file_obj.read())
|
| 223 |
+
|
| 224 |
if file_extension == '.pdf':
|
| 225 |
+
text = extract_text_from_pdf(temp_file_path)
|
| 226 |
elif file_extension == '.txt':
|
| 227 |
+
text = extract_text_from_txt(temp_file_path)
|
| 228 |
else:
|
| 229 |
+
results[file_name] = {"Error": "Unsupported file format"}
|
| 230 |
+
os.remove(temp_file_path)
|
| 231 |
continue
|
| 232 |
|
| 233 |
text_chunks = split_text_into_chunks(text)
|
|
|
|
| 247 |
results[file_name] = file_results
|
| 248 |
generated_summaries[file_name] = file_results
|
| 249 |
|
| 250 |
+
# Clean up
|
| 251 |
+
os.remove(temp_file_path)
|
| 252 |
+
|
| 253 |
return results, all_text_chunks
|
| 254 |
|
| 255 |
def display_results(results):
|
|
|
|
| 289 |
else:
|
| 290 |
return "No summary available for the selected file."
|
| 291 |
|
| 292 |
+
def process_with_progress(file_obj_list, tasks):
    """Process the uploaded files and build the values shown in the UI.

    Args:
        file_obj_list: The uploaded file object, or a list/tuple of them.
            A single object is wrapped in a list before being handed to
            ``process_uploaded_files``, which iterates over its argument.
        tasks: The selected task names.

    Returns:
        A 4-tuple ``(figure, results_display, file_task_choices,
        file_choices)``. When no files or no tasks are supplied the tuple is
        ``(None, "Please upload files and select tasks.", [], [])`` so that
        both paths return the same number of values.
    """
    if not file_obj_list or not tasks:
        # Keep the same arity as the success path below.
        return None, "Please upload files and select tasks.", [], []

    # The upload component may deliver one file object rather than a list;
    # normalise so the downstream loop always sees a sequence of files.
    if not isinstance(file_obj_list, (list, tuple)):
        file_obj_list = [file_obj_list]

    # Only the per-file results are needed here; the text chunks are unused.
    results, _ = process_uploaded_files(file_obj_list, tasks)

    # Draw the final progress chart (0% -> 100%) once processing is done.
    progress_data = [0, 100]
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.plot(progress_data, color="blue", marker="o")
    ax.set_title("Processing Progress")
    ax.set_xlabel("Steps")
    ax.set_ylabel("Progress (%)")
    ax.set_ylim(0, 100)
    ax.grid(True)
    # Deregister the figure from pyplot so repeated clicks do not pile up
    # open figures; the returned Figure object is still handed to the caller.
    plt.close(fig)

    # Generate options for dropdowns.
    file_task_choices = [
        f"{file_name} - {task}"
        for file_name, file_results in results.items()
        for task in file_results
    ]
    file_choices = list(results)

    return fig, display_results(results), file_task_choices, file_choices
|
| 323 |
+
|
| 324 |
with gr.Blocks(title="AI-Driven Legal Document Analysis") as app:
|
| 325 |
gr.Markdown("""
|
| 326 |
# 📜 Advanced AI-Driven Legal Document Summarization and Risk Assessment
|
|
|
|
| 337 |
with gr.Tab("Process Documents"):
|
| 338 |
with gr.Row():
|
| 339 |
with gr.Column():
|
| 340 |
+
# Fixed: Using File() without 'multiple' parameter
|
| 341 |
+
files = gr.File(label="Upload PDFs or Text Files", file_types=["pdf", "txt"])
|
| 342 |
task_checkboxes = gr.CheckboxGroup(
|
| 343 |
["Summarize", "Extract Clauses", "Risk Detection", "Regulatory Update Tracker"],
|
| 344 |
label="Choose Tasks"
|
|
|
|
| 352 |
with gr.Tab("Email Summary"):
|
| 353 |
with gr.Row():
|
| 354 |
with gr.Column():
|
| 355 |
+
file_task_dropdown = gr.Dropdown(label="Select a Task Summary to Send")
|
| 356 |
receiver_email = gr.Textbox(label="Receiver Email")
|
| 357 |
email_subject = gr.Textbox(label="Email Subject", value="Legal Document Summary")
|
| 358 |
sender_email = gr.Textbox(label="Sender Email (Gmail)", value="your_email@gmail.com")
|
|
|
|
| 365 |
with gr.Tab("Ask Questions"):
|
| 366 |
with gr.Row():
|
| 367 |
with gr.Column():
|
| 368 |
+
file_dropdown = gr.Dropdown(label="Select a File Summary")
|
| 369 |
question = gr.Textbox(label="Ask a Question")
|
| 370 |
ask_btn = gr.Button("Get Answer")
|
| 371 |
|
| 372 |
with gr.Column():
|
| 373 |
answer = gr.Textbox(label="Answer")
|
| 374 |
|
| 375 |
+
def update_dropdowns(file_task_choices, file_choices):
    """Return a pair of dropdown updates carrying the given choice lists.

    The first update uses ``file_task_choices`` and the second uses
    ``file_choices``, in that order.
    """
    task_summary_update = gr.Dropdown.update(choices=file_task_choices)
    file_summary_update = gr.Dropdown.update(choices=file_choices)
    return task_summary_update, file_summary_update
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 377 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 378 |
process_btn.click(
|
| 379 |
process_with_progress,
|
| 380 |
inputs=[files, task_checkboxes],
|