ashkoff committed on
Commit
96109b6
·
1 Parent(s): c939c55
Files changed (2) hide show
  1. app.py +206 -78
  2. logging-config.json +1 -1
app.py CHANGED
@@ -27,7 +27,7 @@ from graph import graph, model # noqa
27
 
28
  FOLLOWUP_QUESTION_NUMBER = 3
29
  TRIM_MESSAGE_LENGTH = 16 # Includes tool messages
30
- USER_INPUT_MAX_LENGTH = 10000 # Characters
31
  set_verbose(True)
32
  set_debug(True)
33
 
@@ -316,18 +316,26 @@ def find_story_files(idml_package, tag_patterns):
316
  Returns:
317
  dict: Mapping of tag patterns to story files
318
  """
 
319
  compiled_patterns = {pattern: re.compile(pattern) for pattern in tag_patterns}
320
  tag_to_story = {pattern: [] for pattern in tag_patterns}
321
  stories = [name for name in idml_package.namelist() if name.startswith("Stories/")]
322
 
 
 
323
  for story_path in stories:
324
  try:
325
  content = idml_package.open(story_path).read().decode("utf-8")
326
  for pattern, regex in compiled_patterns.items():
327
  if regex.search(content):
 
328
  tag_to_story[pattern].append(story_path)
329
  except Exception as e:
330
- print(f"Error reading {story_path}: {e}")
 
 
 
 
331
 
332
  return tag_to_story
333
 
@@ -344,12 +352,18 @@ def replace_content(xml_content, tag_pattern, replacements):
344
  Returns:
345
  str: Updated XML content
346
  """
 
 
 
 
347
  tags = re.finditer(tag_pattern, xml_content)
348
  tag_positions = [(m.start(), m.end()) for m in tags]
349
 
350
  if not tag_positions:
 
351
  return xml_content
352
 
 
353
  content_chars = list(xml_content)
354
 
355
  for i, (start, end) in enumerate(reversed(tag_positions)):
@@ -358,20 +372,31 @@ def replace_content(xml_content, tag_pattern, replacements):
358
  if index < len(replacements):
359
  # Replace with actual data
360
  new_content = f"<Content>{replacements[index]}</Content>"
 
 
 
361
  content_chars[start:end] = new_content
362
  else:
363
  br_pattern = r"\s*<Br />"
364
  br_match = re.search(br_pattern, "".join(content_chars[end : end + 20]))
365
  if br_match:
 
 
 
366
  del content_chars[start : end + br_match.end()]
367
  else:
 
368
  del content_chars[start:end]
369
 
370
  if len(replacements) > len(tag_positions) and tag_positions:
371
  last_pos = tag_positions[-1][1]
 
 
 
372
 
373
  for item in replacements[len(tag_positions) :]:
374
  insert_content = f"\n<Content>{item}</Content>\n<Br />"
 
375
  content_chars.insert(last_pos, insert_content)
376
  last_pos += len(insert_content)
377
 
@@ -388,6 +413,9 @@ def create_replacements_from_metrics(metrics_data):
388
  Returns:
389
  dict: Mapping of tag patterns to replacement values
390
  """
 
 
 
391
  # Define mappings between metrics keys and IDML tag patterns
392
  replacements = {
393
  # Project Description
@@ -422,6 +450,15 @@ def create_replacements_from_metrics(metrics_data):
422
  ],
423
  }
424
 
 
 
 
 
 
 
 
 
 
425
  return replacements
426
 
427
 
@@ -436,6 +473,7 @@ async def update_idml_content(idml_path, replacements_json):
436
  Returns:
437
  str: Path to the updated IDML file
438
  """
 
439
  # Parse JSON if it's a string
440
  if isinstance(replacements_json, str):
441
  replacements = json.loads(replacements_json)
@@ -444,89 +482,134 @@ async def update_idml_content(idml_path, replacements_json):
444
 
445
  # Get the directory where app.py is located
446
  app_dir = os.path.dirname(os.path.abspath(__file__))
 
447
 
448
  # Create a temporary directory
449
  with tempfile.TemporaryDirectory() as temp_dir:
 
450
  # Create a copy of the IDML file to work with
451
  temp_idml = os.path.join(temp_dir, "temp.idml")
452
- shutil.copy2(idml_path, temp_idml)
453
-
454
- with idml.IDMLPackage(temp_idml) as working_idml:
455
- # Find all story files containing our tags
456
- tag_patterns = list(replacements.keys())
457
- tag_to_story = find_story_files(working_idml, tag_patterns)
458
 
459
- # Extract the IDML
460
- extract_dir = os.path.join(temp_dir, "extracted")
461
- os.makedirs(extract_dir, exist_ok=True)
462
- working_idml.extractall(extract_dir)
 
 
 
 
 
463
 
464
- # Process each tag pattern
465
- for tag_pattern, replacement_values in replacements.items():
466
- story_files = tag_to_story.get(tag_pattern, [])
 
 
467
 
468
- if not story_files:
469
- print(
470
- f"Warning: No story files found containing pattern '{tag_pattern}'"
471
- )
472
- continue
473
 
474
- print(
475
- f"Found pattern '{tag_pattern}' in {len(story_files)} story file(s)"
476
- )
 
 
477
 
478
- # Update each story file containing this tag
479
- for story_path in story_files:
480
- # Read the XML content
481
- with open(
482
- os.path.join(extract_dir, story_path), "r", encoding="utf-8"
483
- ) as f:
484
- xml_content = f.read()
485
-
486
- # Update the content
487
- updated_content = replace_content(
488
- xml_content, tag_pattern, replacement_values
489
  )
490
 
491
- # Write back the updated content
492
- with open(
493
- os.path.join(extract_dir, story_path), "w", encoding="utf-8"
494
- ) as f:
495
- f.write(updated_content)
496
-
497
- # Create the output path in the same directory as app.py
498
- base_name = os.path.splitext(os.path.basename(idml_path))[0]
499
- output_filename = (
500
- f"{base_name}_filled_{datetime.now().strftime('%Y%m%d%H%M%S')}.idml"
501
- )
502
- output_path = os.path.join(app_dir, output_filename)
 
 
 
503
 
504
- # Create a new IDML with the updated content
505
- shutil.make_archive(output_path, "zip", extract_dir)
506
- os.rename(output_path + ".zip", output_path)
 
 
 
 
 
 
 
 
 
 
507
 
508
- print(f"Updated IDML saved to: {output_path}")
509
- return output_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
510
 
511
 
512
  async def export_idml(graph_state: GraphProcessingState, table_data):
513
  """Export the current metrics, marketing copy, and table data to IDML file"""
 
514
  try:
515
  if "idml_file" not in graph_state:
 
516
  return None, "No IDML file uploaded"
517
 
518
  if "metrics" not in graph_state or "marketing_copy" not in graph_state:
 
519
  return None, "No metrics or marketing copy available"
520
 
 
 
 
 
 
521
  updated_data = dict(graph_state["metrics"])
 
522
 
523
  if table_data is not None and not table_data.empty:
524
  descriptions = table_data["description"].dropna().tolist()
525
  descriptions = [
526
  desc for desc in descriptions if desc.strip()
527
  ] # Remove empty strings
 
528
  else:
529
  descriptions = [""] # If no descriptions, create one empty file
 
530
 
531
  # Process each description and create IDML files
532
  output_paths = []
@@ -534,29 +617,58 @@ async def export_idml(graph_state: GraphProcessingState, table_data):
534
 
535
  # Process each file one at a time to avoid race conditions
536
  for i, text in enumerate(descriptions):
537
- if "Project Description" not in text:
538
- updated_data["description"] = text
539
- print(f"Processing description {i+1}/{len(descriptions)}: {text}")
540
- replacements = create_replacements_from_metrics(updated_data)
541
- output_path = await update_idml_content(
542
- graph_state["idml_file"], replacements
543
- )
544
- output_paths.append(output_path)
545
- # Brief pause to ensure unique timestamps
546
- await asyncio.sleep(1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
547
 
548
- print(f"Generated {len(output_paths)} IDML files: {output_paths}")
549
  return output_paths, f"{len(output_paths)} IDML files successfully updated"
550
  except Exception as e:
551
  import traceback
552
 
553
- print(f"Error in export_idml: {str(e)}")
554
- print(traceback.format_exc())
555
  return None, f"Error updating IDML: {str(e)}"
556
 
557
- # Create placeholder data for the table
558
-
559
 
 
560
  placeholder_data = pd.DataFrame(
561
  {
562
  "description": [
@@ -576,7 +688,11 @@ with gr.Blocks(title="Transcript to Marketing Copy", fill_height=True, css=CSS)
576
  with gr.Row():
577
  with gr.Column(scale=4):
578
  chatbot = gr.Chatbot(type="messages", height=700, show_copy_button=True)
579
- chatbot.clear(fn=clear, outputs=[gradio_graph_state, uuid_state])
 
 
 
 
580
 
581
  multimodal = False
582
  textbox_component = gr.MultimodalTextbox if multimodal else gr.Textbox
@@ -636,23 +752,35 @@ with gr.Blocks(title="Transcript to Marketing Copy", fill_height=True, css=CSS)
636
  label="IDML Export Status",
637
  interactive=False,
638
  lines=2,
639
- visible=False,
640
  )
641
  idml_output = gr.File(
642
  label="Download Updated IDML",
643
  file_count="multiple",
644
  visible=True,
645
  )
646
- download_btn.click(
647
- fn=download_csv,
648
- inputs=[table],
649
- outputs=gr.File(label="Download CSV"),
650
- )
651
- export_idml_btn.click(
652
- fn=export_idml,
653
- inputs=[gradio_graph_state, table],
654
- outputs=[idml_output, idml_status],
655
- )
 
 
 
 
 
 
 
 
 
 
 
 
656
  metrics_display = gr.Textbox(
657
  label="Project Metrics", interactive=False, lines=1, scale=1
658
  )
 
27
 
28
  FOLLOWUP_QUESTION_NUMBER = 3
29
  TRIM_MESSAGE_LENGTH = 16 # Includes tool messages
30
+ USER_INPUT_MAX_LENGTH = 10001 # Characters
31
  set_verbose(True)
32
  set_debug(True)
33
 
 
316
  Returns:
317
  dict: Mapping of tag patterns to story files
318
  """
319
+ logger.info(f"Searching for {len(tag_patterns)} tag patterns in IDML files")
320
  compiled_patterns = {pattern: re.compile(pattern) for pattern in tag_patterns}
321
  tag_to_story = {pattern: [] for pattern in tag_patterns}
322
  stories = [name for name in idml_package.namelist() if name.startswith("Stories/")]
323
 
324
+ logger.info(f"Found {len(stories)} story files in IDML package")
325
+
326
  for story_path in stories:
327
  try:
328
  content = idml_package.open(story_path).read().decode("utf-8")
329
  for pattern, regex in compiled_patterns.items():
330
  if regex.search(content):
331
+ logger.info(f"Found pattern '{pattern}' in {story_path}")
332
  tag_to_story[pattern].append(story_path)
333
  except Exception as e:
334
+ logger.error(f"Error reading {story_path}: {e}")
335
+
336
+ # Log summary of matches
337
+ for pattern, story_files in tag_to_story.items():
338
+ logger.info(f"Pattern '{pattern}' found in {len(story_files)} story files")
339
 
340
  return tag_to_story
341
 
 
352
  Returns:
353
  str: Updated XML content
354
  """
355
+ logger.info(
356
+ f"Replacing content with pattern '{tag_pattern}' using {len(replacements)} replacements"
357
+ )
358
+
359
  tags = re.finditer(tag_pattern, xml_content)
360
  tag_positions = [(m.start(), m.end()) for m in tags]
361
 
362
  if not tag_positions:
363
+ logger.warning(f"No tags found with pattern '{tag_pattern}' in XML content")
364
  return xml_content
365
 
366
+ logger.info(f"Found {len(tag_positions)} matching tags to replace")
367
  content_chars = list(xml_content)
368
 
369
  for i, (start, end) in enumerate(reversed(tag_positions)):
 
372
  if index < len(replacements):
373
  # Replace with actual data
374
  new_content = f"<Content>{replacements[index]}</Content>"
375
+ logger.info(
376
+ f"Replacing tag at position {start}-{end} with content: {new_content[:50]}..."
377
+ )
378
  content_chars[start:end] = new_content
379
  else:
380
  br_pattern = r"\s*<Br />"
381
  br_match = re.search(br_pattern, "".join(content_chars[end : end + 20]))
382
  if br_match:
383
+ logger.info(
384
+ f"Removing tag at position {start}-{end} with following line break"
385
+ )
386
  del content_chars[start : end + br_match.end()]
387
  else:
388
+ logger.info(f"Removing tag at position {start}-{end}")
389
  del content_chars[start:end]
390
 
391
  if len(replacements) > len(tag_positions) and tag_positions:
392
  last_pos = tag_positions[-1][1]
393
+ logger.info(
394
+ f"Adding {len(replacements) - len(tag_positions)} additional replacements after position {last_pos}"
395
+ )
396
 
397
  for item in replacements[len(tag_positions) :]:
398
  insert_content = f"\n<Content>{item}</Content>\n<Br />"
399
+ logger.info(f"Inserting new content: {insert_content[:50]}...")
400
  content_chars.insert(last_pos, insert_content)
401
  last_pos += len(insert_content)
402
 
 
413
  Returns:
414
  dict: Mapping of tag patterns to replacement values
415
  """
416
+ logger.info(
417
+ f"Creating replacements from metrics: {json.dumps(metrics_data, default=str)}"
418
+ )
419
  # Define mappings between metrics keys and IDML tag patterns
420
  replacements = {
421
  # Project Description
 
450
  ],
451
  }
452
 
453
+ # Create a simplified version of replacements for logging
454
+ simplified_replacements = {}
455
+ for k, v in replacements.items():
456
+ if isinstance(v, list) and len(v) > 0:
457
+ simplified_replacements[k] = v
458
+
459
+ logger.info(
460
+ f"Generated replacements: {json.dumps(simplified_replacements, default=str)}"
461
+ )
462
  return replacements
463
 
464
 
 
473
  Returns:
474
  str: Path to the updated IDML file
475
  """
476
+ logger.info(f"Starting update_idml_content with file: {idml_path}")
477
  # Parse JSON if it's a string
478
  if isinstance(replacements_json, str):
479
  replacements = json.loads(replacements_json)
 
482
 
483
  # Get the directory where app.py is located
484
  app_dir = os.path.dirname(os.path.abspath(__file__))
485
+ logger.info(f"App directory: {app_dir}")
486
 
487
  # Create a temporary directory
488
  with tempfile.TemporaryDirectory() as temp_dir:
489
+ logger.info(f"Created temporary directory: {temp_dir}")
490
  # Create a copy of the IDML file to work with
491
  temp_idml = os.path.join(temp_dir, "temp.idml")
492
+ try:
493
+ shutil.copy2(idml_path, temp_idml)
494
+ logger.info(f"Copied IDML file to: {temp_idml}")
495
+ except Exception as e:
496
+ logger.error(f"Failed to copy IDML file: {str(e)}")
497
+ raise
498
 
499
+ try:
500
+ with idml.IDMLPackage(temp_idml) as working_idml:
501
+ # Find all story files containing our tags
502
+ tag_patterns = list(replacements.keys())
503
+ logger.info(f"Looking for {len(tag_patterns)} tag patterns in IDML")
504
+ tag_to_story = find_story_files(working_idml, tag_patterns)
505
+ logger.info(
506
+ f"Found tag patterns in story files: {json.dumps({k: len(v) for k, v in tag_to_story.items()}, default=str)}"
507
+ )
508
 
509
+ # Extract the IDML
510
+ extract_dir = os.path.join(temp_dir, "extracted")
511
+ os.makedirs(extract_dir, exist_ok=True)
512
+ logger.info(f"Extracting IDML to: {extract_dir}")
513
+ working_idml.extractall(extract_dir)
514
 
515
+ # Process each tag pattern
516
+ for tag_pattern, replacement_values in replacements.items():
517
+ story_files = tag_to_story.get(tag_pattern, [])
 
 
518
 
519
+ if not story_files:
520
+ logger.warning(
521
+ f"No story files found containing pattern '{tag_pattern}'"
522
+ )
523
+ continue
524
 
525
+ logger.info(
526
+ f"Found pattern '{tag_pattern}' in {len(story_files)} story file(s)"
 
 
 
 
 
 
 
 
 
527
  )
528
 
529
+ # Update each story file containing this tag
530
+ for story_path in story_files:
531
+ # Read the XML content
532
+ try:
533
+ with open(
534
+ os.path.join(extract_dir, story_path),
535
+ "r",
536
+ encoding="utf-8",
537
+ ) as f:
538
+ xml_content = f.read()
539
+
540
+ # Update the content
541
+ updated_content = replace_content(
542
+ xml_content, tag_pattern, replacement_values
543
+ )
544
 
545
+ # Write back the updated content
546
+ with open(
547
+ os.path.join(extract_dir, story_path),
548
+ "w",
549
+ encoding="utf-8",
550
+ ) as f:
551
+ f.write(updated_content)
552
+
553
+ logger.info(f"Updated content in {story_path}")
554
+ except Exception as e:
555
+ logger.error(
556
+ f"Error processing story file {story_path}: {str(e)}"
557
+ )
558
 
559
+ # Create the output path in the same directory as app.py
560
+ base_name = os.path.splitext(os.path.basename(idml_path))[0]
561
+ output_filename = (
562
+ f"{base_name}_filled_{datetime.now().strftime('%Y%m%d%H%M%S')}.idml"
563
+ )
564
+ output_path = os.path.join(app_dir, output_filename)
565
+ logger.info(f"Output IDML will be saved to: {output_path}")
566
+
567
+ # Create a new IDML with the updated content
568
+ try:
569
+ logger.info(f"Creating archive from: {extract_dir}")
570
+ shutil.make_archive(output_path, "zip", extract_dir)
571
+ logger.info(f"Renaming {output_path}.zip to {output_path}")
572
+ os.rename(output_path + ".zip", output_path)
573
+ logger.info(f"Successfully created IDML: {output_path}")
574
+ except Exception as e:
575
+ logger.error(f"Error creating archive: {str(e)}")
576
+ raise
577
+
578
+ return output_path
579
+ except Exception as e:
580
+ logger.error(f"Error in IDML processing: {str(e)}")
581
+ raise
582
 
583
 
584
  async def export_idml(graph_state: GraphProcessingState, table_data):
585
  """Export the current metrics, marketing copy, and table data to IDML file"""
586
+ logger.info("Starting export_idml function")
587
  try:
588
  if "idml_file" not in graph_state:
589
+ logger.warning("No IDML file uploaded in graph_state")
590
  return None, "No IDML file uploaded"
591
 
592
  if "metrics" not in graph_state or "marketing_copy" not in graph_state:
593
+ logger.warning("No metrics or marketing copy available in graph_state")
594
  return None, "No metrics or marketing copy available"
595
 
596
+ logger.info(f"IDML file path: {graph_state['idml_file']}")
597
+ logger.info(
598
+ f"Table data: {table_data.shape if table_data is not None else None}"
599
+ )
600
+
601
  updated_data = dict(graph_state["metrics"])
602
+ logger.info(f"Metrics data keys: {updated_data.keys()}")
603
 
604
  if table_data is not None and not table_data.empty:
605
  descriptions = table_data["description"].dropna().tolist()
606
  descriptions = [
607
  desc for desc in descriptions if desc.strip()
608
  ] # Remove empty strings
609
+ logger.info(f"Found {len(descriptions)} descriptions in table data")
610
  else:
611
  descriptions = [""] # If no descriptions, create one empty file
612
+ logger.warning("No descriptions in table data, using empty description")
613
 
614
  # Process each description and create IDML files
615
  output_paths = []
 
617
 
618
  # Process each file one at a time to avoid race conditions
619
  for i, text in enumerate(descriptions):
620
+ try:
621
+ if "Project Description" not in text:
622
+ logger.info(
623
+ f"Processing description {i+1}/{len(descriptions)}: {text[:100]}..."
624
+ )
625
+ updated_data["description"] = text
626
+ replacements = create_replacements_from_metrics(updated_data)
627
+
628
+ # Check if IDML file exists
629
+ if not os.path.exists(graph_state["idml_file"]):
630
+ logger.error(
631
+ f"IDML file does not exist: {graph_state['idml_file']}"
632
+ )
633
+ return None, f"IDML file not found: {graph_state['idml_file']}"
634
+
635
+ output_path = await update_idml_content(
636
+ graph_state["idml_file"], replacements
637
+ )
638
+
639
+ # Verify the output file exists
640
+ if os.path.exists(output_path):
641
+ logger.info(f"Output file created successfully: {output_path}")
642
+ output_paths.append(output_path)
643
+ else:
644
+ logger.error(f"Output file was not created: {output_path}")
645
+
646
+ # Brief pause to ensure unique timestamps
647
+ await asyncio.sleep(1)
648
+ else:
649
+ logger.info(f"Skipping placeholder description {i+1}")
650
+ except Exception as e:
651
+ logger.error(f"Error processing description {i+1}: {str(e)}")
652
+ import traceback
653
+
654
+ logger.error(traceback.format_exc())
655
+
656
+ logger.info(f"Generated {len(output_paths)} IDML files: {output_paths}")
657
+
658
+ if len(output_paths) == 0:
659
+ logger.warning("No IDML files were generated")
660
+ return None, "No IDML files were generated. Check the logs for details."
661
 
 
662
  return output_paths, f"{len(output_paths)} IDML files successfully updated"
663
  except Exception as e:
664
  import traceback
665
 
666
+ logger.error(f"Error in export_idml: {str(e)}")
667
+ logger.error(traceback.format_exc())
668
  return None, f"Error updating IDML: {str(e)}"
669
 
 
 
670
 
671
+ # Create placeholder data for the table
672
  placeholder_data = pd.DataFrame(
673
  {
674
  "description": [
 
688
  with gr.Row():
689
  with gr.Column(scale=4):
690
  chatbot = gr.Chatbot(type="messages", height=700, show_copy_button=True)
691
+ try:
692
+ chatbot.clear(fn=clear, outputs=[gradio_graph_state, uuid_state])
693
+ logger.info("Successfully set up chatbot.clear event")
694
+ except Exception as e:
695
+ logger.error(f"Error setting up chatbot.clear event: {str(e)}")
696
 
697
  multimodal = False
698
  textbox_component = gr.MultimodalTextbox if multimodal else gr.Textbox
 
752
  label="IDML Export Status",
753
  interactive=False,
754
  lines=2,
755
+ visible=True,
756
  )
757
  idml_output = gr.File(
758
  label="Download Updated IDML",
759
  file_count="multiple",
760
  visible=True,
761
  )
762
+
763
+ try:
764
+ download_btn.click(
765
+ fn=download_csv,
766
+ inputs=[table],
767
+ outputs=gr.File(label="Download CSV"),
768
+ )
769
+ logger.info("Successfully set up download_btn.click event")
770
+ except Exception as e:
771
+ logger.error(f"Error setting up download_btn.click event: {str(e)}")
772
+
773
+ try:
774
+ export_idml_btn.click(
775
+ fn=export_idml,
776
+ inputs=[gradio_graph_state, table],
777
+ outputs=[idml_output, idml_status],
778
+ )
779
+ logger.info("Successfully set up export_idml_btn.click event")
780
+ except Exception as e:
781
+ logger.error(
782
+ f"Error setting up export_idml_btn.click event: {str(e)}"
783
+ )
784
  metrics_display = gr.Textbox(
785
  label="Project Metrics", interactive=False, lines=1, scale=1
786
  )
logging-config.json CHANGED
@@ -22,7 +22,7 @@
22
  }
23
  },
24
  "root": {
25
- "level": "INFO",
26
  "handlers": ["console", "file"]
27
  }
28
  }
 
22
  }
23
  },
24
  "root": {
25
+ "level": "DEBUG",
26
  "handlers": ["console", "file"]
27
  }
28
  }