flozi00 committed on
Commit
799a764
·
1 Parent(s): eaafbab

Enhance extraction output: include Markdown format and picture descriptions in the result

Browse files
Files changed (1) hide show
  1. app.py +23 -1
app.py CHANGED
@@ -68,7 +68,29 @@ def process_extraction(file_input, url_input, template_json):
68
  converter = get_converter_with_vision()
69
  try:
70
  result = converter.convert(source)
71
- return json.dumps(result.document.export_to_dict(), indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  except Exception as e:
73
  return json.dumps({"error": f"Conversion failed: {str(e)}"}, indent=2)
74
 
 
68
  converter = get_converter_with_vision()
69
  try:
70
  result = converter.convert(source)
71
+ doc = result.document
72
+
73
+ # Create a simplified output with Markdown and picture descriptions
74
+ simplified_output = {
75
+ "markdown": doc.export_to_markdown(),
76
+ "pictures": [],
77
+ }
78
+
79
+ # Extract picture descriptions if available
80
+ if hasattr(doc, "pictures"):
81
+ for i, pic in enumerate(doc.pictures):
82
+ descriptions = []
83
+ if hasattr(pic, "annotations"):
84
+ for ann in pic.annotations:
85
+ if hasattr(ann, "text"):
86
+ descriptions.append(ann.text)
87
+
88
+ if descriptions:
89
+ simplified_output["pictures"].append(
90
+ {"index": i, "descriptions": descriptions}
91
+ )
92
+
93
+ return json.dumps(simplified_output, indent=2)
94
  except Exception as e:
95
  return json.dumps({"error": f"Conversion failed: {str(e)}"}, indent=2)
96