vachaspathi committed on
Commit
333db8a
·
verified ·
1 Parent(s): 120af7e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +125 -73
app.py CHANGED
@@ -5,7 +5,7 @@ import zipfile
5
  from jinja2 import Template
6
  from weasyprint import HTML
7
 
8
- # --- 1. Universal Document Template ---
9
  DOC_TEMPLATE = """
10
  <!DOCTYPE html>
11
  <html lang="en">
@@ -56,8 +56,8 @@ DOC_TEMPLATE = """
56
  <div class="addresses">
57
  <div class="address-box">
58
  {% if doc_type == 'expenses' %}
59
- <strong>Payable To:</strong>
60
- <div>{{ payee }}</div>
61
  {% else %}
62
  <strong>Bill To:</strong>
63
  {% if customer_details %}
@@ -68,11 +68,11 @@ DOC_TEMPLATE = """
68
  {% if customer_details.billing_address %}
69
  {% set addr = customer_details.billing_address %}
70
  <div>{{ addr.address }}</div>
71
- <div>{{ addr.city }}, {{ addr.state }} {{ addr.zip }}</div>
72
  <div>{{ addr.country }}</div>
73
  {% endif %}
74
  {% else %}
75
- <div>{{ customer_id }}</div>
76
  {% endif %}
77
  {% endif %}
78
  </div>
@@ -81,7 +81,7 @@ DOC_TEMPLATE = """
81
  {% if doc_type == 'expenses' %}
82
  <strong>Category:</strong> {{ category }}
83
  {% elif doc_type == 'delivery_challans' %}
84
- <strong>Ref Invoice:</strong> {{ reference_invoice }}
85
  {% elif doc_type == 'recurring_invoices' %}
86
  <strong>Frequency:</strong> {{ frequency }}
87
  {% endif %}
@@ -92,14 +92,9 @@ DOC_TEMPLATE = """
92
  <thead>
93
  <tr>
94
  <th width="50%">Item & Description</th>
95
- {% if doc_type == 'delivery_challans' %}
96
- <th>Packed In</th>
97
- <th class="text-right">Quantity</th>
98
- {% else %}
99
- <th class="text-right">Qty</th>
100
- <th class="text-right">Rate</th>
101
- <th class="text-right">Amount</th>
102
- {% endif %}
103
  </tr>
104
  </thead>
105
  <tbody>
@@ -111,14 +106,9 @@ DOC_TEMPLATE = """
111
  <div class="item-desc">{{ item.description }}</div>
112
  {% endif %}
113
  </td>
114
- {% if doc_type == 'delivery_challans' %}
115
- <td>{{ item.packed_in }}</td>
116
- <td class="text-right">{{ item.quantity }}</td>
117
- {% else %}
118
- <td class="text-right">{{ item.quantity }}</td>
119
- <td class="text-right">{{ item.rate }}</td>
120
- <td class="text-right">{{ item.calculated_amount }}</td>
121
- {% endif %}
122
  </tr>
123
  {% endfor %}
124
  </tbody>
@@ -139,16 +129,84 @@ DOC_TEMPLATE = """
139
  {% if notes %}
140
  <p><strong>Notes:</strong> {{ notes }}</p>
141
  {% endif %}
142
- {% if description %}
143
- <p><strong>Description:</strong> {{ description }}</p>
144
- {% endif %}
145
- <p>Generated via HuggingFace Spaces</p>
146
  </div>
147
  </body>
148
  </html>
149
  """
150
 
151
- # --- 2. Logic ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
  def get_contact_map(data):
154
  contacts = data.get("contacts", [])
@@ -160,20 +218,24 @@ def get_contact_map(data):
160
  return c_map
161
 
162
  def normalize_record(record, doc_type, contact_map):
163
- # Defaults
164
- title = doc_type.replace("_", " ").upper()
165
- number = "DOC-000"
166
-
167
  if doc_type == "invoices":
 
168
  number = record.get("invoice_number", "INV-000")
169
  elif doc_type == "recurring_invoices":
 
170
  number = record.get("profile_name", "REC-000")
171
  elif doc_type == "delivery_challans":
 
172
  number = record.get("challan_number", "DC-000")
173
  elif doc_type == "expenses":
 
174
  number = record.get("expense_id", "EXP-000")
 
 
 
175
 
176
- # Process Items
177
  items = record.get("line_items", [])
178
  sub_total = 0.0
179
 
@@ -181,11 +243,12 @@ def normalize_record(record, doc_type, contact_map):
181
  try: qty = float(item.get("quantity", 1))
182
  except: qty = 0
183
 
 
184
  try: rate = float(item.get("rate", item.get("amount", 0)))
185
  except: rate = 0
186
 
187
  amount = item.get("amount")
188
- if amount is None:
189
  amount = qty * rate
190
  else:
191
  try: amount = float(amount)
@@ -194,10 +257,9 @@ def normalize_record(record, doc_type, contact_map):
194
  item["quantity"] = qty
195
  item["rate"] = f"{rate:,.2f}"
196
  item["calculated_amount"] = f"{amount:,.2f}"
197
- item["packed_in"] = item.get("packed_in", "-") # For challans
198
-
199
  sub_total += amount
200
 
 
201
  cust_id = record.get("customer_id")
202
 
203
  return {
@@ -209,25 +271,24 @@ def normalize_record(record, doc_type, contact_map):
209
  "status": record.get("status"),
210
  "customer_id": cust_id,
211
  "customer_details": contact_map.get(cust_id, {}),
212
- "payee": record.get("payee"),
213
  "category": record.get("category"),
214
  "description": record.get("description"),
215
- "reference_invoice": record.get("reference_invoice", ""),
216
  "frequency": record.get("frequency"),
217
  "line_items": items,
218
  "calculated_total": f"{sub_total:,.2f}",
219
  "notes": record.get("notes", "")
220
  }
221
 
 
 
222
  def render_pdf(record):
223
  template = Template(DOC_TEMPLATE)
224
  html_content = template.render(**record)
225
 
226
- # Create Filename
227
  safe_name = str(record['doc_number']).replace(" ", "_").replace("/", "-").replace("\\", "-")
228
  filename = f"{safe_name}.pdf"
229
 
230
- # WRITE PDF - IF THIS FAILS, GRADIO WILL SHOW THE REAL ERROR
231
  HTML(string=html_content).write_pdf(filename)
232
  return filename
233
 
@@ -235,64 +296,55 @@ def process_file(file_obj):
235
  if file_obj is None:
236
  return None
237
 
238
- # 1. Load JSON
239
  try:
240
  with open(file_obj.name, 'r', encoding='utf-8') as f:
241
- raw_data = json.load(f)
242
  except Exception as e:
243
  raise gr.Error(f"JSON Load Error: {e}")
244
 
245
- # 2. Map Contacts
 
 
 
246
  contact_map = get_contact_map(raw_data)
247
 
248
  generated_files = []
249
  keys_to_process = ["invoices", "recurring_invoices", "delivery_challans", "expenses"]
250
 
251
- # Debug: Print found keys to logs
252
- print(f"DEBUG: JSON Keys found: {list(raw_data.keys())}")
253
-
254
- # 3. Process Records
255
- found_records_count = 0
256
-
257
  for key in keys_to_process:
258
  records = raw_data.get(key, [])
259
  if not isinstance(records, list):
260
  continue
261
 
262
  for rec in records:
263
- found_records_count += 1
264
- # Normalize
265
- norm_rec = normalize_record(rec, key, contact_map)
266
- # Render (No Try/Except here, let it fail so we see why)
267
- pdf_path = render_pdf(norm_rec)
268
- generated_files.append(pdf_path)
269
-
270
- if found_records_count == 0:
271
- raise gr.Error(f"Structure Error: JSON parsed, but no arrays found for keys: {keys_to_process}. Check your JSON keys.")
272
 
273
  if not generated_files:
274
- raise gr.Error("Unknown Error: Records found but no PDFs generated.")
 
 
275
 
276
- # 4. Zip or Return
277
- if len(generated_files) == 1:
278
- return generated_files[0]
279
- else:
280
- zip_name = "documents.zip"
281
- with zipfile.ZipFile(zip_name, 'w') as zf:
282
- for f in generated_files:
283
- zf.write(f)
284
- # Optional: keep files for debugging or remove them
285
- os.remove(f)
286
- return zip_name
287
 
288
- # --- 3. Interface ---
289
 
290
  iface = gr.Interface(
291
  fn=process_file,
292
- inputs=gr.File(label="Upload JSON", file_types=[".json"]),
293
  outputs=gr.File(label="Download Result"),
294
- title="Universal Document Generator",
295
- description="Upload a JSON file. Supported keys: invoices, recurring_invoices, delivery_challans, expenses.",
296
  allow_flagging="never"
297
  )
298
 
 
5
  from jinja2 import Template
6
  from weasyprint import HTML
7
 
8
+ # --- 1. PDF Template ---
9
  DOC_TEMPLATE = """
10
  <!DOCTYPE html>
11
  <html lang="en">
 
56
  <div class="addresses">
57
  <div class="address-box">
58
  {% if doc_type == 'expenses' %}
59
+ <strong>Payable To / Vendor:</strong>
60
+ <div>{{ payee or 'N/A' }}</div>
61
  {% else %}
62
  <strong>Bill To:</strong>
63
  {% if customer_details %}
 
68
  {% if customer_details.billing_address %}
69
  {% set addr = customer_details.billing_address %}
70
  <div>{{ addr.address }}</div>
71
+ <div>{{ addr.city }} {{ addr.zip }}</div>
72
  <div>{{ addr.country }}</div>
73
  {% endif %}
74
  {% else %}
75
+ <div>ID: {{ customer_id }}</div>
76
  {% endif %}
77
  {% endif %}
78
  </div>
 
81
  {% if doc_type == 'expenses' %}
82
  <strong>Category:</strong> {{ category }}
83
  {% elif doc_type == 'delivery_challans' %}
84
+ <strong>Details:</strong> Shipping Note
85
  {% elif doc_type == 'recurring_invoices' %}
86
  <strong>Frequency:</strong> {{ frequency }}
87
  {% endif %}
 
92
  <thead>
93
  <tr>
94
  <th width="50%">Item & Description</th>
95
+ <th class="text-right">Qty</th>
96
+ <th class="text-right">Rate</th>
97
+ <th class="text-right">Amount</th>
 
 
 
 
 
98
  </tr>
99
  </thead>
100
  <tbody>
 
106
  <div class="item-desc">{{ item.description }}</div>
107
  {% endif %}
108
  </td>
109
+ <td class="text-right">{{ item.quantity }}</td>
110
+ <td class="text-right">{{ item.rate }}</td>
111
+ <td class="text-right">{{ item.calculated_amount }}</td>
 
 
 
 
 
112
  </tr>
113
  {% endfor %}
114
  </tbody>
 
129
  {% if notes %}
130
  <p><strong>Notes:</strong> {{ notes }}</p>
131
  {% endif %}
132
+ <p>Generated Document</p>
 
 
 
133
  </div>
134
  </body>
135
  </html>
136
  """
137
 
138
+ # --- 2. Data Cleaning & Mapping Logic ---
139
+
140
def clean_input_data(raw_data):
    """Return a normalized copy of the uploaded JSON payload.

    Two clean-up steps are applied:

    1. If the payload is a Pandoc JSON document (a dict with a ``blocks``
       list), the first ``CodeBlock`` whose text parses to a JSON object is
       extracted and cleaned recursively.
    2. Alternative key names are mapped onto the names the rest of the
       pipeline reads: ``clients`` -> ``contacts``, ``customer_ref`` /
       ``client_ref`` -> ``customer_id``, ``items`` -> ``line_items``,
       ``name`` -> ``profile_name`` (recurring invoices only) and
       ``recurrence_frequency`` -> ``frequency``.

    The input is never modified; contacts and records are shallow-copied
    before keys are added.

    Args:
        raw_data: The value produced by ``json.load`` on the uploaded file.

    Returns:
        A dict containing only the recognized sections (``contacts``,
        ``invoices``, ``recurring_invoices``, ``delivery_challans``,
        ``expenses``) that were present in the input. Anything that is not
        a dict yields ``{}``.
    """
    if not isinstance(raw_data, dict):
        return {}

    # 1. Handle Pandoc/Markdown wrapper.
    blocks = raw_data.get("blocks")
    if isinstance(blocks, list):
        print("DEBUG: Detected Pandoc format. Extracting inner JSON...")
        for block in blocks:
            if not isinstance(block, dict) or block.get("t") != "CodeBlock":
                continue
            try:
                # The code text is read from block["c"][1].
                inner = json.loads(block["c"][1])
            except (KeyError, IndexError, TypeError, ValueError) as e:
                # One unparsable code block must not stop the search; the
                # JSON payload may be in a later block.
                print(f"DEBUG: Failed to extract inner JSON: {e}")
                continue
            if isinstance(inner, dict):
                return clean_input_data(inner)
        # Nothing usable was found: fall through and clean the wrapper.

    # 2. Standardize keys.
    cleaned = {}

    if "clients" in raw_data:
        clients = raw_data["clients"]
        if not isinstance(clients, list):
            clients = []
        cleaned["contacts"] = [
            _client_to_contact(client)
            for client in clients
            if isinstance(client, dict)
        ]
    elif "contacts" in raw_data:
        cleaned["contacts"] = raw_data["contacts"]

    for doc_key in (
        "invoices",
        "recurring_invoices",
        "delivery_challans",
        "expenses",
    ):
        if doc_key not in raw_data:
            continue
        records = raw_data[doc_key]
        if not isinstance(records, list):
            # Pass the value through untouched so a caller's own
            # "is this a list?" check can still reject it.
            cleaned[doc_key] = records
            continue
        cleaned[doc_key] = [
            _standardize_record(record, doc_key) for record in records
        ]

    return cleaned


def _client_to_contact(client):
    """Build a contact dict (with ``contact_id``) from one ``clients`` entry."""
    nested = client.get("contact")
    # Copy so the caller's data is left untouched.
    contact = dict(nested) if isinstance(nested, dict) else dict(client)

    contact_id = client.get("client_id", client.get("contact_id"))
    if contact_id is None:
        # Do not clobber an ID that exists only on the nested contact.
        contact_id = contact.get("contact_id")
    contact["contact_id"] = contact_id
    return contact


def _standardize_record(record, doc_key):
    """Return a copy of one transaction record with standard key names."""
    if not isinstance(record, dict):
        return record

    fixed = dict(record)

    # Reference IDs: client_ref wins over customer_ref when both exist.
    if "customer_ref" in fixed:
        fixed["customer_id"] = fixed["customer_ref"]
    if "client_ref" in fixed:
        fixed["customer_id"] = fixed["client_ref"]

    if "items" in fixed:
        fixed["line_items"] = fixed["items"]

    if doc_key == "recurring_invoices" and "name" in fixed:
        fixed["profile_name"] = fixed["name"]
    if "recurrence_frequency" in fixed:
        fixed["frequency"] = fixed["recurrence_frequency"]

    return fixed
210
 
211
  def get_contact_map(data):
212
  contacts = data.get("contacts", [])
 
218
  return c_map
219
 
220
  def normalize_record(record, doc_type, contact_map):
221
+ # Determine Title and Number
 
 
 
222
  if doc_type == "invoices":
223
+ title = "INVOICE"
224
  number = record.get("invoice_number", "INV-000")
225
  elif doc_type == "recurring_invoices":
226
+ title = "RECURRING INVOICE"
227
  number = record.get("profile_name", "REC-000")
228
  elif doc_type == "delivery_challans":
229
+ title = "DELIVERY CHALLAN"
230
  number = record.get("challan_number", "DC-000")
231
  elif doc_type == "expenses":
232
+ title = "EXPENSE RECEIPT"
233
  number = record.get("expense_id", "EXP-000")
234
+ else:
235
+ title = doc_type.upper()
236
+ number = "DOC-000"
237
 
238
+ # Process Line Items
239
  items = record.get("line_items", [])
240
  sub_total = 0.0
241
 
 
243
  try: qty = float(item.get("quantity", 1))
244
  except: qty = 0
245
 
246
+ # Expenses often have 'amount' directly, others have 'rate'
247
  try: rate = float(item.get("rate", item.get("amount", 0)))
248
  except: rate = 0
249
 
250
  amount = item.get("amount")
251
+ if amount is None or doc_type != 'expenses':
252
  amount = qty * rate
253
  else:
254
  try: amount = float(amount)
 
257
  item["quantity"] = qty
258
  item["rate"] = f"{rate:,.2f}"
259
  item["calculated_amount"] = f"{amount:,.2f}"
 
 
260
  sub_total += amount
261
 
262
+ # Get Customer Details
263
  cust_id = record.get("customer_id")
264
 
265
  return {
 
271
  "status": record.get("status"),
272
  "customer_id": cust_id,
273
  "customer_details": contact_map.get(cust_id, {}),
274
+ "payee": record.get("payee", record.get("category")), # Fallback for expenses
275
  "category": record.get("category"),
276
  "description": record.get("description"),
 
277
  "frequency": record.get("frequency"),
278
  "line_items": items,
279
  "calculated_total": f"{sub_total:,.2f}",
280
  "notes": record.get("notes", "")
281
  }
282
 
283
+ # --- 3. Main Process ---
284
+
285
  def render_pdf(record):
286
  template = Template(DOC_TEMPLATE)
287
  html_content = template.render(**record)
288
 
 
289
  safe_name = str(record['doc_number']).replace(" ", "_").replace("/", "-").replace("\\", "-")
290
  filename = f"{safe_name}.pdf"
291
 
 
292
  HTML(string=html_content).write_pdf(filename)
293
  return filename
294
 
 
296
  if file_obj is None:
297
  return None
298
 
 
299
  try:
300
  with open(file_obj.name, 'r', encoding='utf-8') as f:
301
+ raw_input = json.load(f)
302
  except Exception as e:
303
  raise gr.Error(f"JSON Load Error: {e}")
304
 
305
+ # CLEAN THE DATA
306
+ raw_data = clean_input_data(raw_input)
307
+
308
+ # Map Contacts
309
  contact_map = get_contact_map(raw_data)
310
 
311
  generated_files = []
312
  keys_to_process = ["invoices", "recurring_invoices", "delivery_challans", "expenses"]
313
 
 
 
 
 
 
 
314
  for key in keys_to_process:
315
  records = raw_data.get(key, [])
316
  if not isinstance(records, list):
317
  continue
318
 
319
  for rec in records:
320
+ try:
321
+ norm_rec = normalize_record(rec, key, contact_map)
322
+ pdf_path = render_pdf(norm_rec)
323
+ generated_files.append(pdf_path)
324
+ except Exception as e:
325
+ print(f"Error processing {key} record: {e}")
 
 
 
326
 
327
  if not generated_files:
328
+ # Check debug info
329
+ found_keys = list(raw_data.keys())
330
+ raise gr.Error(f"No PDFs generated. Found keys after cleaning: {found_keys}")
331
 
332
+ # Zip Result
333
+ zip_name = "documents.zip"
334
+ with zipfile.ZipFile(zip_name, 'w') as zf:
335
+ for f in generated_files:
336
+ zf.write(f)
337
+ os.remove(f)
338
+ return zip_name
 
 
 
 
339
 
340
+ # --- 4. Interface ---
341
 
342
  iface = gr.Interface(
343
  fn=process_file,
344
+ inputs=gr.File(label="Upload JSON (Zoho/Pandoc format supported)", file_types=[".json"]),
345
  outputs=gr.File(label="Download Result"),
346
+ title="Universal Invoice & Expense Generator",
347
+ description="Upload your JSON file. This tool automatically detects Invoice, Recurring Invoice, Challan, and Expense data, cleans the format, and generates PDFs.",
348
  allow_flagging="never"
349
  )
350