bakyt92 committed on
Commit
e7c4b2b
·
1 Parent(s): 3638939

update of api connection

Browse files
Files changed (3) hide show
  1. app.py +14 -3
  2. config.py +2 -2
  3. wildberries_client.py +227 -101
app.py CHANGED
@@ -426,8 +426,10 @@ def create_interface():
426
 
427
  ### 📊 Sales Analytics
428
  - **Week Analysis**: Shows sales data for the last 7 days
429
- - **Month Analysis**: Shows sales data for the last 30 days
430
- - View total revenue, order count, and top-performing products
 
 
431
 
432
  ### 📦 Inventory Forecasting
433
  Choose from multiple forecasting methods:
@@ -442,8 +444,17 @@ def create_interface():
442
  - 🟡 **Warning** (7-14 days): Monitor closely
443
  - 🟢 **Safe** (> 14 days): Adequate stock levels
444
 
 
 
 
 
 
445
  ### 🔗 API Information
446
- This dashboard uses the [Wildberries API](https://dev.wildberries.ru/en/openapi/api-information) with respect for rate limits (300 requests/minute).
 
 
 
 
447
 
448
  ### 🛠️ Technical Details
449
  - **Framework**: Gradio + FastMCP
 
426
 
427
  ### 📊 Sales Analytics
428
  - **Week Analysis**: Shows sales data for the last 7 days
429
+ - **Month Analysis**: Shows sales data for the last 30 days
430
+ - **Enhanced Metrics**: Commission analysis, net revenue, platform fees
431
+ - **Commission Dashboard**: Detailed commission breakdown by products
432
+ - **Pagination**: Automatically handles large datasets (80,000+ records)
433
 
434
  ### 📦 Inventory Forecasting
435
  Choose from multiple forecasting methods:
 
444
  - 🟡 **Warning** (7-14 days): Monitor closely
445
  - 🟢 **Safe** (> 14 days): Adequate stock levels
446
 
447
+ ### 🔍 Data Validation
448
+ - **Consistency Checks**: Automatic validation of data quality
449
+ - **Duplicate Detection**: Identifies duplicate sales records
450
+ - **Data Aggregation**: Performance optimization for large datasets
451
+
452
  ### 🔗 API Information
453
+ This dashboard uses the [Wildberries API](https://dev.wildberries.ru/en/openapi/api-information):
454
+ - **Sales Endpoint**: `/api/v1/supplier/sales` (with automatic pagination)
455
+ - **Stocks Endpoint**: `/api/v1/supplier/stocks`
456
+ - **Rate Limits**: 300 requests/minute (respected automatically)
457
+ - **Data Retention**: Sales data available for 90 days
458
 
459
  ### 🛠️ Technical Details
460
  - **Framework**: Gradio + FastMCP
config.py CHANGED
@@ -74,8 +74,8 @@ class Config:
74
  def get_endpoints(self) -> Dict[str, str]:
75
  """Get API endpoint configurations based on working API calls"""
76
  return {
77
- # Statistics API endpoints - Updated to working v5 version
78
- "sales": f"{self.wildberries_base_url}/api/v5/supplier/reportDetailByPeriod",
79
  "orders": f"{self.wildberries_base_url}/api/v5/supplier/reportDetailByPeriod",
80
  "stocks": f"{self.wildberries_base_url}/api/v1/supplier/stocks",
81
  "incomes": f"{self.wildberries_base_url}/api/v5/supplier/reportDetailByPeriod",
 
74
  def get_endpoints(self) -> Dict[str, str]:
75
  """Get API endpoint configurations based on working API calls"""
76
  return {
77
+ # Statistics API endpoints - Correct sales endpoint
78
+ "sales": f"{self.wildberries_base_url}/api/v1/supplier/sales",
79
  "orders": f"{self.wildberries_base_url}/api/v5/supplier/reportDetailByPeriod",
80
  "stocks": f"{self.wildberries_base_url}/api/v1/supplier/stocks",
81
  "incomes": f"{self.wildberries_base_url}/api/v5/supplier/reportDetailByPeriod",
wildberries_client.py CHANGED
@@ -46,26 +46,13 @@ def validate_wb_data(weekly_data: pd.DataFrame, monthly_data: pd.DataFrame) -> D
46
  f"Data inconsistency: Weekly revenue (β‚½{weekly_revenue:,.0f}) exceeds monthly revenue (β‚½{monthly_revenue:,.0f})"
47
  )
48
 
49
- # Check for empty doc_type_name (logistics entries)
50
  for df_name, df in [("weekly", weekly_data), ("monthly", monthly_data)]:
51
- if not df.empty and 'document_type' in df.columns:
52
- logistics_count = df[df['document_type'].isna() | (df['document_type'] == '')].shape[0]
53
- if logistics_count > 0:
54
  validation_results["warnings"].append(
55
- f"{df_name.title()} data: {logistics_count} logistics entries found (empty doc_type_name)"
56
- )
57
-
58
- # Check for zero quantities in sales records
59
- for df_name, df in [("weekly", weekly_data), ("monthly", monthly_data)]:
60
- if not df.empty and 'quantity' in df.columns and 'document_type' in df.columns:
61
- zero_qty_sales = df[
62
- (df['quantity'] == 0) &
63
- (df['document_type'].notna()) &
64
- (df['document_type'] != '')
65
- ].shape[0]
66
- if zero_qty_sales > 0:
67
- validation_results["warnings"].append(
68
- f"{df_name.title()} data: {zero_qty_sales} sales records with zero quantity"
69
  )
70
 
71
  except Exception as e:
@@ -95,12 +82,10 @@ def aggregate_wb_data(df: pd.DataFrame, period: str = 'daily') -> pd.DataFrame:
95
  }
96
 
97
  # Add optional columns if they exist
98
- if 'sales_commission' in df.columns:
99
- agg_functions['sales_commission'] = 'sum'
100
  if 'amount_for_pay' in df.columns:
101
  agg_functions['amount_for_pay'] = 'sum'
102
- if 'delivery_cost' in df.columns:
103
- agg_functions['delivery_cost'] = 'sum'
104
 
105
  if period == 'daily':
106
  grouped = df.groupby(df['sale_date'].dt.date)
@@ -245,46 +230,84 @@ class WildberriesAPI:
245
  logger.error(f"Request failed: {str(e)}")
246
  raise WildberriesAPIError(f"Request failed: {str(e)}")
247
 
248
- def get_sales(self, date_from: str, date_to: str = None) -> pd.DataFrame:
249
  """
250
- Get sales data from Wildberries API
251
 
252
  Args:
253
- date_from: Start date in YYYY-MM-DD format
254
- date_to: End date in YYYY-MM-DD format (optional)
 
255
 
256
  Returns:
257
  pandas.DataFrame with sales data
 
 
258
  """
259
  endpoint = self.config.get_endpoints()["sales"]
260
 
261
- # Add automatic dateTo defaulting to today's date when not provided
262
- if not date_to:
263
- date_to = datetime.now().strftime("%Y-%m-%d")
264
-
265
- params = {"dateFrom": date_from, "dateTo": date_to, "limit": 100}
266
 
267
  try:
268
- response = self._make_request("GET", endpoint, params=params)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
 
270
- if not response:
271
- logger.warning("No sales data returned from API")
272
- return pd.DataFrame()
273
 
274
- # Handle direct array response (v5 API format)
275
- if isinstance(response, list):
276
- sales_data = pd.DataFrame(response)
277
- elif isinstance(response, dict) and "data" in response:
278
- sales_data = pd.DataFrame(response["data"])
279
- else:
280
- logger.warning("Unexpected API response format")
281
  return pd.DataFrame()
282
 
283
- if sales_data.empty:
284
- return sales_data
 
285
 
286
- # Process and clean the data
287
- sales_data = self._process_reportdetail_data(sales_data)
288
 
289
  return sales_data
290
 
@@ -294,41 +317,80 @@ class WildberriesAPI:
294
 
295
  def get_stocks(self, date_from: str = None, date_to: str = None) -> pd.DataFrame:
296
  """
297
- Get current stock levels from Wildberries API
298
 
299
  Args:
300
- date_from: Date to get stock levels for (optional, defaults to yesterday)
301
  date_to: Not used for stocks endpoint (stocks API uses only dateFrom)
302
 
303
  Returns:
304
  pandas.DataFrame with stock data
 
 
305
  """
306
  endpoint = self.config.get_endpoints()["stocks"]
307
 
 
308
  if not date_from:
309
- date_from = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
310
 
311
- # Stocks API uses only dateFrom parameter
312
- params = {"dateFrom": date_from}
 
 
313
 
314
  try:
315
- response = self._make_request("GET", endpoint, params=params)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
316
 
317
- if not response:
318
- logger.warning("No stock data returned from API")
319
- return pd.DataFrame()
320
 
321
- # Stocks API returns direct array response
322
- if isinstance(response, list):
323
- stock_data = pd.DataFrame(response)
324
- elif isinstance(response, dict) and "data" in response:
325
- stock_data = pd.DataFrame(response["data"])
326
- else:
327
- logger.warning("Unexpected API response format")
328
  return pd.DataFrame()
329
 
330
- if stock_data.empty:
331
- return stock_data
 
332
 
333
  # Process and clean the stock data using specific stock processing
334
  stock_data = self._process_stock_data(stock_data)
@@ -387,36 +449,38 @@ class WildberriesAPI:
387
  raise WildberriesAPIError(f"Failed to fetch orders data: {str(e)}")
388
 
389
  def _process_sales_data(self, df: pd.DataFrame) -> pd.DataFrame:
390
- """Process and clean sales data from API response"""
391
 
392
- # Rename columns to standardized names
393
  column_mapping = {
394
- 'srid': 'sale_id',
395
- 'supplierArticle': 'article',
396
- 'nmId': 'product_id',
397
  'date': 'sale_date',
398
  'lastChangeDate': 'last_change_date',
399
  'warehouseName': 'warehouse',
 
400
  'countryName': 'country',
401
  'oblastOkrugName': 'region',
402
  'regionName': 'city',
 
 
 
 
 
 
 
403
  'incomeID': 'income_id',
404
  'isSupply': 'is_supply',
405
  'isRealization': 'is_realization',
406
- 'totalPrice': 'total_price',
407
  'discountPercent': 'discount_percent',
408
  'spp': 'spp_discount',
409
- 'paymentSaleAmount': 'sale_amount',
410
- 'forPay': 'amount_to_pay',
411
  'finishedPrice': 'finished_price',
412
  'priceWithDisc': 'price_with_discount',
413
- 'saleID': 'external_sale_id',
414
- 'orderType': 'order_type',
415
  'sticker': 'sticker',
416
  'gNumber': 'g_number',
417
- 'techSize': 'tech_size',
418
- 'brand': 'brand',
419
- 'subject': 'category'
420
  }
421
 
422
  # Rename columns that exist
@@ -431,32 +495,60 @@ class WildberriesAPI:
431
  df[col] = pd.to_datetime(df[col], errors='coerce')
432
 
433
  # Convert numeric columns
434
- numeric_columns = ['total_price', 'discount_percent', 'sale_amount', 'amount_to_pay', 'finished_price', 'price_with_discount']
 
 
 
435
  for col in numeric_columns:
436
  if col in df.columns:
437
  df[col] = pd.to_numeric(df[col], errors='coerce')
438
 
439
- # Add calculated fields
440
- if 'total_price' in df.columns:
441
- df['quantity'] = 1 # Each row represents one sale
442
-
443
- # Add product name (if not available, use article)
444
  if 'product_name' not in df.columns:
445
- if 'article' in df.columns:
446
- df['product_name'] = df['article']
 
 
447
  else:
448
  df['product_name'] = 'Unknown Product'
449
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
  return df
451
 
452
  def _process_stock_data(self, df: pd.DataFrame) -> pd.DataFrame:
453
- """Process and clean stock data from API response"""
454
 
455
- # Rename columns to standardized names
456
  column_mapping = {
457
  'lastChangeDate': 'last_change_date',
458
  'warehouseName': 'warehouse',
459
- 'supplierArticle': 'article',
460
  'nmId': 'product_id',
461
  'barcode': 'barcode',
462
  'quantity': 'current_stock',
@@ -468,7 +560,7 @@ class WildberriesAPI:
468
  'brand': 'brand',
469
  'techSize': 'tech_size',
470
  'Price': 'price',
471
- 'Discount': 'discount',
472
  'isSupply': 'is_supply',
473
  'isRealization': 'is_realization',
474
  'SCCode': 'sc_code'
@@ -484,27 +576,38 @@ class WildberriesAPI:
484
  df['last_change_date'] = pd.to_datetime(df['last_change_date'], errors='coerce')
485
 
486
  # Convert numeric columns
487
- numeric_columns = ['current_stock', 'in_way_to_client', 'in_way_from_client', 'quantity_full', 'price', 'discount']
 
 
 
488
  for col in numeric_columns:
489
  if col in df.columns:
490
  df[col] = pd.to_numeric(df[col], errors='coerce')
491
 
492
- # Add product name (if not available, use article)
493
  if 'product_name' not in df.columns:
494
- if 'article' in df.columns:
495
- df['product_name'] = df['article']
 
 
 
 
496
  else:
497
  df['product_name'] = 'Unknown Product'
498
 
499
- # Add missing columns that dashboard expects for inventory data
500
- # Create total_price from price field
 
 
 
 
 
 
501
  if 'total_price' not in df.columns:
502
- if 'price' in df.columns:
503
- # Calculate discounted price: price * (1 - discount/100)
504
- if 'discount' in df.columns:
505
- df['total_price'] = df['price'] * (1 - df['discount'] / 100)
506
- else:
507
- df['total_price'] = df['price']
508
  else:
509
  df['total_price'] = 0
510
 
@@ -526,6 +629,29 @@ class WildberriesAPI:
526
  if 'sale_amount' not in df.columns:
527
  df['sale_amount'] = df['total_price'] * df['quantity']
528
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
529
  return df
530
 
531
  def _process_reportdetail_data(self, df: pd.DataFrame) -> pd.DataFrame:
 
46
  f"Data inconsistency: Weekly revenue (β‚½{weekly_revenue:,.0f}) exceeds monthly revenue (β‚½{monthly_revenue:,.0f})"
47
  )
48
 
49
+ # Check for duplicate sales IDs
50
  for df_name, df in [("weekly", weekly_data), ("monthly", monthly_data)]:
51
+ if not df.empty and 'srid' in df.columns:
52
+ duplicate_count = df.duplicated('srid').sum()
53
+ if duplicate_count > 0:
54
  validation_results["warnings"].append(
55
+ f"{df_name.title()} data: {duplicate_count} duplicate sale IDs found"
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  )
57
 
58
  except Exception as e:
 
82
  }
83
 
84
  # Add optional columns if they exist
 
 
85
  if 'amount_for_pay' in df.columns:
86
  agg_functions['amount_for_pay'] = 'sum'
87
+ if 'payment_sale_amount' in df.columns:
88
+ agg_functions['payment_sale_amount'] = 'sum'
89
 
90
  if period == 'daily':
91
  grouped = df.groupby(df['sale_date'].dt.date)
 
230
  logger.error(f"Request failed: {str(e)}")
231
  raise WildberriesAPIError(f"Request failed: {str(e)}")
232
 
233
+ def get_sales(self, date_from: str, date_to: str = None, flag: int = 0) -> pd.DataFrame:
234
  """
235
+ Get sales data from Wildberries API with automatic pagination
236
 
237
  Args:
238
+ date_from: Start date in YYYY-MM-DD format or lastChangeDate for pagination
239
+ date_to: End date in YYYY-MM-DD format (optional, not used by sales API)
240
+ flag: 0 for sales and returns, 1 for only sales (optional)
241
 
242
  Returns:
243
  pandas.DataFrame with sales data
244
+
245
+ Note: API limit is 80,000 rows per request. This method handles pagination automatically.
246
  """
247
  endpoint = self.config.get_endpoints()["sales"]
248
 
249
+ all_sales_data = []
250
+ current_date_from = date_from
251
+ max_iterations = 50 # Safety limit to prevent infinite loops
252
+ iteration = 0
 
253
 
254
  try:
255
+ while iteration < max_iterations:
256
+ # Build parameters for sales API
257
+ params = {"dateFrom": current_date_from}
258
+ if flag is not None:
259
+ params["flag"] = flag
260
+
261
+ logger.info(f"Fetching sales data from {current_date_from} (iteration {iteration + 1})")
262
+
263
+ response = self._make_request("GET", endpoint, params=params)
264
+
265
+ if not response:
266
+ logger.warning("No sales data returned from API")
267
+ break
268
+
269
+ # Sales API returns direct array
270
+ if isinstance(response, list):
271
+ batch_data = response
272
+ else:
273
+ logger.warning("Unexpected API response format for sales")
274
+ break
275
+
276
+ if not batch_data:
277
+ logger.info("Empty response received - all sales data retrieved")
278
+ break
279
+
280
+ logger.info(f"Retrieved {len(batch_data)} sales records")
281
+ all_sales_data.extend(batch_data)
282
+
283
+ # Check if we need pagination (response has 80,000 rows)
284
+ if len(batch_data) < 80000:
285
+ logger.info("Received less than 80,000 rows - all data retrieved")
286
+ break
287
+
288
+ # Get lastChangeDate from the last record for next request
289
+ last_record = batch_data[-1]
290
+ if 'lastChangeDate' in last_record:
291
+ current_date_from = last_record['lastChangeDate']
292
+ logger.info(f"Next pagination starts from: {current_date_from}")
293
+ else:
294
+ logger.warning("No lastChangeDate found in response - stopping pagination")
295
+ break
296
+
297
+ iteration += 1
298
 
299
+ if iteration >= max_iterations:
300
+ logger.warning(f"Maximum iterations ({max_iterations}) reached - there may be more data")
 
301
 
302
+ if not all_sales_data:
 
 
 
 
 
 
303
  return pd.DataFrame()
304
 
305
+ # Convert to DataFrame and process
306
+ sales_data = pd.DataFrame(all_sales_data)
307
+ logger.info(f"Total sales records retrieved: {len(sales_data)}")
308
 
309
+ # Process and clean the data using the correct sales data processor
310
+ sales_data = self._process_sales_data(sales_data)
311
 
312
  return sales_data
313
 
 
317
 
318
  def get_stocks(self, date_from: str = None, date_to: str = None) -> pd.DataFrame:
319
  """
320
+ Get current stock levels from Wildberries API with automatic pagination
321
 
322
  Args:
323
+ date_from: Date to get stock levels for (optional, defaults to 2019-06-20 for total stock)
324
  date_to: Not used for stocks endpoint (stocks API uses only dateFrom)
325
 
326
  Returns:
327
  pandas.DataFrame with stock data
328
+
329
+ Note: API limit is 60,000 rows per request. This method handles pagination automatically.
330
  """
331
  endpoint = self.config.get_endpoints()["stocks"]
332
 
333
+ # Use early date to get total stock if no date specified
334
  if not date_from:
335
+ date_from = "2019-06-20" # Early date to get all stocks
336
 
337
+ all_stock_data = []
338
+ current_date_from = date_from
339
+ max_iterations = 30 # Safety limit for stocks (should be less than sales)
340
+ iteration = 0
341
 
342
  try:
343
+ while iteration < max_iterations:
344
+ # Stocks API uses only dateFrom parameter (RFC3339 format)
345
+ params = {"dateFrom": current_date_from}
346
+
347
+ logger.info(f"Fetching stock data from {current_date_from} (iteration {iteration + 1})")
348
+
349
+ response = self._make_request("GET", endpoint, params=params)
350
+
351
+ if not response:
352
+ logger.warning("No stock data returned from API")
353
+ break
354
+
355
+ # Stocks API returns direct array response
356
+ if isinstance(response, list):
357
+ batch_data = response
358
+ else:
359
+ logger.warning("Unexpected API response format for stocks")
360
+ break
361
+
362
+ if not batch_data:
363
+ logger.info("Empty response received - all stock data retrieved")
364
+ break
365
+
366
+ logger.info(f"Retrieved {len(batch_data)} stock records")
367
+ all_stock_data.extend(batch_data)
368
+
369
+ # Check if we need pagination (response has 60,000 rows)
370
+ if len(batch_data) < 60000:
371
+ logger.info("Received less than 60,000 rows - all data retrieved")
372
+ break
373
+
374
+ # Get lastChangeDate from the last record for next request
375
+ last_record = batch_data[-1]
376
+ if 'lastChangeDate' in last_record:
377
+ current_date_from = last_record['lastChangeDate']
378
+ logger.info(f"Next pagination starts from: {current_date_from}")
379
+ else:
380
+ logger.warning("No lastChangeDate found in response - stopping pagination")
381
+ break
382
+
383
+ iteration += 1
384
 
385
+ if iteration >= max_iterations:
386
+ logger.warning(f"Maximum iterations ({max_iterations}) reached - there may be more data")
 
387
 
388
+ if not all_stock_data:
 
 
 
 
 
 
389
  return pd.DataFrame()
390
 
391
+ # Convert to DataFrame and process
392
+ stock_data = pd.DataFrame(all_stock_data)
393
+ logger.info(f"Total stock records retrieved: {len(stock_data)}")
394
 
395
  # Process and clean the stock data using specific stock processing
396
  stock_data = self._process_stock_data(stock_data)
 
449
  raise WildberriesAPIError(f"Failed to fetch orders data: {str(e)}")
450
 
451
  def _process_sales_data(self, df: pd.DataFrame) -> pd.DataFrame:
452
+ """Process and clean sales data from API response (v1 sales endpoint)"""
453
 
454
+ # Column mapping based on actual sales API response structure
455
  column_mapping = {
 
 
 
456
  'date': 'sale_date',
457
  'lastChangeDate': 'last_change_date',
458
  'warehouseName': 'warehouse',
459
+ 'warehouseType': 'warehouse_type',
460
  'countryName': 'country',
461
  'oblastOkrugName': 'region',
462
  'regionName': 'city',
463
+ 'supplierArticle': 'supplier_article',
464
+ 'nmId': 'product_id',
465
+ 'barcode': 'barcode',
466
+ 'category': 'category',
467
+ 'subject': 'subject',
468
+ 'brand': 'brand',
469
+ 'techSize': 'tech_size',
470
  'incomeID': 'income_id',
471
  'isSupply': 'is_supply',
472
  'isRealization': 'is_realization',
473
+ 'totalPrice': 'total_price', # Already total price per item
474
  'discountPercent': 'discount_percent',
475
  'spp': 'spp_discount',
476
+ 'paymentSaleAmount': 'payment_sale_amount',
477
+ 'forPay': 'amount_for_pay', # What seller receives
478
  'finishedPrice': 'finished_price',
479
  'priceWithDisc': 'price_with_discount',
480
+ 'saleID': 'sale_id',
 
481
  'sticker': 'sticker',
482
  'gNumber': 'g_number',
483
+ 'srid': 'unique_id' # Unique identifier for the sale
 
 
484
  }
485
 
486
  # Rename columns that exist
 
495
  df[col] = pd.to_datetime(df[col], errors='coerce')
496
 
497
  # Convert numeric columns
498
+ numeric_columns = [
499
+ 'total_price', 'discount_percent', 'spp_discount', 'payment_sale_amount',
500
+ 'amount_for_pay', 'finished_price', 'price_with_discount', 'income_id'
501
+ ]
502
  for col in numeric_columns:
503
  if col in df.columns:
504
  df[col] = pd.to_numeric(df[col], errors='coerce')
505
 
506
+ # Add product name (use supplier_article as primary)
 
 
 
 
507
  if 'product_name' not in df.columns:
508
+ if 'supplier_article' in df.columns:
509
+ df['product_name'] = df['supplier_article']
510
+ elif 'category' in df.columns:
511
+ df['product_name'] = df['category']
512
  else:
513
  df['product_name'] = 'Unknown Product'
514
 
515
+ # Add quantity (each row represents 1 item sale/return)
516
+ df['quantity'] = 1
517
+
518
+ # Calculate commission (difference between total_price and amount_for_pay)
519
+ if 'total_price' in df.columns and 'amount_for_pay' in df.columns:
520
+ df['sales_commission'] = df['total_price'] - df['amount_for_pay']
521
+ # Handle negative commissions (returns)
522
+ df['sales_commission'] = df['sales_commission'].fillna(0)
523
+
524
+ # Add sale_amount for compatibility (use amount_for_pay as seller's net amount)
525
+ if 'sale_amount' not in df.columns:
526
+ if 'amount_for_pay' in df.columns:
527
+ df['sale_amount'] = df['amount_for_pay']
528
+ else:
529
+ df['sale_amount'] = df['total_price']
530
+
531
+ # Add current_stock for inventory forecasting (default to 0)
532
+ if 'current_stock' not in df.columns:
533
+ df['current_stock'] = 0
534
+
535
+ # Filter out negative total_price (returns) if needed for analysis
536
+ # Note: Keep returns for complete data, but mark them
537
+ if 'total_price' in df.columns:
538
+ df['is_return'] = df['total_price'] < 0
539
+
540
+ logger.info(f"Processed {len(df)} sales records")
541
+
542
  return df
543
 
544
  def _process_stock_data(self, df: pd.DataFrame) -> pd.DataFrame:
545
+ """Process and clean stock data from API response (v1 stocks endpoint)"""
546
 
547
+ # Column mapping based on actual stocks API response structure
548
  column_mapping = {
549
  'lastChangeDate': 'last_change_date',
550
  'warehouseName': 'warehouse',
551
+ 'supplierArticle': 'supplier_article',
552
  'nmId': 'product_id',
553
  'barcode': 'barcode',
554
  'quantity': 'current_stock',
 
560
  'brand': 'brand',
561
  'techSize': 'tech_size',
562
  'Price': 'price',
563
+ 'Discount': 'discount_percent',
564
  'isSupply': 'is_supply',
565
  'isRealization': 'is_realization',
566
  'SCCode': 'sc_code'
 
576
  df['last_change_date'] = pd.to_datetime(df['last_change_date'], errors='coerce')
577
 
578
  # Convert numeric columns
579
+ numeric_columns = [
580
+ 'current_stock', 'in_way_to_client', 'in_way_from_client',
581
+ 'quantity_full', 'price', 'discount_percent', 'product_id'
582
+ ]
583
  for col in numeric_columns:
584
  if col in df.columns:
585
  df[col] = pd.to_numeric(df[col], errors='coerce')
586
 
587
+ # Add product name (use supplier_article as primary)
588
  if 'product_name' not in df.columns:
589
+ if 'supplier_article' in df.columns:
590
+ df['product_name'] = df['supplier_article']
591
+ elif 'subject' in df.columns:
592
+ df['product_name'] = df['subject']
593
+ elif 'category' in df.columns:
594
+ df['product_name'] = df['category']
595
  else:
596
  df['product_name'] = 'Unknown Product'
597
 
598
+ # Calculate discounted price
599
+ if 'price_with_discount' not in df.columns and 'price' in df.columns:
600
+ if 'discount_percent' in df.columns:
601
+ df['price_with_discount'] = df['price'] * (1 - df['discount_percent'] / 100)
602
+ else:
603
+ df['price_with_discount'] = df['price']
604
+
605
+ # Add total_price for compatibility (use price_with_discount)
606
  if 'total_price' not in df.columns:
607
+ if 'price_with_discount' in df.columns:
608
+ df['total_price'] = df['price_with_discount']
609
+ elif 'price' in df.columns:
610
+ df['total_price'] = df['price']
 
 
611
  else:
612
  df['total_price'] = 0
613
 
 
629
  if 'sale_amount' not in df.columns:
630
  df['sale_amount'] = df['total_price'] * df['quantity']
631
 
632
+ # Calculate total inventory value
633
+ if 'inventory_value' not in df.columns:
634
+ df['inventory_value'] = df['total_price'] * df['current_stock']
635
+
636
+ # Add article field for backward compatibility
637
+ if 'article' not in df.columns and 'supplier_article' in df.columns:
638
+ df['article'] = df['supplier_article']
639
+
640
+ # Mark low stock items (less than 5)
641
+ if 'is_low_stock' not in df.columns:
642
+ if 'current_stock' in df.columns:
643
+ df['is_low_stock'] = df['current_stock'] < 5
644
+ else:
645
+ df['is_low_stock'] = True
646
+
647
+ # Calculate pipeline stock (items in transit)
648
+ if 'pipeline_stock' not in df.columns:
649
+ in_way_to_client = df['in_way_to_client'] if 'in_way_to_client' in df.columns else 0
650
+ in_way_from_client = df['in_way_from_client'] if 'in_way_from_client' in df.columns else 0
651
+ df['pipeline_stock'] = in_way_to_client + in_way_from_client
652
+
653
+ logger.info(f"Processed {len(df)} stock records")
654
+
655
  return df
656
 
657
  def _process_reportdetail_data(self, df: pd.DataFrame) -> pd.DataFrame: