Spaces:
Running
Running
update of api connection
Browse files- app.py +14 -3
- config.py +2 -2
- wildberries_client.py +227 -101
app.py
CHANGED
|
@@ -426,8 +426,10 @@ def create_interface():
|
|
| 426 |
|
| 427 |
### π Sales Analytics
|
| 428 |
- **Week Analysis**: Shows sales data for the last 7 days
|
| 429 |
-
- **Month Analysis**: Shows sales data for the last 30 days
|
| 430 |
-
-
|
|
|
|
|
|
|
| 431 |
|
| 432 |
### 📦 Inventory Forecasting
|
| 433 |
Choose from multiple forecasting methods:
|
|
@@ -442,8 +444,17 @@ def create_interface():
|
|
| 442 |
- 🟡 **Warning** (7-14 days): Monitor closely
|
| 443 |
- 🟢 **Safe** (> 14 days): Adequate stock levels
|
| 444 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 445 |
### π API Information
|
| 446 |
-
This dashboard uses the [Wildberries API](https://dev.wildberries.ru/en/openapi/api-information)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 447 |
|
| 448 |
### 🛠️ Technical Details
|
| 449 |
- **Framework**: Gradio + FastMCP
|
|
|
|
| 426 |
|
| 427 |
### π Sales Analytics
|
| 428 |
- **Week Analysis**: Shows sales data for the last 7 days
|
| 429 |
+
- **Month Analysis**: Shows sales data for the last 30 days
|
| 430 |
+
- **Enhanced Metrics**: Commission analysis, net revenue, platform fees
|
| 431 |
+
- **Commission Dashboard**: Detailed commission breakdown by products
|
| 432 |
+
- **Pagination**: Automatically handles large datasets (80,000+ records)
|
| 433 |
|
| 434 |
### 📦 Inventory Forecasting
|
| 435 |
Choose from multiple forecasting methods:
|
|
|
|
| 444 |
- 🟡 **Warning** (7-14 days): Monitor closely
|
| 445 |
- 🟢 **Safe** (> 14 days): Adequate stock levels
|
| 446 |
|
| 447 |
+
### π Data Validation
|
| 448 |
+
- **Consistency Checks**: Automatic validation of data quality
|
| 449 |
+
- **Duplicate Detection**: Identifies duplicate sales records
|
| 450 |
+
- **Data Aggregation**: Performance optimization for large datasets
|
| 451 |
+
|
| 452 |
### π API Information
|
| 453 |
+
This dashboard uses the [Wildberries API](https://dev.wildberries.ru/en/openapi/api-information):
|
| 454 |
+
- **Sales Endpoint**: `/api/v1/supplier/sales` (with automatic pagination)
|
| 455 |
+
- **Stocks Endpoint**: `/api/v1/supplier/stocks`
|
| 456 |
+
- **Rate Limits**: 300 requests/minute (respected automatically)
|
| 457 |
+
- **Data Retention**: Sales data available for 90 days
|
| 458 |
|
| 459 |
### 🛠️ Technical Details
|
| 460 |
- **Framework**: Gradio + FastMCP
|
config.py
CHANGED
|
@@ -74,8 +74,8 @@ class Config:
|
|
| 74 |
def get_endpoints(self) -> Dict[str, str]:
|
| 75 |
"""Get API endpoint configurations based on working API calls"""
|
| 76 |
return {
|
| 77 |
-
# Statistics API endpoints -
|
| 78 |
-
"sales": f"{self.wildberries_base_url}/api/
|
| 79 |
"orders": f"{self.wildberries_base_url}/api/v5/supplier/reportDetailByPeriod",
|
| 80 |
"stocks": f"{self.wildberries_base_url}/api/v1/supplier/stocks",
|
| 81 |
"incomes": f"{self.wildberries_base_url}/api/v5/supplier/reportDetailByPeriod",
|
|
|
|
| 74 |
def get_endpoints(self) -> Dict[str, str]:
|
| 75 |
"""Get API endpoint configurations based on working API calls"""
|
| 76 |
return {
|
| 77 |
+
# Statistics API endpoints - Correct sales endpoint
|
| 78 |
+
"sales": f"{self.wildberries_base_url}/api/v1/supplier/sales",
|
| 79 |
"orders": f"{self.wildberries_base_url}/api/v5/supplier/reportDetailByPeriod",
|
| 80 |
"stocks": f"{self.wildberries_base_url}/api/v1/supplier/stocks",
|
| 81 |
"incomes": f"{self.wildberries_base_url}/api/v5/supplier/reportDetailByPeriod",
|
wildberries_client.py
CHANGED
|
@@ -46,26 +46,13 @@ def validate_wb_data(weekly_data: pd.DataFrame, monthly_data: pd.DataFrame) -> D
|
|
| 46 |
f"Data inconsistency: Weekly revenue (β½{weekly_revenue:,.0f}) exceeds monthly revenue (β½{monthly_revenue:,.0f})"
|
| 47 |
)
|
| 48 |
|
| 49 |
-
# Check for
|
| 50 |
for df_name, df in [("weekly", weekly_data), ("monthly", monthly_data)]:
|
| 51 |
-
if not df.empty and '
|
| 52 |
-
|
| 53 |
-
if
|
| 54 |
validation_results["warnings"].append(
|
| 55 |
-
f"{df_name.title()} data: {
|
| 56 |
-
)
|
| 57 |
-
|
| 58 |
-
# Check for zero quantities in sales records
|
| 59 |
-
for df_name, df in [("weekly", weekly_data), ("monthly", monthly_data)]:
|
| 60 |
-
if not df.empty and 'quantity' in df.columns and 'document_type' in df.columns:
|
| 61 |
-
zero_qty_sales = df[
|
| 62 |
-
(df['quantity'] == 0) &
|
| 63 |
-
(df['document_type'].notna()) &
|
| 64 |
-
(df['document_type'] != '')
|
| 65 |
-
].shape[0]
|
| 66 |
-
if zero_qty_sales > 0:
|
| 67 |
-
validation_results["warnings"].append(
|
| 68 |
-
f"{df_name.title()} data: {zero_qty_sales} sales records with zero quantity"
|
| 69 |
)
|
| 70 |
|
| 71 |
except Exception as e:
|
|
@@ -95,12 +82,10 @@ def aggregate_wb_data(df: pd.DataFrame, period: str = 'daily') -> pd.DataFrame:
|
|
| 95 |
}
|
| 96 |
|
| 97 |
# Add optional columns if they exist
|
| 98 |
-
if 'sales_commission' in df.columns:
|
| 99 |
-
agg_functions['sales_commission'] = 'sum'
|
| 100 |
if 'amount_for_pay' in df.columns:
|
| 101 |
agg_functions['amount_for_pay'] = 'sum'
|
| 102 |
-
if '
|
| 103 |
-
agg_functions['
|
| 104 |
|
| 105 |
if period == 'daily':
|
| 106 |
grouped = df.groupby(df['sale_date'].dt.date)
|
|
@@ -245,46 +230,84 @@ class WildberriesAPI:
|
|
| 245 |
logger.error(f"Request failed: {str(e)}")
|
| 246 |
raise WildberriesAPIError(f"Request failed: {str(e)}")
|
| 247 |
|
| 248 |
-
def get_sales(self, date_from: str, date_to: str = None) -> pd.DataFrame:
|
| 249 |
"""
|
| 250 |
-
Get sales data from Wildberries API
|
| 251 |
|
| 252 |
Args:
|
| 253 |
-
date_from: Start date in YYYY-MM-DD format
|
| 254 |
-
date_to: End date in YYYY-MM-DD format (optional)
|
|
|
|
| 255 |
|
| 256 |
Returns:
|
| 257 |
pandas.DataFrame with sales data
|
|
|
|
|
|
|
| 258 |
"""
|
| 259 |
endpoint = self.config.get_endpoints()["sales"]
|
| 260 |
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
params = {"dateFrom": date_from, "dateTo": date_to, "limit": 100}
|
| 266 |
|
| 267 |
try:
|
| 268 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
|
| 270 |
-
if
|
| 271 |
-
logger.warning("
|
| 272 |
-
return pd.DataFrame()
|
| 273 |
|
| 274 |
-
|
| 275 |
-
if isinstance(response, list):
|
| 276 |
-
sales_data = pd.DataFrame(response)
|
| 277 |
-
elif isinstance(response, dict) and "data" in response:
|
| 278 |
-
sales_data = pd.DataFrame(response["data"])
|
| 279 |
-
else:
|
| 280 |
-
logger.warning("Unexpected API response format")
|
| 281 |
return pd.DataFrame()
|
| 282 |
|
| 283 |
-
|
| 284 |
-
|
|
|
|
| 285 |
|
| 286 |
-
# Process and clean the data
|
| 287 |
-
sales_data = self.
|
| 288 |
|
| 289 |
return sales_data
|
| 290 |
|
|
@@ -294,41 +317,80 @@ class WildberriesAPI:
|
|
| 294 |
|
| 295 |
def get_stocks(self, date_from: str = None, date_to: str = None) -> pd.DataFrame:
|
| 296 |
"""
|
| 297 |
-
Get current stock levels from Wildberries API
|
| 298 |
|
| 299 |
Args:
|
| 300 |
-
date_from: Date to get stock levels for (optional, defaults to
|
| 301 |
date_to: Not used for stocks endpoint (stocks API uses only dateFrom)
|
| 302 |
|
| 303 |
Returns:
|
| 304 |
pandas.DataFrame with stock data
|
|
|
|
|
|
|
| 305 |
"""
|
| 306 |
endpoint = self.config.get_endpoints()["stocks"]
|
| 307 |
|
|
|
|
| 308 |
if not date_from:
|
| 309 |
-
date_from =
|
| 310 |
|
| 311 |
-
|
| 312 |
-
|
|
|
|
|
|
|
| 313 |
|
| 314 |
try:
|
| 315 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
|
| 317 |
-
if
|
| 318 |
-
logger.warning("
|
| 319 |
-
return pd.DataFrame()
|
| 320 |
|
| 321 |
-
|
| 322 |
-
if isinstance(response, list):
|
| 323 |
-
stock_data = pd.DataFrame(response)
|
| 324 |
-
elif isinstance(response, dict) and "data" in response:
|
| 325 |
-
stock_data = pd.DataFrame(response["data"])
|
| 326 |
-
else:
|
| 327 |
-
logger.warning("Unexpected API response format")
|
| 328 |
return pd.DataFrame()
|
| 329 |
|
| 330 |
-
|
| 331 |
-
|
|
|
|
| 332 |
|
| 333 |
# Process and clean the stock data using specific stock processing
|
| 334 |
stock_data = self._process_stock_data(stock_data)
|
|
@@ -387,36 +449,38 @@ class WildberriesAPI:
|
|
| 387 |
raise WildberriesAPIError(f"Failed to fetch orders data: {str(e)}")
|
| 388 |
|
| 389 |
def _process_sales_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
| 390 |
-
"""Process and clean sales data from API response"""
|
| 391 |
|
| 392 |
-
#
|
| 393 |
column_mapping = {
|
| 394 |
-
'srid': 'sale_id',
|
| 395 |
-
'supplierArticle': 'article',
|
| 396 |
-
'nmId': 'product_id',
|
| 397 |
'date': 'sale_date',
|
| 398 |
'lastChangeDate': 'last_change_date',
|
| 399 |
'warehouseName': 'warehouse',
|
|
|
|
| 400 |
'countryName': 'country',
|
| 401 |
'oblastOkrugName': 'region',
|
| 402 |
'regionName': 'city',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
'incomeID': 'income_id',
|
| 404 |
'isSupply': 'is_supply',
|
| 405 |
'isRealization': 'is_realization',
|
| 406 |
-
'totalPrice': 'total_price',
|
| 407 |
'discountPercent': 'discount_percent',
|
| 408 |
'spp': 'spp_discount',
|
| 409 |
-
'paymentSaleAmount': '
|
| 410 |
-
'forPay': '
|
| 411 |
'finishedPrice': 'finished_price',
|
| 412 |
'priceWithDisc': 'price_with_discount',
|
| 413 |
-
'saleID': '
|
| 414 |
-
'orderType': 'order_type',
|
| 415 |
'sticker': 'sticker',
|
| 416 |
'gNumber': 'g_number',
|
| 417 |
-
'
|
| 418 |
-
'brand': 'brand',
|
| 419 |
-
'subject': 'category'
|
| 420 |
}
|
| 421 |
|
| 422 |
# Rename columns that exist
|
|
@@ -431,32 +495,60 @@ class WildberriesAPI:
|
|
| 431 |
df[col] = pd.to_datetime(df[col], errors='coerce')
|
| 432 |
|
| 433 |
# Convert numeric columns
|
| 434 |
-
numeric_columns = [
|
|
|
|
|
|
|
|
|
|
| 435 |
for col in numeric_columns:
|
| 436 |
if col in df.columns:
|
| 437 |
df[col] = pd.to_numeric(df[col], errors='coerce')
|
| 438 |
|
| 439 |
-
# Add
|
| 440 |
-
if 'total_price' in df.columns:
|
| 441 |
-
df['quantity'] = 1 # Each row represents one sale
|
| 442 |
-
|
| 443 |
-
# Add product name (if not available, use article)
|
| 444 |
if 'product_name' not in df.columns:
|
| 445 |
-
if '
|
| 446 |
-
df['product_name'] = df['
|
|
|
|
|
|
|
| 447 |
else:
|
| 448 |
df['product_name'] = 'Unknown Product'
|
| 449 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 450 |
return df
|
| 451 |
|
| 452 |
def _process_stock_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
| 453 |
-
"""Process and clean stock data from API response"""
|
| 454 |
|
| 455 |
-
#
|
| 456 |
column_mapping = {
|
| 457 |
'lastChangeDate': 'last_change_date',
|
| 458 |
'warehouseName': 'warehouse',
|
| 459 |
-
'supplierArticle': '
|
| 460 |
'nmId': 'product_id',
|
| 461 |
'barcode': 'barcode',
|
| 462 |
'quantity': 'current_stock',
|
|
@@ -468,7 +560,7 @@ class WildberriesAPI:
|
|
| 468 |
'brand': 'brand',
|
| 469 |
'techSize': 'tech_size',
|
| 470 |
'Price': 'price',
|
| 471 |
-
'Discount': '
|
| 472 |
'isSupply': 'is_supply',
|
| 473 |
'isRealization': 'is_realization',
|
| 474 |
'SCCode': 'sc_code'
|
|
@@ -484,27 +576,38 @@ class WildberriesAPI:
|
|
| 484 |
df['last_change_date'] = pd.to_datetime(df['last_change_date'], errors='coerce')
|
| 485 |
|
| 486 |
# Convert numeric columns
|
| 487 |
-
numeric_columns = [
|
|
|
|
|
|
|
|
|
|
| 488 |
for col in numeric_columns:
|
| 489 |
if col in df.columns:
|
| 490 |
df[col] = pd.to_numeric(df[col], errors='coerce')
|
| 491 |
|
| 492 |
-
# Add product name (
|
| 493 |
if 'product_name' not in df.columns:
|
| 494 |
-
if '
|
| 495 |
-
df['product_name'] = df['
|
|
|
|
|
|
|
|
|
|
|
|
|
| 496 |
else:
|
| 497 |
df['product_name'] = 'Unknown Product'
|
| 498 |
|
| 499 |
-
#
|
| 500 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 501 |
if 'total_price' not in df.columns:
|
| 502 |
-
if '
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
else:
|
| 507 |
-
df['total_price'] = df['price']
|
| 508 |
else:
|
| 509 |
df['total_price'] = 0
|
| 510 |
|
|
@@ -526,6 +629,29 @@ class WildberriesAPI:
|
|
| 526 |
if 'sale_amount' not in df.columns:
|
| 527 |
df['sale_amount'] = df['total_price'] * df['quantity']
|
| 528 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 529 |
return df
|
| 530 |
|
| 531 |
def _process_reportdetail_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
|
|
|
| 46 |
f"Data inconsistency: Weekly revenue (β½{weekly_revenue:,.0f}) exceeds monthly revenue (β½{monthly_revenue:,.0f})"
|
| 47 |
)
|
| 48 |
|
| 49 |
+
# Check for duplicate sales IDs
|
| 50 |
for df_name, df in [("weekly", weekly_data), ("monthly", monthly_data)]:
|
| 51 |
+
if not df.empty and 'srid' in df.columns:
|
| 52 |
+
duplicate_count = df.duplicated('srid').sum()
|
| 53 |
+
if duplicate_count > 0:
|
| 54 |
validation_results["warnings"].append(
|
| 55 |
+
f"{df_name.title()} data: {duplicate_count} duplicate sale IDs found"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
)
|
| 57 |
|
| 58 |
except Exception as e:
|
|
|
|
| 82 |
}
|
| 83 |
|
| 84 |
# Add optional columns if they exist
|
|
|
|
|
|
|
| 85 |
if 'amount_for_pay' in df.columns:
|
| 86 |
agg_functions['amount_for_pay'] = 'sum'
|
| 87 |
+
if 'payment_sale_amount' in df.columns:
|
| 88 |
+
agg_functions['payment_sale_amount'] = 'sum'
|
| 89 |
|
| 90 |
if period == 'daily':
|
| 91 |
grouped = df.groupby(df['sale_date'].dt.date)
|
|
|
|
| 230 |
logger.error(f"Request failed: {str(e)}")
|
| 231 |
raise WildberriesAPIError(f"Request failed: {str(e)}")
|
| 232 |
|
| 233 |
+
def get_sales(self, date_from: str, date_to: str = None, flag: int = 0) -> pd.DataFrame:
|
| 234 |
"""
|
| 235 |
+
Get sales data from Wildberries API with automatic pagination
|
| 236 |
|
| 237 |
Args:
|
| 238 |
+
date_from: Start date in YYYY-MM-DD format or lastChangeDate for pagination
|
| 239 |
+
date_to: End date in YYYY-MM-DD format (optional, not used by sales API)
|
| 240 |
+
flag: 0 for sales and returns, 1 for only sales (optional)
|
| 241 |
|
| 242 |
Returns:
|
| 243 |
pandas.DataFrame with sales data
|
| 244 |
+
|
| 245 |
+
Note: API limit is 80,000 rows per request. This method handles pagination automatically.
|
| 246 |
"""
|
| 247 |
endpoint = self.config.get_endpoints()["sales"]
|
| 248 |
|
| 249 |
+
all_sales_data = []
|
| 250 |
+
current_date_from = date_from
|
| 251 |
+
max_iterations = 50 # Safety limit to prevent infinite loops
|
| 252 |
+
iteration = 0
|
|
|
|
| 253 |
|
| 254 |
try:
|
| 255 |
+
while iteration < max_iterations:
|
| 256 |
+
# Build parameters for sales API
|
| 257 |
+
params = {"dateFrom": current_date_from}
|
| 258 |
+
if flag is not None:
|
| 259 |
+
params["flag"] = flag
|
| 260 |
+
|
| 261 |
+
logger.info(f"Fetching sales data from {current_date_from} (iteration {iteration + 1})")
|
| 262 |
+
|
| 263 |
+
response = self._make_request("GET", endpoint, params=params)
|
| 264 |
+
|
| 265 |
+
if not response:
|
| 266 |
+
logger.warning("No sales data returned from API")
|
| 267 |
+
break
|
| 268 |
+
|
| 269 |
+
# Sales API returns direct array
|
| 270 |
+
if isinstance(response, list):
|
| 271 |
+
batch_data = response
|
| 272 |
+
else:
|
| 273 |
+
logger.warning("Unexpected API response format for sales")
|
| 274 |
+
break
|
| 275 |
+
|
| 276 |
+
if not batch_data:
|
| 277 |
+
logger.info("Empty response received - all sales data retrieved")
|
| 278 |
+
break
|
| 279 |
+
|
| 280 |
+
logger.info(f"Retrieved {len(batch_data)} sales records")
|
| 281 |
+
all_sales_data.extend(batch_data)
|
| 282 |
+
|
| 283 |
+
# Check if we need pagination (response has 80,000 rows)
|
| 284 |
+
if len(batch_data) < 80000:
|
| 285 |
+
logger.info("Received less than 80,000 rows - all data retrieved")
|
| 286 |
+
break
|
| 287 |
+
|
| 288 |
+
# Get lastChangeDate from the last record for next request
|
| 289 |
+
last_record = batch_data[-1]
|
| 290 |
+
if 'lastChangeDate' in last_record:
|
| 291 |
+
current_date_from = last_record['lastChangeDate']
|
| 292 |
+
logger.info(f"Next pagination starts from: {current_date_from}")
|
| 293 |
+
else:
|
| 294 |
+
logger.warning("No lastChangeDate found in response - stopping pagination")
|
| 295 |
+
break
|
| 296 |
+
|
| 297 |
+
iteration += 1
|
| 298 |
|
| 299 |
+
if iteration >= max_iterations:
|
| 300 |
+
logger.warning(f"Maximum iterations ({max_iterations}) reached - there may be more data")
|
|
|
|
| 301 |
|
| 302 |
+
if not all_sales_data:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
return pd.DataFrame()
|
| 304 |
|
| 305 |
+
# Convert to DataFrame and process
|
| 306 |
+
sales_data = pd.DataFrame(all_sales_data)
|
| 307 |
+
logger.info(f"Total sales records retrieved: {len(sales_data)}")
|
| 308 |
|
| 309 |
+
# Process and clean the data using the correct sales data processor
|
| 310 |
+
sales_data = self._process_sales_data(sales_data)
|
| 311 |
|
| 312 |
return sales_data
|
| 313 |
|
|
|
|
| 317 |
|
| 318 |
def get_stocks(self, date_from: str = None, date_to: str = None) -> pd.DataFrame:
|
| 319 |
"""
|
| 320 |
+
Get current stock levels from Wildberries API with automatic pagination
|
| 321 |
|
| 322 |
Args:
|
| 323 |
+
date_from: Date to get stock levels for (optional, defaults to 2019-06-20 for total stock)
|
| 324 |
date_to: Not used for stocks endpoint (stocks API uses only dateFrom)
|
| 325 |
|
| 326 |
Returns:
|
| 327 |
pandas.DataFrame with stock data
|
| 328 |
+
|
| 329 |
+
Note: API limit is 60,000 rows per request. This method handles pagination automatically.
|
| 330 |
"""
|
| 331 |
endpoint = self.config.get_endpoints()["stocks"]
|
| 332 |
|
| 333 |
+
# Use early date to get total stock if no date specified
|
| 334 |
if not date_from:
|
| 335 |
+
date_from = "2019-06-20" # Early date to get all stocks
|
| 336 |
|
| 337 |
+
all_stock_data = []
|
| 338 |
+
current_date_from = date_from
|
| 339 |
+
max_iterations = 30 # Safety limit for stocks (should be less than sales)
|
| 340 |
+
iteration = 0
|
| 341 |
|
| 342 |
try:
|
| 343 |
+
while iteration < max_iterations:
|
| 344 |
+
# Stocks API uses only dateFrom parameter (RFC3339 format)
|
| 345 |
+
params = {"dateFrom": current_date_from}
|
| 346 |
+
|
| 347 |
+
logger.info(f"Fetching stock data from {current_date_from} (iteration {iteration + 1})")
|
| 348 |
+
|
| 349 |
+
response = self._make_request("GET", endpoint, params=params)
|
| 350 |
+
|
| 351 |
+
if not response:
|
| 352 |
+
logger.warning("No stock data returned from API")
|
| 353 |
+
break
|
| 354 |
+
|
| 355 |
+
# Stocks API returns direct array response
|
| 356 |
+
if isinstance(response, list):
|
| 357 |
+
batch_data = response
|
| 358 |
+
else:
|
| 359 |
+
logger.warning("Unexpected API response format for stocks")
|
| 360 |
+
break
|
| 361 |
+
|
| 362 |
+
if not batch_data:
|
| 363 |
+
logger.info("Empty response received - all stock data retrieved")
|
| 364 |
+
break
|
| 365 |
+
|
| 366 |
+
logger.info(f"Retrieved {len(batch_data)} stock records")
|
| 367 |
+
all_stock_data.extend(batch_data)
|
| 368 |
+
|
| 369 |
+
# Check if we need pagination (response has 60,000 rows)
|
| 370 |
+
if len(batch_data) < 60000:
|
| 371 |
+
logger.info("Received less than 60,000 rows - all data retrieved")
|
| 372 |
+
break
|
| 373 |
+
|
| 374 |
+
# Get lastChangeDate from the last record for next request
|
| 375 |
+
last_record = batch_data[-1]
|
| 376 |
+
if 'lastChangeDate' in last_record:
|
| 377 |
+
current_date_from = last_record['lastChangeDate']
|
| 378 |
+
logger.info(f"Next pagination starts from: {current_date_from}")
|
| 379 |
+
else:
|
| 380 |
+
logger.warning("No lastChangeDate found in response - stopping pagination")
|
| 381 |
+
break
|
| 382 |
+
|
| 383 |
+
iteration += 1
|
| 384 |
|
| 385 |
+
if iteration >= max_iterations:
|
| 386 |
+
logger.warning(f"Maximum iterations ({max_iterations}) reached - there may be more data")
|
|
|
|
| 387 |
|
| 388 |
+
if not all_stock_data:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 389 |
return pd.DataFrame()
|
| 390 |
|
| 391 |
+
# Convert to DataFrame and process
|
| 392 |
+
stock_data = pd.DataFrame(all_stock_data)
|
| 393 |
+
logger.info(f"Total stock records retrieved: {len(stock_data)}")
|
| 394 |
|
| 395 |
# Process and clean the stock data using specific stock processing
|
| 396 |
stock_data = self._process_stock_data(stock_data)
|
|
|
|
| 449 |
raise WildberriesAPIError(f"Failed to fetch orders data: {str(e)}")
|
| 450 |
|
| 451 |
def _process_sales_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
| 452 |
+
"""Process and clean sales data from API response (v1 sales endpoint)"""
|
| 453 |
|
| 454 |
+
# Column mapping based on actual sales API response structure
|
| 455 |
column_mapping = {
|
|
|
|
|
|
|
|
|
|
| 456 |
'date': 'sale_date',
|
| 457 |
'lastChangeDate': 'last_change_date',
|
| 458 |
'warehouseName': 'warehouse',
|
| 459 |
+
'warehouseType': 'warehouse_type',
|
| 460 |
'countryName': 'country',
|
| 461 |
'oblastOkrugName': 'region',
|
| 462 |
'regionName': 'city',
|
| 463 |
+
'supplierArticle': 'supplier_article',
|
| 464 |
+
'nmId': 'product_id',
|
| 465 |
+
'barcode': 'barcode',
|
| 466 |
+
'category': 'category',
|
| 467 |
+
'subject': 'subject',
|
| 468 |
+
'brand': 'brand',
|
| 469 |
+
'techSize': 'tech_size',
|
| 470 |
'incomeID': 'income_id',
|
| 471 |
'isSupply': 'is_supply',
|
| 472 |
'isRealization': 'is_realization',
|
| 473 |
+
'totalPrice': 'total_price', # Already total price per item
|
| 474 |
'discountPercent': 'discount_percent',
|
| 475 |
'spp': 'spp_discount',
|
| 476 |
+
'paymentSaleAmount': 'payment_sale_amount',
|
| 477 |
+
'forPay': 'amount_for_pay', # What seller receives
|
| 478 |
'finishedPrice': 'finished_price',
|
| 479 |
'priceWithDisc': 'price_with_discount',
|
| 480 |
+
'saleID': 'sale_id',
|
|
|
|
| 481 |
'sticker': 'sticker',
|
| 482 |
'gNumber': 'g_number',
|
| 483 |
+
'srid': 'unique_id' # Unique identifier for the sale
|
|
|
|
|
|
|
| 484 |
}
|
| 485 |
|
| 486 |
# Rename columns that exist
|
|
|
|
| 495 |
df[col] = pd.to_datetime(df[col], errors='coerce')
|
| 496 |
|
| 497 |
# Convert numeric columns
|
| 498 |
+
numeric_columns = [
|
| 499 |
+
'total_price', 'discount_percent', 'spp_discount', 'payment_sale_amount',
|
| 500 |
+
'amount_for_pay', 'finished_price', 'price_with_discount', 'income_id'
|
| 501 |
+
]
|
| 502 |
for col in numeric_columns:
|
| 503 |
if col in df.columns:
|
| 504 |
df[col] = pd.to_numeric(df[col], errors='coerce')
|
| 505 |
|
| 506 |
+
# Add product name (use supplier_article as primary)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 507 |
if 'product_name' not in df.columns:
|
| 508 |
+
if 'supplier_article' in df.columns:
|
| 509 |
+
df['product_name'] = df['supplier_article']
|
| 510 |
+
elif 'category' in df.columns:
|
| 511 |
+
df['product_name'] = df['category']
|
| 512 |
else:
|
| 513 |
df['product_name'] = 'Unknown Product'
|
| 514 |
|
| 515 |
+
# Add quantity (each row represents 1 item sale/return)
|
| 516 |
+
df['quantity'] = 1
|
| 517 |
+
|
| 518 |
+
# Calculate commission (difference between total_price and amount_for_pay)
|
| 519 |
+
if 'total_price' in df.columns and 'amount_for_pay' in df.columns:
|
| 520 |
+
df['sales_commission'] = df['total_price'] - df['amount_for_pay']
|
| 521 |
+
# Handle negative commissions (returns)
|
| 522 |
+
df['sales_commission'] = df['sales_commission'].fillna(0)
|
| 523 |
+
|
| 524 |
+
# Add sale_amount for compatibility (use amount_for_pay as seller's net amount)
|
| 525 |
+
if 'sale_amount' not in df.columns:
|
| 526 |
+
if 'amount_for_pay' in df.columns:
|
| 527 |
+
df['sale_amount'] = df['amount_for_pay']
|
| 528 |
+
else:
|
| 529 |
+
df['sale_amount'] = df['total_price']
|
| 530 |
+
|
| 531 |
+
# Add current_stock for inventory forecasting (default to 0)
|
| 532 |
+
if 'current_stock' not in df.columns:
|
| 533 |
+
df['current_stock'] = 0
|
| 534 |
+
|
| 535 |
+
# Filter out negative total_price (returns) if needed for analysis
|
| 536 |
+
# Note: Keep returns for complete data, but mark them
|
| 537 |
+
if 'total_price' in df.columns:
|
| 538 |
+
df['is_return'] = df['total_price'] < 0
|
| 539 |
+
|
| 540 |
+
logger.info(f"Processed {len(df)} sales records")
|
| 541 |
+
|
| 542 |
return df
|
| 543 |
|
| 544 |
def _process_stock_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
| 545 |
+
"""Process and clean stock data from API response (v1 stocks endpoint)"""
|
| 546 |
|
| 547 |
+
# Column mapping based on actual stocks API response structure
|
| 548 |
column_mapping = {
|
| 549 |
'lastChangeDate': 'last_change_date',
|
| 550 |
'warehouseName': 'warehouse',
|
| 551 |
+
'supplierArticle': 'supplier_article',
|
| 552 |
'nmId': 'product_id',
|
| 553 |
'barcode': 'barcode',
|
| 554 |
'quantity': 'current_stock',
|
|
|
|
| 560 |
'brand': 'brand',
|
| 561 |
'techSize': 'tech_size',
|
| 562 |
'Price': 'price',
|
| 563 |
+
'Discount': 'discount_percent',
|
| 564 |
'isSupply': 'is_supply',
|
| 565 |
'isRealization': 'is_realization',
|
| 566 |
'SCCode': 'sc_code'
|
|
|
|
| 576 |
df['last_change_date'] = pd.to_datetime(df['last_change_date'], errors='coerce')
|
| 577 |
|
| 578 |
# Convert numeric columns
|
| 579 |
+
numeric_columns = [
|
| 580 |
+
'current_stock', 'in_way_to_client', 'in_way_from_client',
|
| 581 |
+
'quantity_full', 'price', 'discount_percent', 'product_id'
|
| 582 |
+
]
|
| 583 |
for col in numeric_columns:
|
| 584 |
if col in df.columns:
|
| 585 |
df[col] = pd.to_numeric(df[col], errors='coerce')
|
| 586 |
|
| 587 |
+
# Add product name (use supplier_article as primary)
|
| 588 |
if 'product_name' not in df.columns:
|
| 589 |
+
if 'supplier_article' in df.columns:
|
| 590 |
+
df['product_name'] = df['supplier_article']
|
| 591 |
+
elif 'subject' in df.columns:
|
| 592 |
+
df['product_name'] = df['subject']
|
| 593 |
+
elif 'category' in df.columns:
|
| 594 |
+
df['product_name'] = df['category']
|
| 595 |
else:
|
| 596 |
df['product_name'] = 'Unknown Product'
|
| 597 |
|
| 598 |
+
# Calculate discounted price
|
| 599 |
+
if 'price_with_discount' not in df.columns and 'price' in df.columns:
|
| 600 |
+
if 'discount_percent' in df.columns:
|
| 601 |
+
df['price_with_discount'] = df['price'] * (1 - df['discount_percent'] / 100)
|
| 602 |
+
else:
|
| 603 |
+
df['price_with_discount'] = df['price']
|
| 604 |
+
|
| 605 |
+
# Add total_price for compatibility (use price_with_discount)
|
| 606 |
if 'total_price' not in df.columns:
|
| 607 |
+
if 'price_with_discount' in df.columns:
|
| 608 |
+
df['total_price'] = df['price_with_discount']
|
| 609 |
+
elif 'price' in df.columns:
|
| 610 |
+
df['total_price'] = df['price']
|
|
|
|
|
|
|
| 611 |
else:
|
| 612 |
df['total_price'] = 0
|
| 613 |
|
|
|
|
| 629 |
if 'sale_amount' not in df.columns:
|
| 630 |
df['sale_amount'] = df['total_price'] * df['quantity']
|
| 631 |
|
| 632 |
+
# Calculate total inventory value
|
| 633 |
+
if 'inventory_value' not in df.columns:
|
| 634 |
+
df['inventory_value'] = df['total_price'] * df['current_stock']
|
| 635 |
+
|
| 636 |
+
# Add article field for backward compatibility
|
| 637 |
+
if 'article' not in df.columns and 'supplier_article' in df.columns:
|
| 638 |
+
df['article'] = df['supplier_article']
|
| 639 |
+
|
| 640 |
+
# Mark low stock items (less than 5)
|
| 641 |
+
if 'is_low_stock' not in df.columns:
|
| 642 |
+
if 'current_stock' in df.columns:
|
| 643 |
+
df['is_low_stock'] = df['current_stock'] < 5
|
| 644 |
+
else:
|
| 645 |
+
df['is_low_stock'] = True
|
| 646 |
+
|
| 647 |
+
# Calculate pipeline stock (items in transit)
|
| 648 |
+
if 'pipeline_stock' not in df.columns:
|
| 649 |
+
in_way_to_client = df['in_way_to_client'] if 'in_way_to_client' in df.columns else 0
|
| 650 |
+
in_way_from_client = df['in_way_from_client'] if 'in_way_from_client' in df.columns else 0
|
| 651 |
+
df['pipeline_stock'] = in_way_to_client + in_way_from_client
|
| 652 |
+
|
| 653 |
+
logger.info(f"Processed {len(df)} stock records")
|
| 654 |
+
|
| 655 |
return df
|
| 656 |
|
| 657 |
def _process_reportdetail_data(self, df: pd.DataFrame) -> pd.DataFrame:
|