File size: 13,219 Bytes
d80bf0f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399d51d
 
 
 
 
 
 
 
 
 
 
d80bf0f
399d51d
d80bf0f
 
 
 
 
 
 
 
 
399d51d
 
 
d80bf0f
 
 
 
399d51d
9c6c702
d80bf0f
 
 
 
 
399d51d
 
d80bf0f
 
 
 
 
 
9c6c702
 
 
 
d80bf0f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
"""
Utility functions and demo data for the Wildberries Analytics Dashboard
Includes fallback data for when API is not available
"""

import pandas as pd
import numpy as np
import json
from datetime import datetime, timedelta
from typing import Dict, List, Any
import random
from config import get_config, DEMO_SETTINGS

def load_demo_sales_data(period: str = "week") -> pd.DataFrame:
    """Generate reproducible demo sales data for testing.

    Both the ``random`` and ``numpy.random`` global generators are re-seeded
    with 42 on every call, so repeated calls produce the same products,
    prices and time offsets. Absolute timestamps are anchored to the current
    time and always fall inside the requested window (never in the future).

    Args:
        period: ``"month"`` yields a 30-day window with 200-800 sales; any
            other value (including ``"week"``) yields a 7-day window with
            50-200 sales.

    Returns:
        DataFrame with one row per sale. Returns are marked by
        ``is_return`` and carry ``amount_for_pay == 0`` and
        ``sales_commission == 0``.
    """
    # Re-seed so every call yields the same demo dataset
    np.random.seed(42)
    random.seed(42)

    # NOTE(review): the returned config is not used in this function; the
    # call is kept in case get_config() has side effects — confirm before
    # removing it.
    get_config()
    demo_config = DEMO_SETTINGS

    # Window length and sales volume; unknown periods fall back to a week
    if period == "month":
        days = 30
        num_sales = random.randint(200, 800)
    else:
        days = 7
        num_sales = random.randint(50, 200)

    end_date = datetime.now()
    start_date = end_date - timedelta(days=days)

    # Product catalogue the sales are drawn from
    products = [
        {
            'product_id': 1000000 + i,
            'product_name': f'Товар {i+1}',
            'article': f'ART{1000+i}',
            'category': random.choice(demo_config["demo_categories"]),
            'brand': f'Бренд {chr(65 + i % 26)}',
            'base_price': random.randint(500, 5000),
        }
        for i in range(demo_config["demo_products_count"])
    ]

    sales_data = []

    for _ in range(num_sales):
        product = random.choice(products)

        # The day offset is limited to ``days - 1`` because up to 23h59m is
        # added on top; scaling by the full window would let timestamps land
        # up to a day after ``end_date`` (i.e. in the future).
        sale_date = start_date + timedelta(
            days=random.random() * (days - 1),
            hours=random.randint(0, 23),
            minutes=random.randint(0, 59),
        )

        # Seller discount: applied to ~60% of sales
        base_price = product['base_price']
        discount_percent = random.choice([0, 5, 10, 15, 20, 25, 30]) if random.random() < 0.6 else 0
        price_with_discount = base_price * (1 - discount_percent / 100)

        # Additional SPP discount: applied to ~30% of sales
        spp_discount = random.randint(0, 10) if random.random() < 0.3 else 0
        finished_price = price_with_discount * (1 - spp_discount / 100)

        # Roughly 10% of rows are returns; their ids start with 'R'
        is_return = random.random() < 0.1
        id_prefix = 'R' if is_return else 'S'
        sale_id = f'{id_prefix}{random.randint(100000, 999999)}'

        # Demo stand-in for the forPay API field: the seller receives 75% of
        # the discounted price, and nothing for a return.
        amount_for_pay = 0 if is_return else price_with_discount * 0.75

        sales_data.append({
            'sale_id': sale_id,
            'product_id': product['product_id'],
            'product_name': product['product_name'],
            'article': product['article'],
            'sale_date': sale_date,
            'last_change_date': sale_date,
            'warehouse': random.choice(['Коледино', 'Электросталь', 'Тула', 'Казань']),
            'country': 'Россия',
            'region': random.choice(['Московская', 'Санкт-Петербургская', 'Свердловская', 'Татарстан']),
            'city': random.choice(['Москва', 'Санкт-Петербург', 'Екатеринбург', 'Казань']),
            'total_price': price_with_discount,  # priceWithDisc is used as revenue
            'original_price': base_price,  # totalPrice
            'finished_price': finished_price,  # finishedPrice
            'discount_percent': discount_percent,
            'spp_discount': spp_discount,
            'price_with_discount': price_with_discount,
            'sale_amount': finished_price,
            'amount_for_pay': amount_for_pay,
            'sales_commission': price_with_discount - amount_for_pay,
            'quantity': 1,
            'category': product['category'],
            'brand': product['brand'],
            'is_supply': True,
            'is_realization': True,
            'is_return': is_return,
            'order_type': 'Возвратный' if is_return else 'Клиентский',
        })

    df = pd.DataFrame(sales_data)
    df['sale_date'] = pd.to_datetime(df['sale_date'])
    df['last_change_date'] = pd.to_datetime(df['last_change_date'])

    # Commission is never negative and is forced to 0 for returns (for which
    # the raw difference above would equal the full discounted price).
    df['sales_commission'] = df['sales_commission'].clip(lower=0)
    df.loc[df['is_return'], 'sales_commission'] = 0

    return df

def load_demo_inventory_data() -> pd.DataFrame:
    """Build a reproducible demo inventory table, one row per demo product.

    The ``random`` and ``numpy.random`` generators are re-seeded with 42 on
    each call. The first three products get a low stock level (0-10) and the
    next three a medium one (10-50); all others draw from the configured
    ``demo_stock_range``.
    """
    np.random.seed(42)
    random.seed(42)

    settings = DEMO_SETTINGS
    rows = []

    for idx in range(settings["demo_products_count"]):
        # Always draw from the configured range first, then override for the
        # showcase products, so the random stream is consumed the same way
        # for every product.
        stock = random.randint(*settings["demo_stock_range"])
        if idx < 3:
            stock = random.randint(0, 10)
        elif idx < 6:
            stock = random.randint(10, 50)

        # Drawn in the same order as the fields appear in the row
        to_client = random.randint(0, 20)
        from_client = random.randint(0, 5)
        warehouse = random.choice(['Коледино', 'Электросталь', 'Тула', 'Казань'])
        category = random.choice(settings["demo_categories"])
        price = random.randint(500, 5000)
        changed_at = datetime.now() - timedelta(days=random.randint(0, 3))

        row = {
            'product_id': 1000000 + idx,
            'product_name': f'Товар {idx+1}',
            'article': f'ART{1000+idx}',
            'current_stock': stock,
            'in_way_to_client': to_client,
            'in_way_from_client': from_client,
            'warehouse': warehouse,
            'category': category,
            'brand': f'Бренд {chr(65 + idx % 26)}',
            'price': price,
            'last_change_date': changed_at,
        }
        rows.append(row)

    inventory = pd.DataFrame(rows)
    inventory['last_change_date'] = pd.to_datetime(inventory['last_change_date'])
    return inventory

def process_sales_data(data: pd.DataFrame) -> pd.DataFrame:
    """Validate and clean sales data coming from the API or the demo loader.

    The input frame is never modified; a cleaned copy is returned (an empty
    input is returned unchanged).

    Steps:
      * add any missing required column (``quantity`` defaults to 1,
        ``product_name`` falls back to ``article`` when available, anything
        else gets the placeholder ``'Unknown <column>'``);
      * drop rows with a missing ``product_id`` or ``total_price``;
      * coerce numeric columns, replacing unparseable values with 0;
      * take absolute values of ``total_price`` and ``quantity``;
      * sort by ``sale_date``.

    Args:
        data: Raw sales rows.

    Returns:
        The cleaned DataFrame.
    """
    if data.empty:
        return data

    # Copy before touching anything so that filling in missing columns does
    # not leak into the caller's DataFrame.
    data = data.copy()

    required_columns = ['product_id', 'product_name', 'sale_date', 'total_price', 'quantity']

    for col in required_columns:
        if col not in data.columns:
            if col == 'quantity':
                data[col] = 1  # Default quantity
            elif col == 'product_name' and 'article' in data.columns:
                data[col] = data['article']
            else:
                data[col] = f'Unknown {col}'

    # Remove rows with missing critical data
    data = data.dropna(subset=['product_id', 'total_price'])

    # Coerce numeric columns; values that cannot be parsed become 0
    numeric_columns = ['total_price', 'quantity', 'sale_amount', 'finished_price']
    for col in numeric_columns:
        if col in data.columns:
            data[col] = pd.to_numeric(data[col], errors='coerce')
            data[col] = data[col].fillna(0)

    # Negative prices/quantities are normalised to their magnitude
    for col in ['total_price', 'quantity']:
        if col in data.columns:
            data[col] = data[col].abs()

    if 'sale_date' in data.columns:
        data = data.sort_values('sale_date')

    return data

def calculate_daily_sales(sales_data: pd.DataFrame, product_id: int = None) -> pd.Series:
    """Sum sold quantities per calendar day.

    Args:
        sales_data: Sales rows with a datetime ``sale_date`` column and a
            ``quantity`` column.
        product_id: When given (including ``0``), only rows for that product
            are counted; ``None`` aggregates all products.

    Returns:
        Series indexed by date with the total quantity sold that day. An
        empty Series is returned when there is no data or no ``sale_date``
        column.
    """
    # Explicit dtype: a bare pd.Series() warns about its default dtype on
    # older pandas versions.
    if sales_data.empty or 'sale_date' not in sales_data.columns:
        return pd.Series(dtype='float64')

    # ``is not None`` rather than truthiness, so product id 0 still filters
    if product_id is not None:
        sales_data = sales_data[sales_data['product_id'] == product_id]

    return sales_data.groupby(sales_data['sale_date'].dt.date)['quantity'].sum()

def get_product_performance_metrics(sales_data: pd.DataFrame, period_days: int = 30) -> pd.DataFrame:
    """Calculate performance metrics for each product.

    Args:
        sales_data: Sales rows with ``product_id``, ``product_name``,
            ``quantity``, ``total_price`` and ``sale_date`` columns.
        period_days: Length in days of the period the data covers, used as
            the divisor for ``avg_daily_sales``. Defaults to 30.

    Returns:
        One row per (``product_id``, ``product_name``) with the columns
        ``total_quantity``, ``total_revenue``, ``total_orders``,
        ``first_sale``, ``last_sale``, ``avg_order_value`` and
        ``avg_daily_sales``. An empty DataFrame is returned for empty input.

    Raises:
        ValueError: If ``period_days`` is not positive.
    """
    if period_days <= 0:
        raise ValueError("period_days must be a positive number of days")

    if sales_data.empty:
        return pd.DataFrame()

    # Named aggregation yields flat, self-describing column names directly
    product_metrics = sales_data.groupby(['product_id', 'product_name']).agg(
        total_quantity=('quantity', 'sum'),
        total_revenue=('total_price', 'sum'),
        total_orders=('sale_date', 'count'),
        first_sale=('sale_date', 'min'),
        last_sale=('sale_date', 'max'),
    ).round(2)

    product_metrics['avg_order_value'] = (
        product_metrics['total_revenue'] / product_metrics['total_orders']
    ).round(2)
    product_metrics['avg_daily_sales'] = product_metrics['total_quantity'] / period_days

    # Turn the group keys back into regular columns
    return product_metrics.reset_index()

def validate_api_response(response: Dict[str, Any], required_fields: List[str] = None) -> bool:
    """Check that an API response is a dict containing every required field.

    Returns ``False`` for a non-dict response or when any entry of
    ``required_fields`` is missing as a key; ``True`` otherwise. With no
    (or empty) ``required_fields`` only the dict check applies.
    """
    if not isinstance(response, dict):
        return False

    expected = required_fields or ()
    return all(name in response for name in expected)

def format_currency(amount: float, currency: str = "₽") -> str:
    """Render an amount with thousands separators, two decimals and a currency suffix.

    Missing values (``None`` or NaN) are rendered as ``"0 <currency>"``.
    """
    is_missing = amount is None or pd.isna(amount)
    return f"0 {currency}" if is_missing else f"{amount:,.2f} {currency}"

def format_number(number: float, decimals: int = 0) -> str:
    """Render a number with thousands separators and a fixed number of decimals.

    Missing values (``None`` or NaN) are rendered as ``"0"``. A non-positive
    ``decimals`` produces no fractional part.
    """
    if number is None or pd.isna(number):
        return "0"

    places = decimals if decimals > 0 else 0
    return f"{number:,.{places}f}"

def get_risk_color(days_until_stockout: float) -> str:
    """Map days-until-stockout to a hex colour: red below 7, orange below 14, green otherwise."""
    thresholds = (
        (7, "#ff4444"),   # Red
        (14, "#ffaa00"),  # Orange
    )
    for limit, colour in thresholds:
        if days_until_stockout < limit:
            return colour
    return "#44aa44"  # Green

def export_to_csv(data: pd.DataFrame, filename: str = None) -> str:
    """Write a DataFrame to a UTF-8 CSV file and return the file name.

    Every datetime column (timezone-naive or timezone-aware, any
    resolution) is written as ``YYYY-MM-DD HH:MM:SS``. The input frame is
    not modified.

    Args:
        data: Frame to export.
        filename: Target path. When omitted, a timestamped name of the form
            ``wildberries_data_<YYYYmmdd_HHMMSS>.csv`` in the current
            working directory is used.

    Returns:
        The path the CSV was written to.
    """
    if filename is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"wildberries_data_{timestamp}.csv"

    # Work on a copy so the caller's datetime columns stay datetimes
    export_data = data.copy()

    # Comparing the dtype to 'datetime64[ns]' would skip timezone-aware and
    # non-nanosecond columns; the pandas type predicate covers all of them.
    for col in export_data.columns:
        if pd.api.types.is_datetime64_any_dtype(export_data[col]):
            export_data[col] = export_data[col].dt.strftime('%Y-%m-%d %H:%M:%S')

    export_data.to_csv(filename, index=False, encoding='utf-8')

    return filename

def create_sample_data_file():
    """Assemble the sample-data payload intended for the examples directory.

    Takes the first 10 rows of the weekly demo sales and of the demo
    inventory, converts datetime values to ISO-8601 strings so the result is
    JSON-friendly, and adds a metadata section. The payload is returned as a
    dict; this function itself does not write anything to disk.
    """

    def _stringify_dates(record):
        # Replace datetime-like values with their ISO-8601 representation
        return {
            field: value.isoformat() if isinstance(value, (datetime, pd.Timestamp)) else value
            for field, value in record.items()
        }

    sales_frame = load_demo_sales_data("week")
    inventory_frame = load_demo_inventory_data()

    sales_records = sales_frame.head(10).to_dict('records')
    inventory_records = inventory_frame.head(10).to_dict('records')
    generated_at = datetime.now().isoformat()

    return {
        "sales_data": [_stringify_dates(record) for record in sales_records],
        "inventory_data": [_stringify_dates(record) for record in inventory_records],
        "metadata": {
            "generated_at": generated_at,
            "description": "Sample data for Wildberries Analytics Dashboard",
            "note": "This is demo data for testing purposes only",
        },
    }

# Module-level caches for demo data, so each dataset is generated only once
# per process instead of on every request.
_demo_sales_cache = {}  # filled by get_cached_demo_sales: period -> sales DataFrame
_demo_inventory_cache = None  # set by get_cached_demo_inventory on first use

def get_cached_demo_sales(period: str) -> pd.DataFrame:
    """Return demo sales for ``period``, generating them only on first request.

    The generated frame is stored in the module-level cache keyed by
    ``period``; callers always receive a copy, so they can modify the result
    without affecting later calls.
    """
    frame = _demo_sales_cache.get(period)
    if frame is None:
        frame = load_demo_sales_data(period)
        _demo_sales_cache[period] = frame

    return frame.copy()

def get_cached_demo_inventory() -> pd.DataFrame:
    """Return demo inventory data, generating it only on first request.

    The generated frame is kept in a module-level variable; callers always
    receive a copy, so they can modify the result without affecting later
    calls.
    """
    global _demo_inventory_cache

    if _demo_inventory_cache is not None:
        return _demo_inventory_cache.copy()

    _demo_inventory_cache = load_demo_inventory_data()
    return _demo_inventory_cache.copy()

# Cached counterparts of the load_demo_* generators (thin wrappers over the get_cached_* accessors)
def load_demo_sales_data_cached(period: str = "week") -> pd.DataFrame:
    """Return demo sales data for ``period`` through the module-level cache.

    Thin wrapper that delegates to ``get_cached_demo_sales``; ``period``
    defaults to ``"week"``.
    """
    return get_cached_demo_sales(period)

def load_demo_inventory_data_cached() -> pd.DataFrame:
    """Return demo inventory data through the module-level cache.

    Thin wrapper that delegates to ``get_cached_demo_inventory``.
    """
    return get_cached_demo_inventory()