Genesis AI — Full Code Flow

1

User clicks ↑ Upload button

Frontend

Header.jsx → UploadModal.jsx

Code that runs (Header.jsx)

const [uploadOpen, setUploadOpen] = useState(false)

// Button in header
<button onClick={() => setUploadOpen(true)}>
  ↑ Upload
</button>

// Opens modal
<UploadModal
  open={uploadOpen}
  onClose={() => setUploadOpen(false)}
  onSuccess={handleSuccess}
/>

What happens

setUploadOpen(true) triggers React state update
UploadModal component mounts and becomes visible
On mount, UploadModal calls POST /api/upload/reset to clear any previous job state on the server
User sees drag-and-drop zone for .xlsx file

2

Browser resets server job state

APIPython

UploadModal.jsx → upload_server.py

Frontend call (UploadModal.jsx)

// Called when modal opens
useEffect(() => {
  if (open) {
    fetch('/api/upload/reset', { method: 'POST' })
  }
}, [open])

Python handler (upload_server.py)

@app.post("/api/upload/reset")
def reset_job():
    with _job_lock:
        _job["status"]   = "idle"
        _job["message"]  = ""
        _job["logs"]     = ""
        _job["filename"] = None
    return {"ok": True}

3

User drops / picks .xlsx file → POST /api/upload

APIPython

UploadModal.jsx → upload_server.py

Frontend call (UploadModal.jsx)

async function handleFile(file) {
  const form = new FormData()
  form.append('file', file)

  const res = await fetch('/api/upload', {
    method: 'POST',
    body: form          // multipart/form-data
  })
  const json = await res.json()
  // → { ok: true, filename: "..." }

  setPhase('running')
  startPolling()     // begins GET /status every 1.5s
}

Python handler (upload_server.py)

@app.post("/api/upload")
async def upload_excel(file: UploadFile):
    # Save file to disk
    dest = UPLOAD_DIR / file.filename
    dest.write_bytes(await file.read())

    # Launch pipeline in background thread
    thread = threading.Thread(
        target=_run_pipeline,
        args=(dest,), daemon=True
    )
    thread.start()

    # Return immediately — don't wait for pipeline
    return {"ok": True, "filename": file.filename}

4

Background thread launches Python pipeline subprocess

Python

upload_server.py → pipeline/run.py

Code (upload_server.py → _run_pipeline)

def _run_pipeline(xlsx_path):
    pipeline_python = _find_pipeline_python()
    # finds Python 3.13 that has pandas

    cmd = [
        pipeline_python, "-m", "pipeline.run",
        "--input",  str(xlsx_path),
        "--output", str(OUTPUT_DIR),
    ]
    env = os.environ.copy()
    env["PYTHONIOENCODING"] = "utf-8"

    # Stream output line-by-line to terminal
    proc = subprocess.Popen(
        cmd, cwd=str(BASE_DIR),
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True, env=env,
    )
    for line in proc.stdout:
        _annotate(line, xlsx_path.name) # print step headers
        _tprint(line)                   # live terminal output

While pipeline runs — browser polls

// UploadModal.jsx polls every 1500ms
async function poll() {
  const s = await fetch('/api/upload/status')
  const l = await fetch('/api/upload/logs')
  // shows live logs in modal
  if (s.status === 'done')  stopPolling()
  if (s.status === 'error') stopPolling()
}

→ GET /api/upload/status returns { status, message, elapsed_s }
→ GET /api/upload/logs returns { logs } — live pipeline stdout

5

Load Excel data — pipeline/loader.py

Python

pipeline/loader.py

load_source_data( )

def load_source_data(path):
    # Opens sheet 1_Source_Data
    raw = pd.read_excel(path,
        sheet_name="1_Source_Data",
        header=None)

    # Finds header row (where col[1] == "Brand")
    hdr_row = next(i for i, r in raw.iterrows()
        if str(r.iloc[1]).strip().lower() == "brand")

    df = pd.read_excel(path,
        sheet_name="1_Source_Data",
        skiprows=hdr_row, header=0)

    # Computes Price = Value / Volume
    df["Price"] = df["Value"] / df["Volume"]
    df["Date"]  = pd.to_datetime(df["Date"])
    df["Month"] = df["Date"].dt.to_period("M")
    return df  # 3,207 rows

read_brand_config( )

def read_brand_config(path):
    # Opens sheet 2_Brand_Config
    raw = pd.read_excel(path,
        sheet_name="2_Brand_Config",
        header=None)

    # Reads focal brand name
    focal1 = _get("Focal Brand Name")

    # Reads competitor brand names
    for _, row in raw.iterrows():
        slot = str(row.iloc[1]).strip()
        if slot.startswith("Comp"):
            config["competitors"].append(row.iloc[2])

    return config
    # → { focal_brands: ['DABUR SARSON AMLA'],
    #     competitors: ['NIHAR...','PARACHUTE'] }

Reads sheet 1: 3,207 rows of monthly brand/channel/pack sales data
Reads sheet 2: focal brand name + competitor list
Output: clean df DataFrame passed to all subsequent steps

6

Build wide pivot tables — pipeline/modelling.py

Python

pipeline/modelling.py → build_wide_pivot()

build_wide_pivot( ) — called per Channel × Region

def build_wide_pivot(df, focal, competitors,
                     channel, region, pack_order):
    sub = df[(df["Channel"] == channel)
           & (df["Region"]  == region)]

    # Focal brand columns
    focal_df = sub[sub["Brand"] == focal].rename(columns={
        "Price": "Price_F", "Volume": "Vol_F",
        "Distribution": "Dist_F", "Value": "Val_F",
    })

    # Merge competitor columns
    for comp in competitors:
        short = comp[:8].replace(" ", "_")
        comp_df = sub[sub["Brand"] == comp].rename(columns={
            "Price": f"Price_{short}",
            "Volume": f"Vol_{short}",
        })
        m = m.merge(comp_df, on=["Month","Pack Size Group"])

    # Category volume = sum of all brands
    cat = sub.groupby(["Month","Pack Size Group"])["Volume"]
            .sum().rename("Cat_Vol")

    # Adjacent pack volumes (for cannibalization)
    m["Vol_Up"]   = # volume of next larger pack
    m["Vol_Down"] = # volume of next smaller pack
    return m  # 674 rows, 33 grains total

Columns in the output DataFrame

Focal brand: Vol_F, Price_F, Dist_F, Val_F
Per competitor: Price_NIHAR__, Vol_NIHAR__, Dist_NIHAR__ etc.
Category: Cat_Vol — sum of ALL brands at same CH/RG/Pack/Month
Cannibalization: Vol_Up, Vol_Down — adjacent pack volumes
Result: 33 grains × 24 months = 674 rows total

7

Run OLS elasticity models — pipeline/modelling.py

Python

pipeline/modelling.py → run_elasticity_models()

run_elasticity_models( ) — tests 343 spec combinations

def run_elasticity_models(all_data, competitors, pack_order):
    for (fb, ch, rg, ps), g in grains:
        # Build log-transformed variables
        log_vol  = np.log(g["Vol_F"].clip(1e-9))
        log_p    = np.log(g["Price_F"].clip(1e-9))
        log_dist = np.log(g["Dist_F"].clip(1e-9))
        log_cat  = np.log(g["Cat_Vol"].clip(1e-9))

        # Test every combination of specs
        for use_seasonal in (False, True):
          for combo in comp_combos:  # none, 1 comp, 2 comps, 3 comps
            X = [log_p, log_dist, log_cat, *combo]
            res = ols(log_vol, X)

            # Sign guardrail: own_e < 0 AND dist_e > 0
            sign_ok = res.own_e < 0 and res.dist_e > 0

        # Select best Adj-R² with correct sign
        best = ok_specs.loc[ok_specs["AdjR2"].idxmax()]

ols( ) — core OLS estimator

def ols(y, X):
    Xc = np.column_stack([np.ones(n), X])

    # Solve via numpy least squares
    betas, _, _, _ = np.linalg.lstsq(Xc, y)

    # Compute R², Adj-R², t-stats, p-values
    y_hat  = Xc @ betas
    sse    = ((y - y_hat)**2).sum()
    sst    = ((y - y.mean())**2).sum()
    r2     = 1 - sse / sst
    adj_r2 = 1 - (1-r2) * (n-1) / (n-k-1)

    return {"betas": betas, "adj_r2": adj_r2,
            "t": t, "p": p}
    # own-price elasticity = betas[1]  (log-log → direct elasticity)

343 spec combos tested per grain
19 / 26 grains get direct model with correct sign
7 grains flagged as Forced=True → go to proxies

8

Assign proxy elasticities — pipeline/proxies.py

Python

pipeline/proxies.py → assign_proxies()

assign_proxies( ) — fills 7 forced grains

def assign_proxies(best_df, pack_order):
    for _, row in best_df.iterrows():
        if not row["Forced"]:
            # Direct model — use as-is
            row["Final_OwnE"]  = row["OwnE"]
            row["IsProxy"]     = False
            continue

        # Priority 1: interpolate adjacent packs
        if lower and upper:
            row["Final_OwnE"] = (lo_e + hi_e) / 2
            row["ProxyMethod"] = f"Interp {lower} & {upper}"

        # Priority 2: borrow from nearest valid pack
        else:
            row["Final_OwnE"] = closest["OwnE"]
            row["ProxyMethod"] = f"Borrowed from {closest['Pack']}"

    # Clamp to [-6.0, 0]  (config: ELASTICITY_ABS_CAP = 6.0)
    final_df["Final_OwnE"] = final_df["Final_OwnE"].clip(lower=-6.0)
    return final_df  # 26 rows, all sign-OK

compute_freq_anchors( )

def compute_freq_anchors(df, focal):
    for ch in channels:
      for rg in regions:
        # Vol salience per pack
        vs = { ps: vol / brand_tot * 100
               for ps in packs }

        # Anchor = dominant pack  
        # (must be ≥ 1.20x next largest)
        if top >= 1.20 * second:
            anchor = max(vs, key=vs.get)

        anchors[f"{ch}|{rg}"] = {
            "anchor": anchor,
            "vol_sal": vs[anchor]
        }
    return anchors
    # e.g. TT|All India → 33-80  (72.5% vol)

9

Write JSON outputs — pipeline/exporters/

Output

5 exporter modules → 16 JSON files

exporters/stats.py

build_model_export(final_df, grain_metrics)
# → models.json  (elasticity per grain)

build_stats_json(df, focal, final_df, anchors)
# → stats.json   (KPIs: vol, val, growth%)

# → freq_anchors.json  (dominant packs)

exporters/market.py

build_trend_json(df, focal, competitors)
# → trend.json   (monthly time-series)

build_ms_json(df, focal)
# → ms.json      (market share yr25 vs yr24)

build_comp_ms_json(df, focal, competitors)
# → comp_ms.json (all-brand share)

build_vtm_json(df, focal, competitors)
# → vtm.json     (volume-to-market)

build_vol_salience_json(df, focal)
# → vol_salience.json

build_val_share_json(df, focal)
# → val_share.json

exporters/ppa.py

build_ppa_json(df, focal, competitors,
    channel=mt_ch, excel_sheet="3_PPA_MT")
# reads PPA sheet from Excel
# → ppa_mt.json

build_ppa_json(df, focal, competitors,
    channel=tt_ch, excel_sheet="4_PPA_TT")
# → ppa_tt.json

exporters/analytics.py

build_interaction_json(df, focal, competitors)
# Pearson correlation per brand×pack pair
# → interaction.json

build_growth_decomp_json(df, focal)
# pp-contribution to brand vol growth
# → growth_decomp.json

build_pgi_json(df, focal, pack_order)
# price gradient index per channel
# → pgi.json

exporters/recommendations.py

build_recs_json(df, focal, competitors,
    final_df, pack_order, excel_path)
# pricing rec cards per CH × Pack
# → recs_full.json

models.json

→ Elasticity Results tab

stats.json

→ Header KPIs

freq_anchors.json

→ Methodology tab

trend.json

→ Trends tab

ms.json

→ shared

comp_ms.json

→ shared

vtm.json

→ Brand Interaction tab

vol_salience.json

→ shared

val_share.json

→ shared

ppa_mt.json

→ PPA tab MT

ppa_tt.json

→ PPA tab TT

interaction.json

→ Brand Interaction tab

growth_decomp.json

→ Growth Decomp tab

pgi.json

→ Price Gradient tab

recs_full.json

→ Recommendations tab

recs.json

→ alias of recs_full

10

User clicks Reload Dashboard → React fetches all data

APIPythonFrontend

DataContext.jsx → upload_server.py → React tabs

Python endpoint (upload_server.py)

@app.get("/api/data/{key}")
def get_data(key: str):
    fname    = DATA_KEY_MAP[key]
    # e.g. "models" → "models.json"

    json_file = OUTPUT_DIR / fname
    # e.g. output/models.json

    _tprint(f"[DATA] GET /api/data/{key}"
            f" → output/{fname}"
            f" → React")

    with open(json_file) as fh:
        data = json.load(fh)

    return JSONResponse(content=data)

Frontend fetch (DataContext.jsx)

async function fetchKey(key) {
  // 1st: try FastAPI backend
  try {
    const res = await fetch(`/api/data/${key}`)
    if (res.ok) return res.json()
  } catch (_) {}

  // fallback: Vite static file
  const res = await fetch(`/data/${key}.json`)
  return res.json()
}

// Called for all 16 keys
for (const key of KEYS) {
  results[key] = await fetchKey(key)
}
// → React context updated
// → All 9 tabs re-render

Terminal shows: [DATA] GET /api/data/models → output/models.json (15.2 KB) → React
× 16 calls total
All tabs receive fresh pipeline-computed data

⚡

All API calls — in order of execution

Order	Method	Endpoint	Caller	Python handler	Response
1	POST	`/api/upload/reset`	UploadModal (on open)	`reset_job()`	`{ ok: true }`
2	POST	`/api/upload`	UploadModal (file pick)	`upload_excel()`	`{ ok: true, filename }`
3…N	GET	`/api/upload/status`	UploadModal (every 1.5s)	`get_status()`	`{ status, elapsed_s }`
3…N	GET	`/api/upload/logs`	UploadModal (every 1.5s)	`get_logs()`	`{ logs }`
N+1	GET	`/api/data/models`	DataContext (reload)	`get_data("models")`	models.json contents
N+2	GET	`/api/data/stats`	DataContext (reload)	`get_data("stats")`	stats.json contents
…	GET	`/api/data/{key}` ×14 more	DataContext (reload)	`get_data(key)`	JSON for each key