Lior-0618 committed on
Commit
a85acb5
·
1 Parent(s): 651926c

chore: merge master → dev/video-fer (live camera FER mode)

Browse files

New from master:
- Live camera page with real-time browser-side FER
- use-fer.ts / use-streaming-transcription.ts hooks
- Sidebar /live link, warmup route, next.config.ts updates

Dockerfile downloads emotion_model_web.onnx from GitHub at build
time (HF Xet restriction prevents pushing binary via git).

Dockerfile CHANGED
@@ -36,6 +36,11 @@ RUN cd web && npm ci
36
 
37
  COPY web/ web/
38
 
 
 
 
 
 
39
  # Build with empty API base → browser uses relative paths → nginx routes /api/*
40
  RUN cd web && NEXT_PUBLIC_API_URL="" npm run build \
41
  && cp -r public .next/standalone/public \
 
36
 
37
  COPY web/ web/
38
 
39
+ # Download browser-side FER model (ONNX) — not in HF Space git (Xet restriction)
40
+ RUN curl -fsSL \
41
+ "https://raw.githubusercontent.com/aytoast/ser/master/web/public/emotion_model_web.onnx" \
42
+ -o web/public/emotion_model_web.onnx
43
+
44
  # Build with empty API base → browser uses relative paths → nginx routes /api/*
45
  RUN cd web && NEXT_PUBLIC_API_URL="" npm run build \
46
  && cp -r public .next/standalone/public \
web/.gitignore CHANGED
@@ -39,3 +39,7 @@ yarn-error.log*
39
  # typescript
40
  *.tsbuildinfo
41
  next-env.d.ts
 
 
 
 
 
39
  # typescript
40
  *.tsbuildinfo
41
  next-env.d.ts
42
+
43
+ # ONNX Runtime WASM (copied from node_modules at build time)
44
+ public/*.wasm
45
+ public/*.mjs
web/next.config.ts CHANGED
@@ -2,6 +2,8 @@ import type { NextConfig } from "next";
2
 
3
  const nextConfig: NextConfig = {
4
  output: "standalone",
 
 
5
  };
6
 
7
  export default nextConfig;
 
2
 
3
  const nextConfig: NextConfig = {
4
  output: "standalone",
5
+ // Allow Turbopack (Next.js 16 default) — WASM files served from public/
6
+ turbopack: {},
7
  };
8
 
9
  export default nextConfig;
web/package-lock.json CHANGED
@@ -16,6 +16,7 @@
16
  "framer-motion": "^12.34.3",
17
  "lucide-react": "^0.575.0",
18
  "next": "16.1.6",
 
19
  "radix-ui": "^1.4.3",
20
  "react": "19.2.3",
21
  "react-dom": "19.2.3",
@@ -1985,6 +1986,70 @@
1985
  "react-dom": ">= 16.8"
1986
  }
1987
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1988
  "node_modules/@radix-ui/number": {
1989
  "version": "1.1.1",
1990
  "resolved": "https://registry.npmjs.org/@radix-ui/number/-/number-1.1.1.tgz",
@@ -4018,7 +4083,6 @@
4018
  "version": "20.19.35",
4019
  "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.35.tgz",
4020
  "integrity": "sha512-Uarfe6J91b9HAUXxjvSOdiO2UPOKLm07Q1oh0JHxoZ1y8HoqxDAu3gVrsrOHeiio0kSsoVBt4wFrKOm0dKxVPQ==",
4021
- "dev": true,
4022
  "license": "MIT",
4023
  "dependencies": {
4024
  "undici-types": "~6.21.0"
@@ -7046,6 +7110,12 @@
7046
  "node": ">=16"
7047
  }
7048
  },
 
 
 
 
 
 
7049
  "node_modules/flatted": {
7050
  "version": "3.3.3",
7051
  "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.3.tgz",
@@ -7424,6 +7494,12 @@
7424
  "node": "^12.22.0 || ^14.16.0 || ^16.0.0 || >=17.0.0"
7425
  }
7426
  },
 
 
 
 
 
 
7427
  "node_modules/has-bigints": {
7428
  "version": "1.1.0",
7429
  "resolved": "https://registry.npmjs.org/has-bigints/-/has-bigints-1.1.0.tgz",
@@ -8850,6 +8926,12 @@
8850
  "url": "https://github.com/sponsors/sindresorhus"
8851
  }
8852
  },
 
 
 
 
 
 
8853
  "node_modules/loose-envify": {
8854
  "version": "1.4.0",
8855
  "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
@@ -9520,6 +9602,26 @@
9520
  "url": "https://github.com/sponsors/sindresorhus"
9521
  }
9522
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9523
  "node_modules/open": {
9524
  "version": "11.0.0",
9525
  "resolved": "https://registry.npmjs.org/open/-/open-11.0.0.tgz",
@@ -9785,6 +9887,12 @@
9785
  "node": ">=16.20.0"
9786
  }
9787
  },
 
 
 
 
 
 
9788
  "node_modules/possible-typed-array-names": {
9789
  "version": "1.1.0",
9790
  "resolved": "https://registry.npmjs.org/possible-typed-array-names/-/possible-typed-array-names-1.1.0.tgz",
@@ -9913,6 +10021,30 @@
9913
  "react-is": "^16.13.1"
9914
  }
9915
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9916
  "node_modules/proxy-addr": {
9917
  "version": "2.0.7",
9918
  "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz",
@@ -11669,7 +11801,6 @@
11669
  "version": "6.21.0",
11670
  "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
11671
  "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
11672
- "dev": true,
11673
  "license": "MIT"
11674
  },
11675
  "node_modules/unicorn-magic": {
 
16
  "framer-motion": "^12.34.3",
17
  "lucide-react": "^0.575.0",
18
  "next": "16.1.6",
19
+ "onnxruntime-web": "^1.20.1",
20
  "radix-ui": "^1.4.3",
21
  "react": "19.2.3",
22
  "react-dom": "19.2.3",
 
1986
  "react-dom": ">= 16.8"
1987
  }
1988
  },
1989
+ "node_modules/@protobufjs/aspromise": {
1990
+ "version": "1.1.2",
1991
+ "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
1992
+ "integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==",
1993
+ "license": "BSD-3-Clause"
1994
+ },
1995
+ "node_modules/@protobufjs/base64": {
1996
+ "version": "1.1.2",
1997
+ "resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz",
1998
+ "integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==",
1999
+ "license": "BSD-3-Clause"
2000
+ },
2001
+ "node_modules/@protobufjs/codegen": {
2002
+ "version": "2.0.4",
2003
+ "resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz",
2004
+ "integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==",
2005
+ "license": "BSD-3-Clause"
2006
+ },
2007
+ "node_modules/@protobufjs/eventemitter": {
2008
+ "version": "1.1.0",
2009
+ "resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz",
2010
+ "integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==",
2011
+ "license": "BSD-3-Clause"
2012
+ },
2013
+ "node_modules/@protobufjs/fetch": {
2014
+ "version": "1.1.0",
2015
+ "resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz",
2016
+ "integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==",
2017
+ "license": "BSD-3-Clause",
2018
+ "dependencies": {
2019
+ "@protobufjs/aspromise": "^1.1.1",
2020
+ "@protobufjs/inquire": "^1.1.0"
2021
+ }
2022
+ },
2023
+ "node_modules/@protobufjs/float": {
2024
+ "version": "1.0.2",
2025
+ "resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz",
2026
+ "integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==",
2027
+ "license": "BSD-3-Clause"
2028
+ },
2029
+ "node_modules/@protobufjs/inquire": {
2030
+ "version": "1.1.0",
2031
+ "resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz",
2032
+ "integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==",
2033
+ "license": "BSD-3-Clause"
2034
+ },
2035
+ "node_modules/@protobufjs/path": {
2036
+ "version": "1.1.2",
2037
+ "resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz",
2038
+ "integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==",
2039
+ "license": "BSD-3-Clause"
2040
+ },
2041
+ "node_modules/@protobufjs/pool": {
2042
+ "version": "1.1.0",
2043
+ "resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz",
2044
+ "integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==",
2045
+ "license": "BSD-3-Clause"
2046
+ },
2047
+ "node_modules/@protobufjs/utf8": {
2048
+ "version": "1.1.0",
2049
+ "resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz",
2050
+ "integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==",
2051
+ "license": "BSD-3-Clause"
2052
+ },
2053
  "node_modules/@radix-ui/number": {
2054
  "version": "1.1.1",
2055
  "resolved": "https://registry.npmjs.org/@radix-ui/number/-/number-1.1.1.tgz",
 
4083
  "version": "20.19.35",
4084
  "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.35.tgz",
4085
  "integrity": "sha512-Uarfe6J91b9HAUXxjvSOdiO2UPOKLm07Q1oh0JHxoZ1y8HoqxDAu3gVrsrOHeiio0kSsoVBt4wFrKOm0dKxVPQ==",
 
4086
  "license": "MIT",
4087
  "dependencies": {
4088
  "undici-types": "~6.21.0"
 
7110
  "node": ">=16"
7111
  }
7112
  },
7113
+ "node_modules/flatbuffers": {
7114
+ "version": "1.12.0",
7115
+ "resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-1.12.0.tgz",
7116
+ "integrity": "sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ==",
7117
+ "license": "SEE LICENSE IN LICENSE.txt"
7118
+ },
7119
  "node_modules/flatted": {
7120
  "version": "3.3.3",
7121
  "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.3.tgz",
 
7494
  "node": "^12.22.0 || ^14.16.0 || ^16.0.0 || >=17.0.0"
7495
  }
7496
  },
7497
+ "node_modules/guid-typescript": {
7498
+ "version": "1.0.9",
7499
+ "resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz",
7500
+ "integrity": "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==",
7501
+ "license": "ISC"
7502
+ },
7503
  "node_modules/has-bigints": {
7504
  "version": "1.1.0",
7505
  "resolved": "https://registry.npmjs.org/has-bigints/-/has-bigints-1.1.0.tgz",
 
8926
  "url": "https://github.com/sponsors/sindresorhus"
8927
  }
8928
  },
8929
+ "node_modules/long": {
8930
+ "version": "5.3.2",
8931
+ "resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz",
8932
+ "integrity": "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==",
8933
+ "license": "Apache-2.0"
8934
+ },
8935
  "node_modules/loose-envify": {
8936
  "version": "1.4.0",
8937
  "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
 
9602
  "url": "https://github.com/sponsors/sindresorhus"
9603
  }
9604
  },
9605
+ "node_modules/onnxruntime-common": {
9606
+ "version": "1.20.1",
9607
+ "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.20.1.tgz",
9608
+ "integrity": "sha512-YiU0s0IzYYC+gWvqD1HzLc46Du1sXpSiwzKb63PACIJr6LfL27VsXSXQvt68EzD3V0D5Bc0vyJTjmMxp0ylQiw==",
9609
+ "license": "MIT"
9610
+ },
9611
+ "node_modules/onnxruntime-web": {
9612
+ "version": "1.20.1",
9613
+ "resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.20.1.tgz",
9614
+ "integrity": "sha512-TePF6XVpLL1rWVMIl5Y9ACBQcyCNFThZON/jgElNd9Txb73CIEGlklhYR3UEr1cp5r0rbGI6nDwwrs79g7WjoA==",
9615
+ "license": "MIT",
9616
+ "dependencies": {
9617
+ "flatbuffers": "^1.12.0",
9618
+ "guid-typescript": "^1.0.9",
9619
+ "long": "^5.2.3",
9620
+ "onnxruntime-common": "1.20.1",
9621
+ "platform": "^1.3.6",
9622
+ "protobufjs": "^7.2.4"
9623
+ }
9624
+ },
9625
  "node_modules/open": {
9626
  "version": "11.0.0",
9627
  "resolved": "https://registry.npmjs.org/open/-/open-11.0.0.tgz",
 
9887
  "node": ">=16.20.0"
9888
  }
9889
  },
9890
+ "node_modules/platform": {
9891
+ "version": "1.3.6",
9892
+ "resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz",
9893
+ "integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==",
9894
+ "license": "MIT"
9895
+ },
9896
  "node_modules/possible-typed-array-names": {
9897
  "version": "1.1.0",
9898
  "resolved": "https://registry.npmjs.org/possible-typed-array-names/-/possible-typed-array-names-1.1.0.tgz",
 
10021
  "react-is": "^16.13.1"
10022
  }
10023
  },
10024
+ "node_modules/protobufjs": {
10025
+ "version": "7.5.4",
10026
+ "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.4.tgz",
10027
+ "integrity": "sha512-CvexbZtbov6jW2eXAvLukXjXUW1TzFaivC46BpWc/3BpcCysb5Vffu+B3XHMm8lVEuy2Mm4XGex8hBSg1yapPg==",
10028
+ "hasInstallScript": true,
10029
+ "license": "BSD-3-Clause",
10030
+ "dependencies": {
10031
+ "@protobufjs/aspromise": "^1.1.2",
10032
+ "@protobufjs/base64": "^1.1.2",
10033
+ "@protobufjs/codegen": "^2.0.4",
10034
+ "@protobufjs/eventemitter": "^1.1.0",
10035
+ "@protobufjs/fetch": "^1.1.0",
10036
+ "@protobufjs/float": "^1.0.2",
10037
+ "@protobufjs/inquire": "^1.1.0",
10038
+ "@protobufjs/path": "^1.1.2",
10039
+ "@protobufjs/pool": "^1.1.0",
10040
+ "@protobufjs/utf8": "^1.1.0",
10041
+ "@types/node": ">=13.7.0",
10042
+ "long": "^5.0.0"
10043
+ },
10044
+ "engines": {
10045
+ "node": ">=12.0.0"
10046
+ }
10047
+ },
10048
  "node_modules/proxy-addr": {
10049
  "version": "2.0.7",
10050
  "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz",
 
11801
  "version": "6.21.0",
11802
  "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
11803
  "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
 
11804
  "license": "MIT"
11805
  },
11806
  "node_modules/unicorn-magic": {
web/package.json CHANGED
@@ -17,6 +17,7 @@
17
  "framer-motion": "^12.34.3",
18
  "lucide-react": "^0.575.0",
19
  "next": "16.1.6",
 
20
  "radix-ui": "^1.4.3",
21
  "react": "19.2.3",
22
  "react-dom": "19.2.3",
 
17
  "framer-motion": "^12.34.3",
18
  "lucide-react": "^0.575.0",
19
  "next": "16.1.6",
20
+ "onnxruntime-web": "^1.20.1",
21
  "radix-ui": "^1.4.3",
22
  "react": "19.2.3",
23
  "react-dom": "19.2.3",
web/src/app/api/warmup/route.ts ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Warmup endpoint: pings the Modal API /health to wake the GPU container.
3
+ * Modal containers have a 5-min scaledown window, so hitting /health
4
+ * during page load ensures the container is warm when recording starts.
5
+ */
6
+
7
+ const MODAL_API_URL =
8
+ process.env.MODAL_API_URL ??
9
+ "https://yongkang-zou1999--evoxtral-api-evoxtralmodel-web.modal.run";
10
+
11
+ export async function GET() {
12
+ try {
13
+ const res = await fetch(`${MODAL_API_URL}/health`, {
14
+ signal: AbortSignal.timeout(60_000), // Modal cold start can take up to 60s
15
+ });
16
+ const data = await res.json();
17
+ return Response.json(data);
18
+ } catch {
19
+ return Response.json({ status: "unreachable" }, { status: 502 });
20
+ }
21
+ }
web/src/app/live/page.tsx ADDED
@@ -0,0 +1,308 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use client"
2
+
3
+ import React, { useCallback, useEffect, useRef, useState } from "react"
4
+ import Link from "next/link"
5
+ import { ArrowLeft, VideoCamera, CircleNotch } from "@phosphor-icons/react"
6
+ import { useFER, FER_LABELS, type EmotionScores } from "@/hooks/use-fer"
7
+ import { useStreamingTranscription } from "@/hooks/use-streaming-transcription"
8
+
9
+ // ── Emotion bars overlay (bottom-left, transparent) ─────────────────────
10
+ function EmotionBars({ scores }: { scores: EmotionScores }) {
11
+ return (
12
+ <div className="absolute bottom-3 left-3 w-56 rounded-xl bg-black/40 backdrop-blur-md px-4 py-3">
13
+ <div className="space-y-1.5">
14
+ {FER_LABELS.map((label) => {
15
+ const value = scores[label] ?? 0
16
+ return (
17
+ <div key={label} className="flex items-center gap-2">
18
+ <div className="flex-1 h-[5px] bg-white/10 rounded-full overflow-hidden">
19
+ <div
20
+ className="h-full rounded-full bg-white/70 transition-all duration-300 ease-out"
21
+ style={{ width: `${Math.max(value * 100, 0.5)}%` }}
22
+ />
23
+ </div>
24
+ <span className="text-[10px] text-white/70 font-medium w-14 text-right">
25
+ {label}
26
+ </span>
27
+ </div>
28
+ )
29
+ })}
30
+ </div>
31
+ </div>
32
+ )
33
+ }
34
+
35
+ // ── Warmup steps ────────────────────────────────────────────────────────
36
+ type WarmupStep = { label: string; status: "pending" | "loading" | "done" | "error" }
37
+
38
+ // ── Main page ───────────────────────────────────────────────────────────
39
+ export default function LivePage() {
40
+ const videoRef = useRef<HTMLVideoElement>(null)
41
+ const transcriptRef = useRef<HTMLDivElement>(null)
42
+ const streamRef = useRef<MediaStream | null>(null)
43
+ const ferIntervalRef = useRef<ReturnType<typeof setInterval> | null>(null)
44
+
45
+ const [cameraReady, setCameraReady] = useState(false)
46
+ const [permissionError, setPermissionError] = useState<string | null>(null)
47
+ const [warmupDone, setWarmupDone] = useState(false)
48
+ const [warmupSteps, setWarmupSteps] = useState<WarmupStep[]>([
49
+ { label: "Camera & Microphone", status: "pending" },
50
+ { label: "FER Model (emotion detection)", status: "pending" },
51
+ { label: "Evoxtral API (transcription)", status: "pending" },
52
+ ])
53
+
54
+ const { isLoaded: ferLoaded, scores, preload: preloadFER, classify } = useFER()
55
+ const { isRecording, isTranscribing, transcript, currentChunk, start, stop, reset } =
56
+ useStreamingTranscription()
57
+
58
+ // ── Warmup sequence ─────────────────────────────────────────────────
59
+ useEffect(() => {
60
+ let cancelled = false
61
+
62
+ const updateStep = (idx: number, status: WarmupStep["status"]) => {
63
+ setWarmupSteps((prev) => prev.map((s, i) => (i === idx ? { ...s, status } : s)))
64
+ }
65
+
66
+ async function warmup() {
67
+ // Step 1: Camera & Mic
68
+ updateStep(0, "loading")
69
+ try {
70
+ const stream = await navigator.mediaDevices.getUserMedia({
71
+ video: true,
72
+ audio: true,
73
+ })
74
+ if (cancelled) {
75
+ stream.getTracks().forEach((t) => t.stop())
76
+ return
77
+ }
78
+ streamRef.current = stream
79
+ if (videoRef.current) {
80
+ videoRef.current.srcObject = stream
81
+ }
82
+ setCameraReady(true)
83
+ updateStep(0, "done")
84
+ } catch (err: unknown) {
85
+ if (cancelled) return
86
+ updateStep(0, "error")
87
+ if (err instanceof DOMException && err.name === "NotAllowedError") {
88
+ setPermissionError(
89
+ "Camera and microphone access was denied. Please allow permissions and reload."
90
+ )
91
+ } else {
92
+ setPermissionError(
93
+ "Could not access camera or microphone. Check your device settings."
94
+ )
95
+ }
96
+ return
97
+ }
98
+
99
+ // Step 2: FER model (parallel with step 3)
100
+ updateStep(1, "loading")
101
+ const ferPromise = preloadFER()
102
+ .then(() => { if (!cancelled) updateStep(1, "done") })
103
+ .catch(() => { if (!cancelled) updateStep(1, "error") })
104
+
105
+ // Step 3: Evoxtral API warmup — actually wake the Modal GPU container
106
+ updateStep(2, "loading")
107
+ const apiPromise = fetch("/api/warmup")
108
+ .then((r) => r.json())
109
+ .then((data) => {
110
+ if (!cancelled) updateStep(2, data.status === "ok" ? "done" : "error")
111
+ })
112
+ .catch(() => { if (!cancelled) updateStep(2, "error") })
113
+
114
+ await Promise.all([ferPromise, apiPromise])
115
+ if (!cancelled) setWarmupDone(true)
116
+ }
117
+
118
+ warmup()
119
+
120
+ return () => {
121
+ cancelled = true
122
+ streamRef.current?.getTracks().forEach((t) => t.stop())
123
+ streamRef.current = null
124
+ }
125
+ }, [preloadFER])
126
+
127
+ // ── FER classification interval (always on when camera ready) ────────
128
+ useEffect(() => {
129
+ if (!ferLoaded || !cameraReady) return
130
+
131
+ ferIntervalRef.current = setInterval(() => {
132
+ if (videoRef.current) {
133
+ classify(videoRef.current)
134
+ }
135
+ }, 500)
136
+
137
+ return () => {
138
+ if (ferIntervalRef.current) clearInterval(ferIntervalRef.current)
139
+ }
140
+ }, [ferLoaded, cameraReady, classify])
141
+
142
+ // ── Auto-scroll transcript ────────────────────────────────────────────
143
+ useEffect(() => {
144
+ if (transcriptRef.current) {
145
+ transcriptRef.current.scrollTop = transcriptRef.current.scrollHeight
146
+ }
147
+ }, [transcript, currentChunk])
148
+
149
+ // ── Handlers ──────────────────────────────────────────────────────────
150
+ const handleToggleRecording = useCallback(() => {
151
+ if (isRecording) {
152
+ stop()
153
+ } else {
154
+ // Reuse the existing camera+mic stream so we don't request permissions again
155
+ start(streamRef.current ?? undefined)
156
+ }
157
+ }, [isRecording, start, stop])
158
+
159
+ const hasScores = Object.keys(scores).length > 0
160
+
161
+ // ── Render ────────────────────────────────────────────────────────────
162
+ return (
163
+ <div className="flex flex-col min-h-screen bg-background text-foreground">
164
+ {/* Header */}
165
+ <header className="flex items-center justify-between px-6 py-3 border-b border-border/40">
166
+ <Link
167
+ href="/"
168
+ className="flex items-center gap-1.5 text-sm text-muted-foreground hover:text-foreground transition-colors"
169
+ >
170
+ <ArrowLeft size={16} weight="bold" />
171
+ <span>Back to Studio</span>
172
+ </Link>
173
+
174
+ <span className="text-sm font-semibold tracking-widest uppercase text-muted-foreground">
175
+ Live Mode
176
+ </span>
177
+
178
+ <div className="flex items-center gap-2">
179
+ {isRecording && (
180
+ <span className="relative flex h-2.5 w-2.5">
181
+ <span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-red-400 opacity-75" />
182
+ <span className="relative inline-flex rounded-full h-2.5 w-2.5 bg-red-500" />
183
+ </span>
184
+ )}
185
+ </div>
186
+ </header>
187
+
188
+ {/* Body */}
189
+ <div className="flex-1 flex flex-col items-center gap-6 px-4 py-8 max-w-4xl mx-auto w-full">
190
+ {/* Camera preview */}
191
+ <div className="relative w-full aspect-video rounded-2xl overflow-hidden bg-slate-950 border border-border/30 shadow-2xl">
192
+ {permissionError ? (
193
+ <div className="absolute inset-0 flex flex-col items-center justify-center gap-3 p-8 text-center">
194
+ <VideoCamera size={48} className="text-muted-foreground/40" weight="thin" />
195
+ <p className="text-sm text-muted-foreground max-w-md">{permissionError}</p>
196
+ </div>
197
+ ) : !cameraReady ? (
198
+ <div className="absolute inset-0 flex flex-col items-center justify-center gap-3">
199
+ <VideoCamera size={48} className="text-muted-foreground/30 animate-pulse" weight="thin" />
200
+ <p className="text-xs text-muted-foreground/50">Initializing camera...</p>
201
+ </div>
202
+ ) : null}
203
+
204
+ <video
205
+ ref={videoRef}
206
+ autoPlay
207
+ muted
208
+ playsInline
209
+ className="w-full h-full object-cover"
210
+ style={{ transform: "scaleX(-1)" }}
211
+ />
212
+
213
+ {/* Emotion bars overlay */}
214
+ {cameraReady && hasScores && <EmotionBars scores={scores} />}
215
+
216
+ {/* Record button — centred at bottom of video */}
217
+ {cameraReady && (
218
+ <div className="absolute bottom-4 left-1/2 -translate-x-1/2 flex items-center gap-3">
219
+ <button
220
+ onClick={handleToggleRecording}
221
+ disabled={!warmupDone || isTranscribing}
222
+ className="group relative flex items-center justify-center"
223
+ aria-label={isRecording ? "Stop recording" : "Start recording"}
224
+ >
225
+ {/* outer ring */}
226
+ <span
227
+ className={`absolute size-16 rounded-full border-[3px] transition-colors ${
228
+ isRecording ? "border-white" : isTranscribing ? "border-white/30" : "border-white/60"
229
+ }`}
230
+ />
231
+ {/* inner shape */}
232
+ <span
233
+ className={`transition-all duration-200 ${
234
+ !warmupDone || isTranscribing
235
+ ? "size-10 rounded-full bg-white/30 animate-pulse"
236
+ : isRecording
237
+ ? "size-6 rounded-[4px] bg-red-500"
238
+ : "size-12 rounded-full bg-red-500 group-hover:bg-red-400"
239
+ }`}
240
+ />
241
+ </button>
242
+
243
+ {/* Reset button */}
244
+ {(transcript || currentChunk) && !isRecording && !isTranscribing && (
245
+ <button
246
+ onClick={reset}
247
+ className="text-[11px] text-white/60 hover:text-white/90 uppercase tracking-wider transition-colors"
248
+ >
249
+ Reset
250
+ </button>
251
+ )}
252
+ </div>
253
+ )}
254
+
255
+ {/* Transcript overlay — bottom-right, glassmorphism */}
256
+ {cameraReady && (transcript || currentChunk || isTranscribing) && (
257
+ <div
258
+ ref={transcriptRef}
259
+ className="absolute bottom-3 right-3 w-64 max-h-32 overflow-y-auto rounded-xl bg-black/40 backdrop-blur-md px-4 py-3 text-[12px] leading-relaxed text-white/90"
260
+ >
261
+ {transcript}
262
+ {currentChunk && (
263
+ <span className="text-white/50">{currentChunk}</span>
264
+ )}
265
+ {isTranscribing && !currentChunk && (
266
+ <span className="text-white/40 italic">Transcribing...</span>
267
+ )}
268
+ </div>
269
+ )}
270
+ </div>
271
+
272
+ {/* Warmup progress (shown before ready) */}
273
+ {!warmupDone && (
274
+ <div className="w-full rounded-xl bg-muted/30 border border-border/30 p-5 space-y-3">
275
+ <p className="text-xs font-semibold uppercase tracking-wider text-muted-foreground/60">
276
+ Warming up...
277
+ </p>
278
+ {warmupSteps.map((step, i) => (
279
+ <div key={i} className="flex items-center gap-3 text-sm">
280
+ {step.status === "pending" && (
281
+ <div className="size-4 rounded-full border-2 border-muted-foreground/20" />
282
+ )}
283
+ {step.status === "loading" && (
284
+ <CircleNotch size={16} className="text-blue-500 animate-spin" weight="bold" />
285
+ )}
286
+ {step.status === "done" && (
287
+ <div className="size-4 rounded-full bg-emerald-500 flex items-center justify-center">
288
+ <svg width="10" height="10" viewBox="0 0 10 10" fill="none">
289
+ <path d="M2 5L4 7L8 3" stroke="white" strokeWidth="1.5" strokeLinecap="round" strokeLinejoin="round" />
290
+ </svg>
291
+ </div>
292
+ )}
293
+ {step.status === "error" && (
294
+ <div className="size-4 rounded-full bg-red-500 flex items-center justify-center">
295
+ <span className="text-[10px] text-white font-bold">!</span>
296
+ </div>
297
+ )}
298
+ <span className={step.status === "done" ? "text-foreground/70" : step.status === "loading" ? "text-foreground" : "text-muted-foreground/50"}>
299
+ {step.label}
300
+ </span>
301
+ </div>
302
+ ))}
303
+ </div>
304
+ )}
305
+ </div>
306
+ </div>
307
+ )
308
+ }
web/src/components/sidebar.tsx CHANGED
@@ -9,6 +9,7 @@ import {
9
  Heart,
10
  Plus,
11
  X,
 
12
  } from "@phosphor-icons/react"
13
  import Image from "next/image"
14
  import {
@@ -95,6 +96,14 @@ export function AppSidebar() {
95
  </Link>
96
  </SidebarMenuButton>
97
  </SidebarMenuItem>
 
 
 
 
 
 
 
 
98
  </SidebarMenu>
99
  </SidebarGroupContent>
100
  </SidebarGroup>
 
9
  Heart,
10
  Plus,
11
  X,
12
+ Broadcast,
13
  } from "@phosphor-icons/react"
14
  import Image from "next/image"
15
  import {
 
96
  </Link>
97
  </SidebarMenuButton>
98
  </SidebarMenuItem>
99
+ <SidebarMenuItem>
100
+ <SidebarMenuButton asChild isActive={pathname === "/live"}>
101
+ <Link href="/live">
102
+ <Broadcast />
103
+ <span>Live Mode</span>
104
+ </Link>
105
+ </SidebarMenuButton>
106
+ </SidebarMenuItem>
107
  </SidebarMenu>
108
  </SidebarGroupContent>
109
  </SidebarGroup>
web/src/hooks/use-fer.ts ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use client"
2
+
3
+ import { useState, useRef, useCallback, useEffect } from "react"
4
+
5
+ export const FER_LABELS = [
6
+ "Anger",
7
+ "Contempt",
8
+ "Disgust",
9
+ "Fear",
10
+ "Happy",
11
+ "Neutral",
12
+ "Sad",
13
+ "Surprise",
14
+ ] as const
15
+
16
+ export type EmotionScores = Record<string, number>
17
+
18
+ const IMAGE_SIZE = 224
19
+
20
+ const IMAGENET_MEAN = [0.485, 0.456, 0.406]
21
+ const IMAGENET_STD = [0.229, 0.224, 0.225]
22
+
23
+ function softmax(scores: Float32Array): Float32Array {
24
+ const max = Math.max(...scores)
25
+ const exps = new Float32Array(scores.length)
26
+ let sum = 0
27
+ for (let i = 0; i < scores.length; i++) {
28
+ exps[i] = Math.exp(scores[i] - max)
29
+ sum += exps[i]
30
+ }
31
+ for (let i = 0; i < exps.length; i++) {
32
+ exps[i] /= sum
33
+ }
34
+ return exps
35
+ }
36
+
37
+ export function useFER(): {
38
+ isLoaded: boolean
39
+ emotion: string | null
40
+ confidence: number
41
+ scores: EmotionScores
42
+ preload: () => Promise<void>
43
+ classify: (video: HTMLVideoElement) => Promise<void>
44
+ } {
45
+ const [isLoaded, setIsLoaded] = useState(false)
46
+ const [emotion, setEmotion] = useState<string | null>(null)
47
+ const [confidence, setConfidence] = useState(0)
48
+ const [scores, setScores] = useState<EmotionScores>({})
49
+
50
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
51
+ const sessionRef = useRef<any>(null)
52
+ const canvasRef = useRef<HTMLCanvasElement | null>(null)
53
+ const loadingRef = useRef(false)
54
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
55
+ const ortRef = useRef<any>(null)
56
+
57
+ const loadModel = useCallback(async () => {
58
+ if (sessionRef.current || loadingRef.current) return
59
+ loadingRef.current = true
60
+
61
+ try {
62
+ const ort = await import("onnxruntime-web")
63
+ ortRef.current = ort
64
+
65
+ // Configure WASM before creating session
66
+ ort.env.wasm.numThreads = 1
67
+ ort.env.wasm.wasmPaths = "/"
68
+
69
+ // Fetch model as ArrayBuffer
70
+ const response = await fetch("/emotion_model_web.onnx")
71
+ const modelBuffer = await response.arrayBuffer()
72
+
73
+ const session = await ort.InferenceSession.create(
74
+ new Uint8Array(modelBuffer),
75
+ { executionProviders: ["wasm"] }
76
+ )
77
+ sessionRef.current = session
78
+ setIsLoaded(true)
79
+ } catch (err) {
80
+ console.error("[useFER] Failed to load ONNX model:", err)
81
+ loadingRef.current = false
82
+ }
83
+ }, [])
84
+
85
+ const classify = useCallback(
86
+ async (video: HTMLVideoElement) => {
87
+ try {
88
+ if (!sessionRef.current) {
89
+ await loadModel()
90
+ }
91
+
92
+ const session = sessionRef.current
93
+ const ort = ortRef.current
94
+ if (!session || !ort) return
95
+
96
+ if (!canvasRef.current) {
97
+ canvasRef.current = document.createElement("canvas")
98
+ canvasRef.current.width = IMAGE_SIZE
99
+ canvasRef.current.height = IMAGE_SIZE
100
+ }
101
+
102
+ const canvas = canvasRef.current
103
+ const ctx = canvas.getContext("2d", { willReadFrequently: true })
104
+ if (!ctx) return
105
+
106
+ ctx.drawImage(video, 0, 0, IMAGE_SIZE, IMAGE_SIZE)
107
+ const imageData = ctx.getImageData(0, 0, IMAGE_SIZE, IMAGE_SIZE)
108
+ const { data } = imageData
109
+
110
+ const floatData = new Float32Array(1 * 3 * IMAGE_SIZE * IMAGE_SIZE)
111
+ const pixelCount = IMAGE_SIZE * IMAGE_SIZE
112
+
113
+ for (let i = 0; i < pixelCount; i++) {
114
+ const srcIdx = i * 4
115
+ floatData[i] = (data[srcIdx] / 255 - IMAGENET_MEAN[0]) / IMAGENET_STD[0]
116
+ floatData[pixelCount + i] =
117
+ (data[srcIdx + 1] / 255 - IMAGENET_MEAN[1]) / IMAGENET_STD[1]
118
+ floatData[2 * pixelCount + i] =
119
+ (data[srcIdx + 2] / 255 - IMAGENET_MEAN[2]) / IMAGENET_STD[2]
120
+ }
121
+
122
+ const inputTensor = new ort.Tensor("float32", floatData, [
123
+ 1,
124
+ 3,
125
+ IMAGE_SIZE,
126
+ IMAGE_SIZE,
127
+ ])
128
+
129
+ const inputName = session.inputNames[0]
130
+ const results = await session.run({ [inputName]: inputTensor })
131
+ const outputName = session.outputNames[0]
132
+ const output = results[outputName]
133
+
134
+ if (!output) return
135
+
136
+ const rawScores = output.data as Float32Array
137
+ const probs = softmax(rawScores)
138
+
139
+ const scoreMap: EmotionScores = {}
140
+ let maxIdx = 0
141
+ let maxVal = probs[0]
142
+ for (let i = 0; i < probs.length; i++) {
143
+ scoreMap[FER_LABELS[i]] = probs[i]
144
+ if (probs[i] > maxVal) {
145
+ maxVal = probs[i]
146
+ maxIdx = i
147
+ }
148
+ }
149
+
150
+ setScores(scoreMap)
151
+ setEmotion(FER_LABELS[maxIdx])
152
+ setConfidence(maxVal)
153
+ } catch (err) {
154
+ console.error("[useFER] Classification error:", err)
155
+ }
156
+ },
157
+ [loadModel]
158
+ )
159
+
160
+ useEffect(() => {
161
+ return () => {
162
+ if (sessionRef.current) {
163
+ sessionRef.current.release().catch((err: unknown) => {
164
+ console.error("[useFER] Failed to release session:", err)
165
+ })
166
+ sessionRef.current = null
167
+ }
168
+ }
169
+ }, [])
170
+
171
+ return { isLoaded, emotion, confidence, scores, preload: loadModel, classify }
172
+ }
web/src/hooks/use-streaming-transcription.ts ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use client"
2
+
3
+ import { useState, useRef, useCallback, useEffect } from "react"
4
+
5
/**
 * JSON payload carried on one `data: ` line of the `/api/transcribe-stream`
 * response, as consumed by this hook.
 */
interface ChunkStreamEvent {
  /** Text fragment appended to the in-progress chunk. */
  token?: string
  /** Truthy on the event that carries the final `transcription`. */
  done?: boolean
  /** Text appended to the accumulated transcript when `done` is set. */
  transcription?: string
}

/**
 * Parses a single line of the streamed response into an event.
 *
 * Only lines of the form `data: <json object>` are accepted. Fields with an
 * unexpected type are dropped instead of being trusted.
 *
 * @param line - One line of the response body, without its trailing newline.
 * @returns The decoded event, or `null` for any line that is not a `data: `
 *   line carrying a JSON object (other prefixes, empty payloads, bad JSON).
 */
function parseStreamEvent(line: string): ChunkStreamEvent | null {
  const trimmed = line.trim()
  if (!trimmed.startsWith("data: ")) return null

  const payload = trimmed.slice(6)
  if (!payload) return null

  let parsed: unknown
  try {
    parsed = JSON.parse(payload)
  } catch {
    // Malformed JSON is ignored so one bad line cannot abort the stream.
    return null
  }
  if (typeof parsed !== "object" || parsed === null) return null

  const { token, done, transcription } = parsed as Record<string, unknown>
  return {
    token: typeof token === "string" ? token : undefined,
    done: Boolean(done),
    transcription: typeof transcription === "string" ? transcription : undefined,
  }
}

/**
 * Press-to-record transcription hook.
 *   1. start() → begins recording audio
 *   2. stop()  → stops recording, sends full audio to API, streams back tokens
 *
 * `start` accepts an optional existing `MediaStream`; its audio tracks are
 * recorded and left running on `stop`. Without it the hook opens the
 * microphone itself and releases it on `stop` or unmount.
 *
 * Failures to start recording are logged and swallowed: `start` never rejects.
 *
 * @returns Recording/transcribing flags, the accumulated `transcript`, the
 *   text streamed so far for the current request (`currentChunk`), and the
 *   `start` / `stop` / `reset` controls.
 */
export function useStreamingTranscription() {
  const [isRecording, setIsRecording] = useState(false)
  const [isTranscribing, setIsTranscribing] = useState(false)
  const [transcript, setTranscript] = useState("")
  const [currentChunk, setCurrentChunk] = useState("")

  const mediaRecorderRef = useRef<MediaRecorder | null>(null)
  const ownStreamRef = useRef<MediaStream | null>(null)
  const blobsRef = useRef<Blob[]>([])
  // True while start() is between its guard and the recorder being stored;
  // a ref (not state) so a second call in the same render sees it.
  const startingRef = useRef(false)
  // Abort handles for transcription requests that have not finished yet.
  const inFlightRef = useRef<Set<AbortController>>(new Set())

  const transcribe = useCallback(async (audioBlob: Blob) => {
    // Skip blobs too small to be worth sending.
    if (audioBlob.size < 500) return

    setIsTranscribing(true)
    setCurrentChunk("")

    const controller = new AbortController()
    inFlightRef.current.add(controller)

    try {
      const formData = new FormData()
      formData.append("audio", audioBlob, "recording.webm")

      const response = await fetch("/api/transcribe-stream", {
        method: "POST",
        body: formData,
        signal: controller.signal,
      })

      if (!response.ok || !response.body) {
        console.error("Transcription request failed:", response.status)
        return
      }

      const reader = response.body.getReader()
      const decoder = new TextDecoder()
      let buffer = ""
      let streamingText = ""

      const handleLine = (line: string) => {
        const event = parseStreamEvent(line)
        if (!event) return

        const finalText = event.transcription
        if (event.done && finalText != null) {
          // Final result — append to the accumulated transcript
          setTranscript((prev) => (prev ? prev + " " + finalText : finalText))
          setCurrentChunk("")
        } else if (event.token != null) {
          streamingText += event.token
          setCurrentChunk(streamingText)
        }
      }

      while (true) {
        const { done, value } = await reader.read()
        if (done) break

        buffer += decoder.decode(value, { stream: true })
        const lines = buffer.split("\n")
        // The last element is an incomplete line; keep it for the next read.
        buffer = lines.pop() ?? ""

        for (const line of lines) handleLine(line)
      }

      // Flush the decoder and process a final line that had no trailing "\n".
      buffer += decoder.decode()
      if (buffer) handleLine(buffer)
    } catch (error) {
      // An abort is triggered by unmount and is not an error worth reporting.
      if (controller.signal.aborted) return
      console.error("Transcription error:", error)
    } finally {
      inFlightRef.current.delete(controller)
      setIsTranscribing(false)
    }
  }, [])

  const start = useCallback(async (existingStream?: MediaStream) => {
    // Guard with refs rather than `isRecording`: state captured by the
    // closure is stale until the next render and during the await below.
    if (startingRef.current || mediaRecorderRef.current) return
    startingRef.current = true

    // Microphone stream opened by this call. Released in `finally` unless
    // ownership has been handed over to `ownStreamRef`.
    let pendingOwnStream: MediaStream | null = null

    try {
      let stream: MediaStream
      if (existingStream) {
        const audioTracks = existingStream.getAudioTracks()
        if (audioTracks.length === 0) {
          console.error("[useStreamingTranscription] No audio tracks")
          return
        }
        stream = new MediaStream(audioTracks)
      } else {
        pendingOwnStream = await navigator.mediaDevices.getUserMedia({
          audio: true,
        })
        stream = pendingOwnStream
      }

      const mimeType = MediaRecorder.isTypeSupported("audio/webm;codecs=opus")
        ? "audio/webm;codecs=opus"
        : "audio/webm"

      // Throws if the browser supports neither MIME type; handled below.
      const recorder = new MediaRecorder(stream, { mimeType })

      recorder.ondataavailable = (event: BlobEvent) => {
        if (event.data.size > 0) {
          blobsRef.current.push(event.data)
        }
      }

      blobsRef.current = []
      recorder.start()

      mediaRecorderRef.current = recorder
      ownStreamRef.current = pendingOwnStream
      pendingOwnStream = null
      setIsRecording(true)
    } catch (error) {
      console.error(
        "[useStreamingTranscription] Failed to start recording:",
        error
      )
    } finally {
      // Only non-null when recording did not start: do not leave the mic on.
      pendingOwnStream?.getTracks().forEach((t) => t.stop())
      startingRef.current = false
    }
  }, [])

  const stop = useCallback(() => {
    const recorder = mediaRecorderRef.current
    if (!recorder) return
    mediaRecorderRef.current = null

    // Assemble the recorded blobs and send them for transcription
    const sendRecording = () => {
      const mimeType = recorder.mimeType || "audio/webm;codecs=opus"
      const audioBlob = new Blob(blobsRef.current, { type: mimeType })
      blobsRef.current = []
      void transcribe(audioBlob)
    }

    if (recorder.state === "inactive") {
      // The recorder already stopped by itself, so no further "stop" event
      // will fire; send whatever was captured right away.
      sendRecording()
    } else {
      recorder.onstop = sendRecording
      recorder.stop()
    }

    // Only stop tracks if we own the stream
    if (ownStreamRef.current) {
      ownStreamRef.current.getTracks().forEach((t) => t.stop())
      ownStreamRef.current = null
    }

    setIsRecording(false)
  }, [transcribe])

  const reset = useCallback(() => {
    setTranscript("")
    setCurrentChunk("")
  }, [])

  // Unmount cleanup: stop recording, release the mic, cancel pending requests.
  useEffect(() => {
    const inFlight = inFlightRef.current
    return () => {
      const recorder = mediaRecorderRef.current
      if (recorder && recorder.state !== "inactive") recorder.stop()
      mediaRecorderRef.current = null

      if (ownStreamRef.current) {
        ownStreamRef.current.getTracks().forEach((t) => t.stop())
        ownStreamRef.current = null
      }

      inFlight.forEach((controller) => controller.abort())
      inFlight.clear()
    }
  }, [])

  return {
    isRecording,
    isTranscribing,
    transcript,
    currentChunk,
    start,
    stop,
    reset,
  }
}