Spaces:
Running
Running
Oviya
committed on
Commit
·
d7ebbb4
1
Parent(s):
48fb81d
add pronragupgrade
Browse files- chroma_db/{6bb1d18d-491e-4b83-bb53-aa5824da7394 β 1ceaf3a3-30e6-42c4-b515-99a05466da04}/data_level0.bin +1 -1
- chroma_db/{6bb1d18d-491e-4b83-bb53-aa5824da7394 β 1ceaf3a3-30e6-42c4-b515-99a05466da04}/header.bin +0 -0
- chroma_db/{6bb1d18d-491e-4b83-bb53-aa5824da7394 β 1ceaf3a3-30e6-42c4-b515-99a05466da04}/length.bin +1 -1
- chroma_db/{6bb1d18d-491e-4b83-bb53-aa5824da7394 β 1ceaf3a3-30e6-42c4-b515-99a05466da04}/link_lists.bin +0 -0
- chroma_db/{a7177db3-89c4-4f3b-b1c6-6ac2ec4b0384 β 44944ef3-9b61-4c1b-bc5e-6a49750c0c54}/data_level0.bin +1 -1
- chroma_db/{a7177db3-89c4-4f3b-b1c6-6ac2ec4b0384 β 44944ef3-9b61-4c1b-bc5e-6a49750c0c54}/header.bin +0 -0
- chroma_db/{a7177db3-89c4-4f3b-b1c6-6ac2ec4b0384 β 44944ef3-9b61-4c1b-bc5e-6a49750c0c54}/length.bin +1 -1
- chroma_db/{a7177db3-89c4-4f3b-b1c6-6ac2ec4b0384 β 44944ef3-9b61-4c1b-bc5e-6a49750c0c54}/link_lists.bin +0 -0
- feedback.mp4 +2 -2
- pronragupgrade.py +1146 -0
- requirements.txt +8 -0
- teacher_feedback_sentences_category.json +529 -189
- verification.py +2 -0
chroma_db/{6bb1d18d-491e-4b83-bb53-aa5824da7394 β 1ceaf3a3-30e6-42c4-b515-99a05466da04}/data_level0.bin
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 167600
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90b564d60a2658c07a41e1133109c1574bb40f6ab674750bba8b8eeb28a08f25
|
| 3 |
size 167600
|
chroma_db/{6bb1d18d-491e-4b83-bb53-aa5824da7394 β 1ceaf3a3-30e6-42c4-b515-99a05466da04}/header.bin
RENAMED
|
File without changes
|
chroma_db/{6bb1d18d-491e-4b83-bb53-aa5824da7394 β 1ceaf3a3-30e6-42c4-b515-99a05466da04}/length.bin
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 400
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7171cf84eb030fe5cb580f57a325f57cceb0aed0e55ea95c81d67d4181e1ed81
|
| 3 |
size 400
|
chroma_db/{6bb1d18d-491e-4b83-bb53-aa5824da7394 β 1ceaf3a3-30e6-42c4-b515-99a05466da04}/link_lists.bin
RENAMED
|
File without changes
|
chroma_db/{a7177db3-89c4-4f3b-b1c6-6ac2ec4b0384 β 44944ef3-9b61-4c1b-bc5e-6a49750c0c54}/data_level0.bin
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 167600
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f79deee1f1ed7bc4c1095b45122b981044435dc5d6fbf46d1303b8b3dcf9b9a0
|
| 3 |
size 167600
|
chroma_db/{a7177db3-89c4-4f3b-b1c6-6ac2ec4b0384 β 44944ef3-9b61-4c1b-bc5e-6a49750c0c54}/header.bin
RENAMED
|
File without changes
|
chroma_db/{a7177db3-89c4-4f3b-b1c6-6ac2ec4b0384 β 44944ef3-9b61-4c1b-bc5e-6a49750c0c54}/length.bin
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 400
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8d329104353429c3a4fab240f87e7cba8ac17269bbfe57d26150d03cb34fa0a
|
| 3 |
size 400
|
chroma_db/{a7177db3-89c4-4f3b-b1c6-6ac2ec4b0384 β 44944ef3-9b61-4c1b-bc5e-6a49750c0c54}/link_lists.bin
RENAMED
|
File without changes
|
feedback.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d91dd148f4b3bc5f7b4174a4ddd0d5665d123202f442c342f44bc3ffb33a4932
|
| 3 |
+
size 248621925
|
pronragupgrade.py
ADDED
|
@@ -0,0 +1,1146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ο»Ώimport os
|
| 2 |
+
import re
|
| 3 |
+
import torch
|
| 4 |
+
import tempfile
|
| 5 |
+
import subprocess
|
| 6 |
+
import soundfile as sf
|
| 7 |
+
import numpy as np
|
| 8 |
+
import json
|
| 9 |
+
import base64
|
| 10 |
+
import random
|
| 11 |
+
import chromadb
|
| 12 |
+
import eng_to_ipa as ipa
|
| 13 |
+
from flask import Flask, request, jsonify,Blueprint
|
| 14 |
+
from flask_cors import CORS
|
| 15 |
+
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
|
| 16 |
+
|
| 17 |
+
# Blueprint object exported by this module.
pronragupgrade_bp = Blueprint("pronragupgrade", __name__)

# ==================================================
# 1. SETUP & CONFIG
# ==================================================
# Every data file is resolved relative to the directory holding this module.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
VIDEO_PATH, JSON_PATH, CHROMA_DIR = (
    os.path.join(BASE_DIR, leaf)
    for leaf in (
        "feedback.mp4",
        "teacher_feedback_sentences_category.json",
        "chroma_db",
    )
)

# Use the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
MODEL_ID = "moxeeeem/wav2vec2-finetuned-pronunciation-correction"

# The processor and model are loaded once, at import time, and put in eval mode.
print(f"Loading model to {DEVICE}...")
processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
model = model.to(DEVICE)
model.eval()

# ==================================================
# 2. CHROMA DB INITIALIZATION
# ==================================================
# Persistent Chroma store under CHROMA_DIR; the "feedback" collection is
# created on first use.
client = chromadb.PersistentClient(path=CHROMA_DIR)
collection = client.get_or_create_collection("feedback")
|
| 40 |
+
|
| 41 |
+
def _add_segments(segments):
    """Add each segment record to the module-level ``collection``.

    Every record must provide ``id``, ``text``, ``category``, ``start`` and
    ``end``; ``phoneme`` is copied into the metadata only when present.
    Records are added one at a time, exactly as the two original loops did.
    """
    for item in segments:
        meta = {
            "category": item["category"],
            "start": item["start"],
            "end": item["end"],
        }
        if "phoneme" in item:
            meta["phoneme"] = item["phoneme"]
        collection.add(
            ids=[str(item["id"])],
            documents=[item["text"]],
            metadatas=[meta],
        )


def init_segments():
    """Seed the ``feedback`` collection with video-segment records.

    Does nothing when the collection already holds entries. Otherwise loads
    the records from ``JSON_PATH``; when that file does not exist, a built-in
    set of placeholder segments is inserted instead and a warning is printed.
    """
    if collection.count() > 0:
        return

    if not os.path.exists(JSON_PATH):
        print(f"Warning: JSON file not found at {JSON_PATH}")
        # Placeholder data: several clips per category so lookups still work
        # without the real JSON file.
        dummy_data = [
            # Syllable category clips
            {"id": 1, "text": "Let's work on syllable count", "category": "syllable", "start": 0, "end": 5},
            {"id": 2, "text": "That word has multiple syllables", "category": "syllable", "start": 5, "end": 10},
            {"id": 3, "text": "Make sure you pronounce all syllables", "category": "syllable", "start": 10, "end": 15},

            # Ending category clips
            {"id": 4, "text": "Focus on the ending sound", "category": "ending", "start": 15, "end": 20},
            {"id": 5, "text": "Don't forget the final consonant", "category": "ending", "start": 20, "end": 25},
            {"id": 6, "text": "Complete the word properly", "category": "ending", "start": 25, "end": 30},

            # Vowel category clips
            {"id": 7, "text": "Let's work on vowel sounds", "category": "vowel", "start": 30, "end": 35},
            {"id": 8, "text": "The vowel should be clear", "category": "vowel", "start": 35, "end": 40},
            {"id": 9, "text": "Focus on vowel quality", "category": "vowel", "start": 40, "end": 45},

            # Consonant category clips
            {"id": 10, "text": "Articulate consonants clearly", "category": "consonant", "start": 45, "end": 50},
            {"id": 11, "text": "Consonants should be crisp", "category": "consonant", "start": 50, "end": 55},
            {"id": 12, "text": "Work on consonant sounds", "category": "consonant", "start": 55, "end": 60},

            # Stress category clips
            {"id": 13, "text": "Focus on word stress", "category": "stress", "start": 60, "end": 65},
            {"id": 14, "text": "Emphasize the correct syllable", "category": "stress", "start": 65, "end": 70},
            {"id": 15, "text": "Watch your rhythm and stress", "category": "stress", "start": 70, "end": 75},

            # Success category clips
            {"id": 16, "text": "Excellent work!", "category": "success", "start": 75, "end": 80},
            {"id": 17, "text": "Great pronunciation!", "category": "success", "start": 80, "end": 85},
            {"id": 18, "text": "Keep up the good work!", "category": "success", "start": 85, "end": 90},

            # Wrong word category clips
            {"id": 19, "text": "That sounds like a different word", "category": "wrong_word", "start": 90, "end": 95},
            {"id": 20, "text": "Please say the target word", "category": "wrong_word", "start": 95, "end": 100},

            # Multiple words category clips
            {"id": 21, "text": "Say just one word please", "category": "multiple_words", "start": 100, "end": 105},
            {"id": 22, "text": "Focus on a single word", "category": "multiple_words", "start": 105, "end": 110},

            # Silence category clips
            {"id": 23, "text": "I couldn't hear anything", "category": "silence", "start": 110, "end": 115},
            {"id": 24, "text": "Please speak louder", "category": "silence", "start": 115, "end": 120},

            # Specific phoneme clips (IPA symbols restored from mis-encoded text)
            {"id": 25, "text": "For the 'æ' sound like in cat", "category": "vowel", "phoneme": "æ", "start": 120, "end": 125},
            {"id": 26, "text": "The 'r' should be soft", "category": "consonant", "phoneme": "r", "start": 125, "end": 130},
            {"id": 27, "text": "The 'ɪ' sound is short", "category": "vowel", "phoneme": "ɪ", "start": 130, "end": 135},
            {"id": 28, "text": "The 't' should be clear", "category": "consonant", "phoneme": "t", "start": 135, "end": 140},
        ]
        _add_segments(dummy_data)
        print(f"Created {len(dummy_data)} dummy video segments in ChromaDB")
        return

    with open(JSON_PATH, "r", encoding="utf-8") as f:
        data = json.load(f)

    _add_segments(data)
    print(f"Loaded {len(data)} video segments into ChromaDB")
|
| 124 |
+
|
| 125 |
+
# Seed the collection at import time; init_segments() returns immediately
# when the collection already contains entries.
init_segments()
|
| 126 |
+
|
| 127 |
+
# ==================================================
|
| 128 |
+
# 3. UK ENGLISH PRONUNCIATION SYSTEM
|
| 129 |
+
# ==================================================
|
| 130 |
+
|
| 131 |
+
# UK Phoneme Sound Database
|
| 132 |
+
# Maps an IPA symbol to its descriptive name, an example word, an articulation
# tip and a coarse type ("vowel", "diphthong" or "consonant").
# The IPA keys were restored from mis-encoded text using the lexical-set names
# and example words; in the garbled form several distinct phonemes shared one
# key, so later entries silently overwrote earlier ones.
UK_PHONEME_DB = {
    # Monophthongs
    "ɪ": {"name": "KIT vowel", "example": "sit", "tip": "Short front vowel", "type": "vowel"},
    "iː": {"name": "FLEECE vowel", "example": "see", "tip": "Long front vowel", "type": "vowel"},
    "ʊ": {"name": "FOOT vowel", "example": "put", "tip": "Short rounded back vowel", "type": "vowel"},
    "uː": {"name": "GOOSE vowel", "example": "too", "tip": "Long rounded back vowel", "type": "vowel"},
    "e": {"name": "DRESS vowel", "example": "bed", "tip": "Short mid front vowel", "type": "vowel"},
    "ə": {"name": "SCHWA", "example": "about", "tip": "Relaxed central vowel", "type": "vowel"},
    "ɜː": {"name": "NURSE vowel", "example": "bird", "tip": "Long central vowel", "type": "vowel"},
    "ɔː": {"name": "THOUGHT vowel", "example": "law", "tip": "Long open-mid back vowel", "type": "vowel"},
    "æ": {"name": "TRAP vowel", "example": "cat", "tip": "Short open front vowel", "type": "vowel"},
    "ʌ": {"name": "STRUT vowel", "example": "cup", "tip": "Short mid back vowel", "type": "vowel"},
    "ɑː": {"name": "BATH vowel", "example": "father", "tip": "Long open back vowel", "type": "vowel"},
    "ɒ": {"name": "LOT vowel", "example": "hot", "tip": "Short open back rounded vowel", "type": "vowel"},

    # Diphthongs
    "eɪ": {"name": "FACE diphthong", "example": "day", "tip": "Glide from e to ɪ", "type": "diphthong"},
    "aɪ": {"name": "PRICE diphthong", "example": "eye", "tip": "Glide from a to ɪ", "type": "diphthong"},
    "ɔɪ": {"name": "CHOICE diphthong", "example": "boy", "tip": "Glide from ɔ to ɪ", "type": "diphthong"},
    "aʊ": {"name": "MOUTH diphthong", "example": "now", "tip": "Glide from a to ʊ", "type": "diphthong"},
    "əʊ": {"name": "GOAT diphthong", "example": "go", "tip": "Glide from ə to ʊ", "type": "diphthong"},

    # Consonants
    "p": {"name": "voiceless bilabial plosive", "example": "pen", "tip": "Explosive 'p' sound", "type": "consonant"},
    "b": {"name": "voiced bilabial plosive", "example": "bad", "tip": "Voiced 'b' with vibration", "type": "consonant"},
    "t": {"name": "voiceless alveolar plosive", "example": "tea", "tip": "Tongue tip on alveolar ridge", "type": "consonant"},
    "d": {"name": "voiced alveolar plosive", "example": "did", "tip": "Voiced 'd' with vibration", "type": "consonant"},
    "k": {"name": "voiceless velar plosive", "example": "cat", "tip": "Back of tongue on soft palate", "type": "consonant"},
    "ɡ": {"name": "voiced velar plosive", "example": "get", "tip": "Voiced 'g' with vibration", "type": "consonant"},
    "tʃ": {"name": "voiceless palato-alveolar affricate", "example": "chin", "tip": "Combination of 't' and 'ʃ'", "type": "consonant"},
    "dʒ": {"name": "voiced palato-alveolar affricate", "example": "jam", "tip": "Combination of 'd' and 'ʒ'", "type": "consonant"},
    "f": {"name": "voiceless labiodental fricative", "example": "fall", "tip": "Upper teeth on lower lip", "type": "consonant"},
    "v": {"name": "voiced labiodental fricative", "example": "van", "tip": "Voiced version of 'f'", "type": "consonant"},
    "θ": {"name": "voiceless dental fricative", "example": "thin", "tip": "Tongue between teeth, no vibration", "type": "consonant"},
    "ð": {"name": "voiced dental fricative", "example": "then", "tip": "Tongue between teeth, with vibration", "type": "consonant"},
    "s": {"name": "voiceless alveolar fricative", "example": "see", "tip": "Hissing 's' sound", "type": "consonant"},
    "z": {"name": "voiced alveolar fricative", "example": "zoo", "tip": "Voiced 'z' sound", "type": "consonant"},
    "ʃ": {"name": "voiceless palato-alveolar fricative", "example": "she", "tip": "'Sh' sound, tongue raised", "type": "consonant"},
    "ʒ": {"name": "voiced palato-alveolar fricative", "example": "pleasure", "tip": "Voiced 'zh' sound", "type": "consonant"},
    "h": {"name": "voiceless glottal fricative", "example": "hot", "tip": "Breathy 'h' from throat", "type": "consonant"},
    "m": {"name": "bilabial nasal", "example": "man", "tip": "Humming 'm' with lips closed", "type": "consonant"},
    "n": {"name": "alveolar nasal", "example": "no", "tip": "Tongue on alveolar ridge", "type": "consonant"},
    "ŋ": {"name": "velar nasal", "example": "sing", "tip": "'Ng' sound, back of tongue up", "type": "consonant"},
    "l": {"name": "alveolar lateral approximant", "example": "let", "tip": "Tongue tip on alveolar ridge", "type": "consonant"},
    "r": {"name": "alveolar approximant", "example": "red", "tip": "UK 'r' is soft", "type": "consonant"},
    "j": {"name": "palatal approximant", "example": "yes", "tip": "'Y' sound", "type": "consonant"},
    "w": {"name": "labio-velar approximant", "example": "we", "tip": "Round lips", "type": "consonant"},
}
|
| 177 |
+
|
| 178 |
+
# Common words with syllable info
|
| 179 |
+
# Hand-written entries for frequent practice words: the expected phoneme
# sequence, the syllable count and which syllable carries the stress.
# The IPA symbols were restored from mis-encoded text.
# NOTE(review): "elephant" starts with 'ɛ', which has no entry in
# UK_PHONEME_DB (DRESS is keyed as "e" there) — confirm which symbol is intended.
COMMON_UK_WORDS = {
    "rabbit": {"phonemes": ["r", "æ", "b", "ɪ", "t"], "syllables": 2, "stress": "first"},
    "dog": {"phonemes": ["d", "ɒ", "ɡ"], "syllables": 1, "stress": "only"},
    "cat": {"phonemes": ["k", "æ", "t"], "syllables": 1, "stress": "only"},
    "water": {"phonemes": ["w", "ɔː", "t", "ə"], "syllables": 2, "stress": "first"},
    "hello": {"phonemes": ["h", "ə", "l", "əʊ"], "syllables": 2, "stress": "second"},
    "banana": {"phonemes": ["b", "ə", "n", "ɑː", "n", "ə"], "syllables": 3, "stress": "second"},
    "computer": {"phonemes": ["k", "ə", "m", "p", "j", "uː", "t", "ə"], "syllables": 3, "stress": "second"},
    "elephant": {"phonemes": ["ɛ", "l", "ɪ", "f", "ə", "n", "t"], "syllables": 3, "stress": "first"},
}
|
| 189 |
+
|
| 190 |
+
# Two-symbol IPA units (diphthongs and affricates) that are kept together when
# an IPA string is split into phonemes. Restored from mis-encoded text.
_IPA_DIGRAPHS = frozenset({"eɪ", "aɪ", "ɔɪ", "aʊ", "əʊ", "tʃ", "dʒ"})

# Letter -> phoneme guesses used only by the spelling-based fallback.
# NOTE(review): restored from mis-encoded text; 'o' -> 'ɒ' and 'u' -> 'ʌ' are
# best guesses (only the leading byte survived) — confirm against the original.
_FALLBACK_LETTER_PHONEMES = {
    "a": "æ",
    "e": "ɛ",
    "i": "ɪ",
    "o": "ɒ",
    "u": "ʌ",
    "g": "ɡ",
}


def _split_ipa(ipa_text):
    """Split an IPA string into phonemes, keeping known digraphs as one unit."""
    phonemes = []
    i = 0
    while i < len(ipa_text):
        pair = ipa_text[i:i + 2]
        if pair in _IPA_DIGRAPHS:
            phonemes.append(pair)
            i += 2
        else:
            phonemes.append(ipa_text[i])
            i += 1
    return phonemes


def _spell_out(word_lower):
    """Approximate phonemes letter by letter when no IPA is available."""
    return [_FALLBACK_LETTER_PHONEMES.get(ch, ch) for ch in word_lower]


def get_uk_pronunciation(word):
    """Return the expected phoneme list for ``word``.

    Lookup order:
      1. the hand-written ``COMMON_UK_WORDS`` table (case-insensitive, stripped);
      2. ``eng_to_ipa`` conversion, with stress/length marks removed and the
         result split into phonemes;
      3. a letter-by-letter approximation when the conversion raises or
         reports the word as unknown.

    Returns:
        list[str]: phoneme symbols; a fresh list on every call.
    """
    word_lower = word.lower().strip()

    known = COMMON_UK_WORDS.get(word_lower)
    if known is not None:
        # Copy so callers cannot mutate the shared table entry.
        return list(known["phonemes"])

    try:
        ipa_str = ipa.convert(word)
    except Exception as e:
        print(f"Error getting IPA for {word}: {e}")
        return _spell_out(word_lower)

    # eng_to_ipa does not raise for words it cannot find; it echoes them back
    # with a trailing '*'. Treat that as a miss instead of emitting the raw
    # letters and the asterisk as "phonemes".
    if "*" in ipa_str:
        return _spell_out(word_lower)

    # NOTE(review): character class restored from mis-encoded text as primary
    # stress, secondary stress and length mark — confirm the third symbol.
    clean_ipa = re.sub(r"[ˈˌː]", "", ipa_str)
    return _split_ipa(clean_ipa)
|
| 228 |
+
|
| 229 |
+
def get_word_info(word):
    """Return ``{"syllables": int, "stress": str}`` for ``word``.

    Words present in ``COMMON_UK_WORDS`` use their stored values. For any
    other word the syllable count is the number of phonemes typed as vowel or
    diphthong in ``UK_PHONEME_DB``, and stress is guessed from that count:
    one -> "only", two -> "first", anything else -> "second".
    """
    key = word.lower().strip()

    entry = COMMON_UK_WORDS.get(key)
    if entry is not None:
        return {"syllables": entry["syllables"], "stress": entry["stress"]}

    # Count syllable nuclei: each vowel or diphthong counts as one syllable.
    nuclei = [
        p for p in get_uk_pronunciation(word)
        if UK_PHONEME_DB.get(p, {}).get('type') in ['vowel', 'diphthong']
    ]
    syllable_total = len(nuclei)

    stress_guess = {1: "only", 2: "first"}.get(syllable_total, "second")
    return {"syllables": syllable_total, "stress": stress_guess}
|
| 254 |
+
|
| 255 |
+
# ==================================================
|
| 256 |
+
# 4. CORRECTED PHONEME ANALYSIS
|
| 257 |
+
# ==================================================
|
| 258 |
+
|
| 259 |
+
# Reference phoneme -> plain-letter near misses that earn half credit.
# Restored from mis-encoded text.
# NOTE(review): the original table has one more row with two entries whose
# keys and values were reduced to the same garbled character and cannot be
# recovered from this copy. In the garbled form those entries were overwritten
# by a later duplicate key, so they are omitted here — restore them from the
# original file.
_PARTIAL_PHONEME_MATCHES = {
    "ɪ": ("i",),
    "ɛ": ("e",),
    "ɡ": ("g",),
    "æ": ("a",),
}


def is_exact_phoneme_match(ref, stu):
    """Compare a reference phoneme with the student's phoneme.

    Length marks ('ː') are ignored on both sides before comparing.

    Returns:
        True  when the phonemes are identical after normalisation;
        0.5   when ``stu`` is a listed near miss for ``ref`` (note this value
              is truthy, so callers needing strictness must compare
              explicitly);
        False otherwise, including when ``stu`` is empty or None.
    """
    if not stu:
        return False

    ref_norm = ref.replace("ː", "")
    stu_norm = stu.replace("ː", "")

    if ref_norm == stu_norm:
        return True

    if stu_norm in _PARTIAL_PHONEME_MATCHES.get(ref_norm, ()):
        return 0.5

    return False
|
| 280 |
+
|
| 281 |
+
def analyze_pronunciation_strict(student_phonemes, reference_phonemes, word):
    """Score the student's phonemes against the reference, position by position.

    Args:
        student_phonemes: phonemes recognised from the student (may be empty/None).
        reference_phonemes: expected phonemes (may be empty/None).
        word: target word; accepted for interface compatibility, not used here.

    Returns:
        dict with:
          score               0-100, accuracy minus a length penalty
                              (up to 30 points scaled by the fraction of
                              missing phonemes, or 20 points per reference
                              length of extra phonemes);
          errors              list of {"position", "expected", "said", "type"}
                              for positions that did not match (1-based);
          exact_correct       number of exact matches;
          partial_correct     sum of half credits (0.5 per near miss);
          total_expected      number of reference phonemes;
          accuracy_percentage (exact + partial) / total_expected * 100.
    """
    # Treat a missing reference like an empty one everywhere, instead of only
    # in some of the length computations.
    reference_phonemes = reference_phonemes or []
    total_expected = len(reference_phonemes)

    if not student_phonemes:
        return {
            "score": 0,
            "errors": [],
            "exact_correct": 0,
            "partial_correct": 0,
            "total_expected": total_expected,
            "accuracy_percentage": 0,
        }

    exact_correct = 0
    partial_correct = 0
    errors = []

    # zip() stops at the shorter sequence, i.e. only overlapping positions
    # are compared; length differences are penalised below.
    for position, (ref, stu) in enumerate(zip(reference_phonemes, student_phonemes), start=1):
        match_result = is_exact_phoneme_match(ref, stu)

        if match_result is True:
            exact_correct += 1
        elif match_result == 0.5:
            partial_correct += 0.5
        else:
            errors.append({
                "position": position,
                "expected": ref,
                "said": stu,
                "type": UK_PHONEME_DB.get(ref, {}).get("type", "unknown"),
            })

    if total_expected == 0:
        score = 0
        accuracy_percentage = 0
    else:
        accuracy = (exact_correct + partial_correct) / total_expected * 100
        base_score = accuracy
        length_gap = len(student_phonemes) - total_expected

        if length_gap < 0:
            # Missing phonemes cost more than extra ones.
            base_score = max(0, base_score - (-length_gap) / total_expected * 30)
        elif length_gap > 0:
            base_score = max(0, base_score - length_gap / total_expected * 20)

        score = round(max(0, min(100, base_score)), 1)
        accuracy_percentage = round(accuracy, 1)

    return {
        "score": score,
        "errors": errors,
        "exact_correct": exact_correct,
        "partial_correct": partial_correct,
        "total_expected": total_expected,
        "accuracy_percentage": accuracy_percentage,
    }
|
| 341 |
+
|
| 342 |
+
# ==================================================
|
| 343 |
+
# 5. SCENARIO DETECTION
|
| 344 |
+
# ==================================================
|
| 345 |
+
|
| 346 |
+
class ScenarioDetector:
|
| 347 |
+
"""Scenario detection with correct priorities."""
|
| 348 |
+
|
| 349 |
+
# Scenario category names.
# NOTE(review): the name suggests earlier entries take precedence, but no
# consumer of this list is visible in this part of the file — confirm the
# ordering semantics against the code that reads it.
SCENARIO_PRIORITIES = [
    'silence',
    'multiple_words',
    'wrong_word',
    'syllable',
    'ending',
    'vowel',
    'consonant',
    'stress',
    'success',
]
|
| 360 |
+
|
| 361 |
+
@staticmethod
|
| 362 |
+
def detect_silence(student_phonemes, audio_error=None):
|
| 363 |
+
if audio_error:
|
| 364 |
+
error_lower = audio_error.lower()
|
| 365 |
+
if any(x in error_lower for x in ['silence', 'quiet', 'empty']):
|
| 366 |
+
return {
|
| 367 |
+
'scenario': 'silence',
|
| 368 |
+
'category': 'silence',
|
| 369 |
+
'confidence': 1.0,
|
| 370 |
+
'feedback': "I couldn't hear anything. Please speak louder.",
|
| 371 |
+
'action': "increase_volume"
|
| 372 |
+
}
|
| 373 |
+
|
| 374 |
+
if not student_phonemes or len(student_phonemes) == 0:
|
| 375 |
+
return {
|
| 376 |
+
'scenario': 'silence',
|
| 377 |
+
'category': 'silence',
|
| 378 |
+
'confidence': 0.9,
|
| 379 |
+
'feedback': "No speech detected.",
|
| 380 |
+
'action': "check_microphone"
|
| 381 |
+
}
|
| 382 |
+
|
| 383 |
+
return None
|
| 384 |
+
|
| 385 |
+
@staticmethod
|
| 386 |
+
def detect_multiple_words(student_phonemes, reference_phonemes):
|
| 387 |
+
if not student_phonemes:
|
| 388 |
+
return None
|
| 389 |
+
|
| 390 |
+
if len(student_phonemes) > len(reference_phonemes) * 2:
|
| 391 |
+
return {
|
| 392 |
+
'scenario': 'multiple_words',
|
| 393 |
+
'category': 'multiple_words',
|
| 394 |
+
'confidence': 0.8,
|
| 395 |
+
'feedback': "I heard multiple words. Please say only one word.",
|
| 396 |
+
'action': "speak_single_word"
|
| 397 |
+
}
|
| 398 |
+
|
| 399 |
+
return None
|
| 400 |
+
|
| 401 |
+
@staticmethod
|
| 402 |
+
def detect_wrong_word(student_phonemes, reference_phonemes, word):
|
| 403 |
+
if not student_phonemes or not reference_phonemes:
|
| 404 |
+
return None
|
| 405 |
+
|
| 406 |
+
min_len = min(len(student_phonemes), len(reference_phonemes))
|
| 407 |
+
if min_len == 0:
|
| 408 |
+
return None
|
| 409 |
+
|
| 410 |
+
matches = 0
|
| 411 |
+
for i in range(min_len):
|
| 412 |
+
ref = reference_phonemes[i]
|
| 413 |
+
stu = student_phonemes[i]
|
| 414 |
+
if is_exact_phoneme_match(ref, stu):
|
| 415 |
+
matches += 1
|
| 416 |
+
|
| 417 |
+
similarity = matches / len(reference_phonemes) if len(reference_phonemes) > 0 else 0
|
| 418 |
+
|
| 419 |
+
if similarity < 0.3:
|
| 420 |
+
return {
|
| 421 |
+
'scenario': 'wrong_word',
|
| 422 |
+
'category': 'wrong_word',
|
| 423 |
+
'confidence': 0.9,
|
| 424 |
+
'feedback': f"That doesn't sound like '{word}'.",
|
| 425 |
+
'action': "repeat_target_word"
|
| 426 |
+
}
|
| 427 |
+
|
| 428 |
+
return None
|
| 429 |
+
|
| 430 |
+
@staticmethod
|
| 431 |
+
def detect_syllable_issues(student_phonemes, reference_phonemes, word):
    """Detect missing vowels, dropped syllables or extra syllables.

    The number of student phonemes typed as vowel/diphthong in
    ``UK_PHONEME_DB`` is compared with the syllable count reported by
    ``get_word_info(word)``.

    Args:
        student_phonemes: Phonemes recognised in the student's audio.
        reference_phonemes: Expected phonemes for ``word``.
        word: Target word (looked up for its syllable count).

    Returns:
        A ``'syllable'`` scenario dict, or ``None`` when nothing is flagged.
    """
    if not (student_phonemes and reference_phonemes):
        return None

    expected_syllables = get_word_info(word)["syllables"]
    vowel_kinds = ('vowel', 'diphthong')
    spoken_vowels = len([
        ph for ph in student_phonemes
        if UK_PHONEME_DB.get(ph, {}).get('type') in vowel_kinds
    ])

    def _report(confidence, feedback, action):
        return {
            'scenario': 'syllable',
            'category': 'syllable',
            'confidence': confidence,
            'feedback': feedback,
            'action': action
        }

    # Something was said, but none of it is a known vowel.
    if spoken_vowels == 0:
        return _report(
            0.9,
            f"Missing vowel sounds. '{word}' needs vowel pronunciation.",
            "add_vowel_sounds",
        )

    # Syllable-count feedback only applies to polysyllabic words whose
    # vowel count is off by at least one.
    if expected_syllables >= 2 and abs(spoken_vowels - expected_syllables) >= 1:
        shortfall = len(reference_phonemes) - len(student_phonemes)
        if shortfall >= 2 and spoken_vowels < expected_syllables:
            return _report(
                0.8,
                f"'{word}' has {expected_syllables} syllable(s). You're missing a syllable.",
                "add_syllables",
            )
        if spoken_vowels > expected_syllables:
            return _report(
                0.7,
                f"'{word}' has {expected_syllables} syllable(s). You added extra sounds.",
                "reduce_syllables",
            )

    return None
|
| 470 |
+
|
| 471 |
+
@staticmethod
|
| 472 |
+
def detect_ending_issues(student_phonemes, reference_phonemes):
    """Detect a dropped or substituted word ending.

    Checks, in order:
      1. Student is exactly one phoneme short: the reference's last
         phoneme is reported as missing (the prefix is not compared).
      2. Student is several phonemes short and the missing tail contains
         no vowel/diphthong (per ``UK_PHONEME_DB``): the tail is reported.
      3. Otherwise the final phonemes are compared with
         ``is_exact_phoneme_match``.

    Args:
        student_phonemes: Phonemes recognised in the student's audio.
        reference_phonemes: Expected phonemes for the target word.

    Returns:
        An ``'ending'`` scenario dict, or ``None`` when the ending is fine.
    """
    if not (student_phonemes and reference_phonemes):
        return None

    shortfall = len(reference_phonemes) - len(student_phonemes)

    if shortfall == 1:
        dropped = reference_phonemes[-1]
        return {
            'scenario': 'ending',
            'category': 'ending',
            'confidence': 0.8,
            'feedback': f"You're missing the final sound: '{dropped}'.",
            'action': "complete_ending",
            'target_phoneme': dropped
        }

    if shortfall > 1:
        tail = reference_phonemes[-shortfall:]
        tail_has_vowel = any(
            UK_PHONEME_DB.get(ph, {}).get('type') in ('vowel', 'diphthong')
            for ph in tail
        )
        if not tail_has_vowel:
            return {
                'scenario': 'ending',
                'category': 'ending',
                'confidence': 0.7,
                'feedback': f"You're missing the ending: '{''.join(tail)}'.",
                'action': "complete_ending"
            }

    # Same length, longer, or a missing tail that spans a vowel:
    # fall back to comparing the last phonemes directly.
    expected_last = reference_phonemes[-1]
    spoken_last = student_phonemes[-1]
    if is_exact_phoneme_match(expected_last, spoken_last):
        return None

    return {
        'scenario': 'ending',
        'category': 'ending',
        'confidence': 0.7,
        'feedback': f"Final sound should be '{expected_last}' not '{spoken_last}'.",
        'action': "correct_final_sound",
        'target_phoneme': expected_last
    }
|
| 516 |
+
|
| 517 |
+
@staticmethod
|
| 518 |
+
def detect_vowel_issues(student_phonemes, reference_phonemes):
    """Report the first reference vowel the student did not reproduce.

    Sequences are compared position by position over their common length.
    A position counts as an error when the reference phoneme is typed
    ``'vowel'`` or ``'diphthong'`` in ``UK_PHONEME_DB`` and
    ``is_exact_phoneme_match`` rejects the student's phoneme.

    Args:
        student_phonemes: Phonemes recognised in the student's audio.
        reference_phonemes: Expected phonemes for the target word.

    Returns:
        A ``'vowel'`` scenario dict targeting the first mismatched vowel,
        or ``None`` when every compared vowel matches.
    """
    if not (student_phonemes and reference_phonemes):
        return None

    mismatched = []
    for expected, spoken in zip(reference_phonemes, student_phonemes):
        kind = UK_PHONEME_DB.get(expected, {}).get('type')
        if kind not in ('vowel', 'diphthong'):
            continue
        if not is_exact_phoneme_match(expected, spoken):
            mismatched.append(expected)

    if not mismatched:
        return None

    # Only the earliest mismatch is surfaced to the student.
    first_expected = mismatched[0]
    tip = f"Use {first_expected} sound"
    return {
        'scenario': 'vowel',
        'category': 'vowel',
        'confidence': 0.9,
        'feedback': f"Vowel issue: {tip}",
        'action': "adjust_vowel",
        'target_phoneme': first_expected
    }
|
| 551 |
+
|
| 552 |
+
@staticmethod
|
| 553 |
+
def detect_consonant_issues(student_phonemes, reference_phonemes):
    """Report the first reference consonant the student did not reproduce.

    Sequences are compared position by position over their common length.
    A position counts as an error when the reference phoneme is typed
    ``'consonant'`` in ``UK_PHONEME_DB`` and ``is_exact_phoneme_match``
    rejects the student's phoneme.

    Args:
        student_phonemes: Phonemes recognised in the student's audio.
        reference_phonemes: Expected phonemes for the target word.

    Returns:
        A ``'consonant'`` scenario dict whose feedback carries the
        database ``tip`` for the phoneme (or a generic "Articulate ..."
        hint when none is stored), or ``None`` when nothing mismatches.
    """
    if not (student_phonemes and reference_phonemes):
        return None

    mismatched = []  # (expected phoneme, coaching tip) pairs, in order
    for expected, spoken in zip(reference_phonemes, student_phonemes):
        entry = UK_PHONEME_DB.get(expected, {})
        if entry.get('type') != 'consonant':
            continue
        if is_exact_phoneme_match(expected, spoken):
            continue
        mismatched.append(
            (expected, entry.get('tip', f'Articulate {expected} clearly'))
        )

    if not mismatched:
        return None

    # Only the earliest mismatch is surfaced to the student.
    first_expected, first_tip = mismatched[0]
    return {
        'scenario': 'consonant',
        'category': 'consonant',
        'confidence': 0.8,
        'feedback': f"Consonant: {first_tip}",
        'action': "articulate_consonant",
        'target_phoneme': first_expected
    }
|
| 586 |
+
|
| 587 |
+
@staticmethod
|
| 588 |
+
def detect_stress_issues(student_phonemes, reference_phonemes, word):
    """Suggest stress practice for a mostly-correct polysyllabic word.

    No stress information is read from the phonemes themselves: the hint
    is emitted whenever the word has at least two syllables (per
    ``get_word_info``) and at least 80% of the reference phonemes are
    matched position by position.

    NOTE(review): a perfectly matched polysyllabic word also satisfies the
    80% rule, so this returns a stress hint for it too - confirm that is
    intended given where this detector sits in the detection order.

    Args:
        student_phonemes: Phonemes recognised in the student's audio.
        reference_phonemes: Expected phonemes for ``word``.
        word: Target word (looked up for syllable count and stress).

    Returns:
        A ``'stress'`` scenario dict, or ``None``.
    """
    if not (student_phonemes and reference_phonemes):
        return None

    info = get_word_info(word)
    if info["syllables"] < 2:
        return None

    hits = sum(
        1
        for expected, spoken in zip(reference_phonemes, student_phonemes)
        if is_exact_phoneme_match(expected, spoken)
    )
    if hits / len(reference_phonemes) < 0.8:
        return None

    stress_labels = {
        "first": "first syllable",
        "second": "second syllable",
        "third": "third syllable"
    }
    stressed_part = stress_labels.get(info["stress"], "correct syllable")

    return {
        'scenario': 'stress',
        'category': 'stress',
        'confidence': 0.6,
        'feedback': f"For '{word}', emphasize the {stressed_part}.",
        'action': "practice_stress"
    }
|
| 619 |
+
|
| 620 |
+
@staticmethod
|
| 621 |
+
def detect_success(analysis_result, score):
    """Map a passing score to a praise scenario.

    Args:
        analysis_result: Analysis output; only its truthiness is checked.
        score: Numeric pronunciation score.

    Returns:
        A ``'success'`` scenario dict for scores of 75 and above (tiers at
        95, 85 and 75), or ``None`` for lower scores or a missing analysis.
    """
    if not analysis_result:
        return None

    # (minimum score, confidence, feedback, action), highest tier first.
    # NOTE(review): the last character of the top-tier message looks like a
    # mis-decoded emoji in the reviewed copy - confirm against the repo.
    tiers = (
        (95, 1.0, "Excellent pronunciation! Perfect! π", "continue_excellent_work"),
        (85, 0.9, "Very good pronunciation!", "refine_pronunciation"),
        (75, 0.8, "Good pronunciation! Keep practicing.", "practice_more"),
    )

    for minimum, confidence, message, action in tiers:
        if score >= minimum:
            return {
                'scenario': 'success',
                'category': 'success',
                'confidence': confidence,
                'feedback': message,
                'action': action
            }

    return None
|
| 651 |
+
|
| 652 |
+
@classmethod
|
| 653 |
+
def detect_scenarios(cls, student_phonemes, reference_phonemes, word, analysis_result, audio_error=None):
    """Run every detector in priority order and return the first hit.

    Order: silence, multiple_words, wrong_word, syllable, vowel,
    consonant, ending, stress, success. Detectors are wrapped in lambdas
    so that later ones are not evaluated once an earlier one has fired.

    Args:
        student_phonemes: Phonemes recognised in the student's audio.
        reference_phonemes: Expected phonemes for ``word``.
        word: Target word.
        analysis_result: Analysis dict; its ``'score'`` (default 0) gates
            the success scenario.
        audio_error: Optional error marker from audio processing, passed
            to the silence detector.

    Returns:
        The first truthy scenario dict, or a generic
        ``'needs_improvement'`` dict when no detector fires.
    """
    score = analysis_result.get('score', 0) if analysis_result else 0

    checks = (
        ('silence', lambda: cls.detect_silence(student_phonemes, audio_error)),
        ('multiple_words', lambda: cls.detect_multiple_words(student_phonemes, reference_phonemes)),
        ('wrong_word', lambda: cls.detect_wrong_word(student_phonemes, reference_phonemes, word)),
        ('syllable', lambda: cls.detect_syllable_issues(student_phonemes, reference_phonemes, word)),
        ('vowel', lambda: cls.detect_vowel_issues(student_phonemes, reference_phonemes)),
        ('consonant', lambda: cls.detect_consonant_issues(student_phonemes, reference_phonemes)),
        ('ending', lambda: cls.detect_ending_issues(student_phonemes, reference_phonemes)),
        ('stress', lambda: cls.detect_stress_issues(student_phonemes, reference_phonemes, word)),
        ('success', lambda: cls.detect_success(analysis_result, score)),
    )

    for label, run_check in checks:
        verdict = run_check()
        if not verdict:
            continue
        # Never report success for a score below the pass mark.
        if label == 'success' and score < 75:
            continue
        return verdict

    return {
        'scenario': 'needs_improvement',
        'category': 'general',
        'confidence': 0.5,
        'feedback': "Pronunciation needs improvement.",
        'action': "practice_sounds"
    }
|
| 683 |
+
|
| 684 |
+
# ==================================================
|
| 685 |
+
# 6. IMPROVED VIDEO RAG BUILDER - MERGES MULTIPLE PORTIONS
|
| 686 |
+
# ==================================================
|
| 687 |
+
|
| 688 |
+
# ==================================================
|
| 689 |
+
# 6. IMPROVED VIDEO RAG BUILDER - SMART SELECTION
|
| 690 |
+
# ==================================================
|
| 691 |
+
|
| 692 |
+
# Vowels that may stand in for one another when no clip exists for the
# exact target. The reviewed copy of this table was mis-decoded; the IPA
# symbols below are the reconstruction - TODO confirm against the repository.
_RELATED_VOWELS = {
    'ɪ': ['iː', 'i'], 'iː': ['ɪ', 'i'],
    'æ': ['a', 'ɑː'], 'ɑː': ['æ', 'a'],
    'ʊ': ['uː', 'u'], 'uː': ['ʊ', 'u'],
    'ɒ': ['ɔ', 'ɔː'], 'ɔː': ['ɒ', 'ɔ'],
}


def _clip_span_key(meta):
    """Identify a clip by its start/end timestamps (used for de-duplication)."""
    return f"{meta.get('start')}_{meta.get('end')}"


def _resolve_target_phoneme(target_phoneme, student_errors, feedback_message):
    """Pick the phoneme to focus on: explicit value, then errors, then quoted feedback text."""
    if not target_phoneme and student_errors:
        for error in student_errors:
            if error.get("type") in ("vowel", "diphthong", "consonant"):
                target_phoneme = error.get("expected")
                if target_phoneme:
                    print(f"Extracted target phoneme from errors: {target_phoneme}")
                    break

    if not target_phoneme:
        # Accept only quotes that are not glued to a letter, so the
        # apostrophe in "You're" / "doesn't" is not read as an opening quote.
        quoted = re.search(r"(?<!\w)'([^']+)'(?!\w)", feedback_message or "")
        target_phoneme = quoted.group(1) if quoted else None
        if target_phoneme:
            print(f"Extracted target phoneme from feedback: {target_phoneme}")

    return target_phoneme


def _select_success_clips(generic_clips):
    """Pick a praise clip followed by a different 'move on' clip."""
    praise_keywords = ("good", "great", "perfect", "excellent", "well done", "nice", "clear")
    next_keywords = ("next", "move")

    def _mentions(meta, keywords):
        text = (meta.get("_text") or "").lower()
        return any(keyword in text for keyword in keywords)

    praise_pool = [m for m in generic_clips if _mentions(m, praise_keywords)]
    next_pool = [m for m in generic_clips if _mentions(m, next_keywords)]
    print(f"Success classification: praise={len(praise_pool)} next={len(next_pool)}")

    if praise_pool:
        first_clip = random.choice(praise_pool)
    elif generic_clips:
        first_clip = random.choice(generic_clips)
    else:
        first_clip = None

    # Second clip: a 'next' clip if any exist, otherwise any other generic
    # clip; in both cases it must differ from the first.
    candidates = next_pool if next_pool else generic_clips
    if first_clip is not None:
        first_key = _clip_span_key(first_clip)
        candidates = [m for m in candidates if _clip_span_key(m) != first_key]
    second_clip = random.choice(candidates) if candidates else None

    return [clip for clip in (first_clip, second_clip) if clip]


def _select_phoneme_clips(category, target_phoneme, generic_clips, specific_clips):
    """Pick one phoneme-specific clip (exact, related, or any) then one general clip."""
    chosen = []
    specific = None

    if target_phoneme:
        specific = next(
            (meta for meta, clip_phoneme in specific_clips if clip_phoneme == target_phoneme),
            None,
        )
        if specific is not None:
            print(f"✓ Selected specific {category} clip for phoneme: {target_phoneme}")

    if specific is None and target_phoneme and category == "vowel":
        related = _RELATED_VOWELS.get(target_phoneme, [])
        for meta, clip_phoneme in specific_clips:
            if clip_phoneme in related:
                specific = meta
                print(f"✓ Selected related vowel clip: {clip_phoneme} for target {target_phoneme}")
                break

    if specific is None and specific_clips:
        specific, fallback_ph = random.choice(specific_clips)
        print(f"✓ Fallback to available specific {category} clip: {fallback_ph}")

    if specific is not None:
        chosen.append(specific)

    # Exactly one general clip follows the specific one.
    if generic_clips:
        chosen.append(random.choice(generic_clips))
        print("✓ Added one general clip after specific")

    return chosen


def _select_general_clips(category, target_phoneme, generic_clips, specific_clips):
    """Pick clips for the remaining categories (general-focus or balanced strategy)."""
    strategy = "general_focus" if category in ("syllable", "ending", "stress") else "balanced"
    print(f"Using selection strategy: {strategy}")

    exact = None
    if target_phoneme:
        exact = next(
            (meta for meta, clip_phoneme in specific_clips if clip_phoneme == target_phoneme),
            None,
        )

    chosen = []
    if strategy == "general_focus":
        if generic_clips:
            chosen.extend(random.sample(generic_clips, min(2, len(generic_clips))))
        if exact is not None and len(chosen) < 3:
            chosen.append(exact)
            print(f"✓ Added specific clip for: {target_phoneme}")
    else:
        if generic_clips:
            chosen.append(random.choice(generic_clips))
        if exact is not None:
            chosen.append(exact)
            print(f"✓ Selected specific clip for: {target_phoneme}")
        if len(chosen) < 2 and generic_clips:
            remaining = [c for c in generic_clips if c not in chosen]
            if remaining:
                chosen.append(random.choice(remaining))

    return chosen


def _render_clips(selected_metadatas):
    """Cut each clip from VIDEO_PATH, concatenate them and return the result as base64."""
    temp_paths = []  # every temp file created here, removed in `finally`
    try:
        clip_paths = []
        for i, seg in enumerate(selected_metadatas):
            tmp_clip = tempfile.NamedTemporaryFile(delete=False, suffix=f"_{i}.mp4")
            tmp_clip.close()
            temp_paths.append(tmp_clip.name)

            # check=True: a failed cut must abort instead of feeding an
            # empty file into the concat step.
            subprocess.run([
                "ffmpeg", "-y", "-ss", str(seg["start"]), "-to", str(seg["end"]),
                "-i", VIDEO_PATH, "-c:v", "libx264", "-preset", "ultrafast",
                "-crf", "28", "-c:a", "aac", tmp_clip.name
            ], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
            clip_paths.append(tmp_clip.name)

        # List file consumed by ffmpeg's concat demuxer.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="w") as concat_file:
            temp_paths.append(concat_file.name)
            for clip_path in clip_paths:
                concat_file.write(f"file '{os.path.abspath(clip_path)}'\n")

        final_video = tempfile.NamedTemporaryFile(delete=False, suffix="_final.mp4")
        final_video.close()
        temp_paths.append(final_video.name)

        subprocess.run([
            "ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", concat_file.name,
            "-c", "copy", final_video.name
        ], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

        with open(final_video.name, "rb") as f:
            v_data = base64.b64encode(f.read()).decode()

        print(f"✓ Successfully merged {len(clip_paths)} video clips")
        return v_data

    except Exception as e:
        print(f"✗ Video concatenation error: {e}")
        return ""

    finally:
        for path in temp_paths:
            if os.path.exists(path):
                os.remove(path)


def build_feedback_video(category, feedback_message, target_phoneme=None, student_errors=None):
    """Build a merged feedback video for a scenario category.

    Clip selection depends on the category:
      - success: [praise] -> [move-to-next]
      - vowel / consonant: [phoneme-specific] -> [one general]
      - anything else: general-focused or balanced mix (2-3 clips)

    Clips are fetched from ``collection`` by category, cut from
    ``VIDEO_PATH`` with ffmpeg and concatenated. This function never
    raises: any failure is printed and reported as an empty string.

    Args:
        category: Scenario category used to query the clip collection.
        feedback_message: Feedback text; a quoted token in it is used as
            the target phoneme when none is supplied otherwise.
        target_phoneme: Phoneme the feedback focuses on, if known.
        student_errors: Optional list of error dicts (``type``,
            ``expected``) used to derive the target phoneme.

    Returns:
        Base64-encoded video, or ``""`` when no clips are available or
        video processing fails.
    """
    print(f"\n=== Building video for: {category} ===")
    print(f"Target phoneme: {target_phoneme}")
    print(f"Student errors: {student_errors}")

    try:
        target_phoneme = _resolve_target_phoneme(target_phoneme, student_errors, feedback_message)

        gen_results = collection.get(where={"category": category})
        if not gen_results or not gen_results.get('metadatas'):
            print(f"No clips found for category: {category}")
            return ""

        metadatas = gen_results['metadatas']
        documents = gen_results.get('documents') or []

        # Clips carrying a "phoneme" are phoneme-specific; the rest are
        # generic and keep their transcript under "_text" so the success
        # selector can classify them.
        generic_clips = []
        specific_clips = []  # (meta, phoneme) tuples
        for idx, meta in enumerate(metadatas):
            clip_phoneme = meta.get("phoneme")
            if clip_phoneme:
                specific_clips.append((meta, clip_phoneme))
            else:
                meta_copy = dict(meta)
                meta_copy["_text"] = documents[idx] if idx < len(documents) else ""
                generic_clips.append(meta_copy)

        print(f"Found {len(generic_clips)} generic clips, {len(specific_clips)} specific clips")

        phoneme_category = category in ("vowel", "consonant")
        if category == "success":
            selected = _select_success_clips(generic_clips)
        elif phoneme_category:
            selected = _select_phoneme_clips(category, target_phoneme, generic_clips, specific_clips)
        else:
            selected = _select_general_clips(category, target_phoneme, generic_clips, specific_clips)

        # Drop duplicates (same time span) while preserving order.
        seen = set()
        selected_metadatas = []
        for meta in selected:
            key = _clip_span_key(meta)
            if key not in seen:
                seen.add(key)
                selected_metadatas.append(meta)

        # Top up to two clips, except for vowel/consonant, which must keep
        # a single general clip.
        if not phoneme_category and len(selected_metadatas) < 2 and generic_clips:
            needed = 2 - len(selected_metadatas)
            remaining = [c for c in generic_clips if c not in selected_metadatas]
            if remaining:
                selected_metadatas.extend(random.sample(remaining, min(needed, len(remaining))))

        if not selected_metadatas:
            print("No clips selected after filtering.")
            return ""

        print(f"Selected {len(selected_metadatas)} video clips:")
        for i, meta in enumerate(selected_metadatas):
            phoneme = meta.get('phoneme', 'generic')
            print(f"  Clip {i+1}: {meta.get('category')} - {phoneme} [{meta.get('start')}->{meta.get('end')}]")

        if not os.path.exists(VIDEO_PATH):
            print(f"Video file not found: {VIDEO_PATH}")
            return ""

        return _render_clips(selected_metadatas)

    except Exception as e:
        print(f"✗ Video generation error: {e}")
        return ""
|
| 960 |
+
# ==================================================
|
| 961 |
+
# 7. AUDIO PROCESSING
|
| 962 |
+
# ==================================================
|
| 963 |
+
|
| 964 |
+
def process_audio_file(audio_path):
    """Convert an uploaded recording to phonemes.

    Steps: ffmpeg converts the file to 16 kHz mono 16-bit WAV (written
    next to the input, with ``.webm`` replaced by ``.wav``), near-silent
    audio is rejected, quiet audio is amplified (gain capped at 3x), and
    the module-level ``processor``/``model`` decode a transcription.

    NOTE(review): the transcription is split into single characters after
    removing spaces, so any multi-character phoneme symbol would be
    broken up - confirm this matches the reference phoneme format.

    Args:
        audio_path: Path of the recorded audio file.

    Returns:
        ``(phonemes, None)`` on success, or ``(None, reason)`` where
        ``reason`` is ``"empty_audio"``, a ``"silent_rms_..."`` marker or
        ``"error: <message>"``. Exceptions are caught, not raised.
    """
    try:
        wav_path = audio_path.replace('.webm', '.wav')
        convert_cmd = [
            "ffmpeg", "-y", "-i", audio_path,
            "-ac", "1", "-ar", "16000",
            "-acodec", "pcm_s16le",
            wav_path
        ]
        subprocess.run(convert_cmd, check=True,
                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

        speech, sample_rate = sf.read(wav_path)
        if len(speech) == 0:
            return None, "empty_audio"

        rms = np.sqrt(np.mean(speech**2))
        peak = np.max(np.abs(speech))

        too_quiet = rms < 0.001 or peak < 0.02
        if too_quiet:
            return None, f"silent_rms_{rms:.6f}_peak_{peak:.4f}"

        # Raise quiet recordings towards a 0.5 peak, at most tripling them.
        if peak < 0.5:
            gain = 0.5 / peak if peak > 0 else 1.0
            speech = speech * min(gain, 3.0)

        inputs = processor(speech, sampling_rate=sample_rate,
                           return_tensors="pt", padding=True)

        with torch.no_grad():
            logits = model(inputs.input_values.to(DEVICE)).logits

        best_ids = torch.argmax(logits, dim=-1)
        decoded = processor.batch_decode(best_ids)[0]

        phonemes = [symbol for symbol in decoded.replace(" ", "") if symbol.strip()]

        print(f"Extracted phonemes: {phonemes}")
        return phonemes, None

    except Exception as e:
        print(f"Audio processing error: {str(e)}")
        return None, f"error: {str(e)}"
|
| 1008 |
+
|
| 1009 |
+
# ==================================================
|
| 1010 |
+
# 8. TEST VIDEO GENERATION
|
| 1011 |
+
# ==================================================
|
| 1012 |
+
|
| 1013 |
+
def test_video_generation():
    """Smoke-test ``build_feedback_video`` for a few categories.

    Each case is run twice: once with an explicit target phoneme and once
    with only the feedback text. Results are printed, not asserted.
    """
    print("\n=== TESTING VIDEO GENERATION ===")

    # The vowel case uses the real IPA symbol; a mis-encoded literal could
    # never match a clip's phoneme.
    test_cases = [
        {"category": "syllable", "feedback": "Syllable issue", "target_phoneme": None},
        {"category": "vowel", "feedback": "Vowel issue for 'æ'", "target_phoneme": "æ"},
        {"category": "consonant", "feedback": "Consonant issue for 'r'", "target_phoneme": "r"},
        {"category": "ending", "feedback": "Missing final 't'", "target_phoneme": "t"},
    ]

    for test in test_cases:
        print(f"\nTesting category: {test['category']}")
        video_blob = build_feedback_video(
            test['category'],
            test['feedback'],
            test['target_phoneme']
        )

        if video_blob:
            # The blob is a base64 string, so its length is a character count.
            print(f"✓ Video generated successfully ({len(video_blob)} base64 characters)")
            print("  Contains multiple merged clips")
        else:
            print("✗ Failed to generate video")

        # Also test with just the feedback message
        video_blob2 = build_feedback_video(
            test['category'],
            test['feedback']
        )

        if video_blob2:
            print("✓ Video also works without explicit target phoneme")

    print("\n" + "="*60)
|
| 1048 |
+
|
| 1049 |
+
# ==================================================
|
| 1050 |
+
# 9. MAIN ENDPOINT
|
| 1051 |
+
# ==================================================
|
| 1052 |
+
|
| 1053 |
+
@pronragupgrade_bp.route("/score", methods=["POST"])
|
| 1054 |
+
def train_pronunciation():
    """Score one pronunciation attempt and return feedback with a video.

    Expects a form field ``word`` and an uploaded file ``audio``. The
    upload is saved to a temporary ``.webm`` file, converted to phonemes,
    analysed against the reference pronunciation, classified into a
    scenario, and answered with JSON that includes a base64 feedback video.

    Returns:
        A JSON response: 400 with ``scenario: "input_error"`` when the
        word or audio is missing, 500 with ``scenario: "system_error"`` on
        any exception, otherwise the scoring payload.
    """
    def _input_error(message):
        return jsonify({
            "success": False,
            "error": message,
            "scenario": "input_error"
        }), 400

    try:
        word = request.form.get('word', '').strip().lower()
        if not word:
            return _input_error("No word provided")
        if 'audio' not in request.files:
            return _input_error("No audio file")

        upload = request.files['audio']
        with tempfile.NamedTemporaryFile(delete=False, suffix='.webm') as tmp_file:
            upload.save(tmp_file.name)
            temp_path = tmp_file.name

        print(f"\n=== Processing: '{word}' ===")

        try:
            # Audio -> phonemes, then compare with the reference.
            student_phonemes, audio_error = process_audio_file(temp_path)
            reference_phonemes = get_uk_pronunciation(word)
            analysis = analyze_pronunciation_strict(student_phonemes, reference_phonemes, word)
            score = analysis["score"]

            scenario_info = ScenarioDetector.detect_scenarios(
                student_phonemes=student_phonemes,
                reference_phonemes=reference_phonemes,
                word=word,
                analysis_result=analysis,
                audio_error=audio_error
            )
            scenario = scenario_info['scenario']
            category = scenario_info.get('category', scenario)
            feedback = scenario_info['feedback']

            # Generate video with MULTIPLE clips
            print(f"Generating video for category: {category}")
            video_blob = build_feedback_video(
                category, feedback, scenario_info.get('target_phoneme')
            )

            exact = analysis.get('exact_correct', 0)
            expected_total = analysis.get('total_expected', 0)
            payload = {
                "success": True,
                "scenario": scenario,
                "score": score,
                "is_acceptable": score >= 75,
                "word": word,
                "student_phonemes": student_phonemes or [],
                "reference_phonemes": reference_phonemes,
                "ipa_notation": "/" + "".join(reference_phonemes) + "/",
                "feedback": feedback,
                "action_suggestion": scenario_info.get('action', ''),
                "videoBlobBase64": video_blob or "",
                "video_clips_merged": bool(video_blob),
                "analysis": {
                    "accuracy": f"{exact}/{expected_total} exact matches",
                    "accuracy_percentage": analysis.get('accuracy_percentage', 0),
                }
            }
            return jsonify(payload)

        finally:
            # Remove the upload and the WAV that process_audio_file derives from it.
            for leftover in (temp_path, temp_path.replace('.webm', '.wav')):
                if os.path.exists(leftover):
                    os.remove(leftover)

    except Exception as e:
        print(f"Error: {str(e)}")
        return jsonify({
            "success": False,
            "error": str(e),
            "scenario": "system_error"
        }), 500
|
| 1146 |
+
|
requirements.txt
CHANGED
|
@@ -53,3 +53,11 @@ openai-whisper
|
|
| 53 |
rapidfuzz==3.6.1
|
| 54 |
faster-whisper
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
rapidfuzz==3.6.1
|
| 54 |
faster-whisper
|
| 55 |
|
| 56 |
+
numpy
|
| 57 |
+
soundfile
|
| 58 |
+
eng-to-ipa
|
| 59 |
+
torch
|
| 60 |
+
torchaudio
|
| 61 |
+
torchcodec
|
| 62 |
+
|
| 63 |
+
|
teacher_feedback_sentences_category.json
CHANGED
|
@@ -1,338 +1,678 @@
|
|
| 1 |
[
|
| 2 |
{
|
| 3 |
-
"id": "feedback.
|
| 4 |
"video_file": "feedback.mp4",
|
| 5 |
-
"start":
|
| 6 |
-
"end":
|
| 7 |
-
"text": "
|
| 8 |
-
"category": "
|
| 9 |
},
|
| 10 |
{
|
| 11 |
-
"id": "feedback.
|
| 12 |
"video_file": "feedback.mp4",
|
| 13 |
-
"start":
|
| 14 |
-
"end":
|
| 15 |
-
"text": "
|
| 16 |
-
"category": "
|
| 17 |
},
|
| 18 |
{
|
| 19 |
-
"id": "feedback.
|
| 20 |
"video_file": "feedback.mp4",
|
| 21 |
-
"start":
|
| 22 |
-
"end":
|
| 23 |
-
"text": "
|
| 24 |
-
"category": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
},
|
| 26 |
{
|
| 27 |
-
"id": "feedback.
|
| 28 |
"video_file": "feedback.mp4",
|
| 29 |
-
"start":
|
| 30 |
-
"end":
|
| 31 |
-
"text": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
"category": "wrong_word"
|
| 33 |
},
|
| 34 |
{
|
| 35 |
-
"id": "feedback.
|
| 36 |
"video_file": "feedback.mp4",
|
| 37 |
-
"start":
|
| 38 |
-
"end":
|
| 39 |
-
"text": "
|
| 40 |
"category": "wrong_word"
|
| 41 |
},
|
| 42 |
{
|
| 43 |
-
"id": "feedback.
|
| 44 |
"video_file": "feedback.mp4",
|
| 45 |
-
"start":
|
| 46 |
-
"end":
|
| 47 |
-
"text": "
|
| 48 |
"category": "wrong_word"
|
| 49 |
},
|
| 50 |
{
|
| 51 |
-
"id": "feedback.
|
| 52 |
"video_file": "feedback.mp4",
|
| 53 |
-
"start":
|
| 54 |
-
"end":
|
| 55 |
-
"text": "
|
| 56 |
-
"category": "
|
| 57 |
},
|
| 58 |
{
|
| 59 |
-
"id": "feedback.
|
| 60 |
"video_file": "feedback.mp4",
|
| 61 |
-
"start":
|
| 62 |
-
"end":
|
| 63 |
-
"text": "
|
| 64 |
-
"category": "
|
| 65 |
},
|
| 66 |
{
|
| 67 |
-
"id": "feedback.
|
| 68 |
"video_file": "feedback.mp4",
|
| 69 |
-
"start":
|
| 70 |
-
"end":
|
| 71 |
-
"text": "
|
| 72 |
-
"category": "
|
| 73 |
},
|
|
|
|
| 74 |
{
|
| 75 |
-
"id": "feedback.
|
| 76 |
"video_file": "feedback.mp4",
|
| 77 |
-
"start":
|
| 78 |
-
"end":
|
| 79 |
-
"text": "
|
| 80 |
-
"category": "
|
| 81 |
},
|
| 82 |
{
|
| 83 |
-
"id": "feedback.
|
| 84 |
"video_file": "feedback.mp4",
|
| 85 |
-
"start":
|
| 86 |
-
"end":
|
| 87 |
-
"text": "
|
| 88 |
-
"category": "
|
| 89 |
},
|
| 90 |
{
|
| 91 |
-
"id": "feedback.
|
| 92 |
"video_file": "feedback.mp4",
|
| 93 |
-
"start":
|
| 94 |
-
"end":
|
| 95 |
-
"text": "
|
| 96 |
-
"category": "
|
| 97 |
},
|
| 98 |
{
|
| 99 |
-
"id": "feedback.
|
| 100 |
"video_file": "feedback.mp4",
|
| 101 |
-
"start":
|
| 102 |
-
"end":
|
| 103 |
-
"text": "
|
| 104 |
-
"category": "
|
| 105 |
},
|
|
|
|
| 106 |
{
|
| 107 |
-
"id": "feedback.
|
| 108 |
"video_file": "feedback.mp4",
|
| 109 |
-
"start":
|
| 110 |
-
"end":
|
| 111 |
-
"text": "
|
| 112 |
-
"category": "
|
| 113 |
},
|
| 114 |
{
|
| 115 |
-
"id": "feedback.
|
| 116 |
"video_file": "feedback.mp4",
|
| 117 |
-
"start":
|
| 118 |
-
"end":
|
| 119 |
-
"text": "
|
| 120 |
-
"category": "
|
| 121 |
},
|
| 122 |
{
|
| 123 |
-
"id": "feedback.
|
| 124 |
"video_file": "feedback.mp4",
|
| 125 |
-
"start":
|
| 126 |
-
"end":
|
| 127 |
-
"text": "
|
| 128 |
-
"category": "
|
| 129 |
},
|
| 130 |
{
|
| 131 |
-
"id": "feedback.
|
| 132 |
"video_file": "feedback.mp4",
|
| 133 |
-
"start":
|
| 134 |
-
"end":
|
| 135 |
-
"text": "
|
| 136 |
-
"category": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
},
|
|
|
|
| 138 |
{
|
| 139 |
-
"id": "feedback.
|
| 140 |
"video_file": "feedback.mp4",
|
| 141 |
-
"start":
|
| 142 |
-
"end":
|
| 143 |
-
"text": "
|
| 144 |
"category": "ending"
|
| 145 |
},
|
| 146 |
{
|
| 147 |
-
"id": "feedback.
|
| 148 |
"video_file": "feedback.mp4",
|
| 149 |
-
"start":
|
| 150 |
-
"end":
|
| 151 |
-
"text": "
|
| 152 |
"category": "ending"
|
| 153 |
},
|
| 154 |
{
|
| 155 |
-
"id": "feedback.
|
| 156 |
"video_file": "feedback.mp4",
|
| 157 |
-
"start":
|
| 158 |
-
"end":
|
| 159 |
-
"text": "
|
| 160 |
"category": "ending"
|
| 161 |
},
|
| 162 |
{
|
| 163 |
-
"id": "feedback.
|
| 164 |
"video_file": "feedback.mp4",
|
| 165 |
-
"start":
|
| 166 |
-
"end":
|
| 167 |
-
"text": "
|
| 168 |
-
"category": "
|
| 169 |
},
|
| 170 |
{
|
| 171 |
-
"id": "feedback.
|
| 172 |
"video_file": "feedback.mp4",
|
| 173 |
-
"start":
|
| 174 |
-
"end":
|
| 175 |
-
"text": "
|
| 176 |
-
"category": "
|
| 177 |
},
|
| 178 |
{
|
| 179 |
-
"id": "feedback.
|
| 180 |
"video_file": "feedback.mp4",
|
| 181 |
-
"start":
|
| 182 |
-
"end":
|
| 183 |
-
"text": "
|
| 184 |
-
"category": "
|
| 185 |
},
|
| 186 |
{
|
| 187 |
-
"id": "feedback.
|
| 188 |
"video_file": "feedback.mp4",
|
| 189 |
-
"start":
|
| 190 |
-
"end":
|
| 191 |
-
"text": "
|
| 192 |
-
"category": "
|
| 193 |
},
|
| 194 |
{
|
| 195 |
-
"id": "feedback.
|
| 196 |
"video_file": "feedback.mp4",
|
| 197 |
-
"start":
|
| 198 |
-
"end":
|
| 199 |
-
"text": "
|
| 200 |
-
"category": "
|
| 201 |
},
|
|
|
|
| 202 |
{
|
| 203 |
-
"id": "feedback.
|
| 204 |
"video_file": "feedback.mp4",
|
| 205 |
-
"start":
|
| 206 |
-
"end":
|
| 207 |
-
"text": "
|
| 208 |
-
"category": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
},
|
| 210 |
{
|
| 211 |
-
"id": "feedback.
|
| 212 |
"video_file": "feedback.mp4",
|
| 213 |
-
"start":
|
| 214 |
-
"end":
|
| 215 |
-
"text": "
|
| 216 |
-
"category": "
|
|
|
|
| 217 |
},
|
| 218 |
{
|
| 219 |
-
"id": "feedback.
|
| 220 |
"video_file": "feedback.mp4",
|
| 221 |
-
"start":
|
| 222 |
-
"end":
|
| 223 |
-
"text": "
|
| 224 |
-
"category": "
|
|
|
|
| 225 |
},
|
| 226 |
{
|
| 227 |
-
"id": "feedback.
|
| 228 |
"video_file": "feedback.mp4",
|
| 229 |
-
"start":
|
| 230 |
-
"end":
|
| 231 |
-
"text": "
|
| 232 |
-
"category": "
|
|
|
|
| 233 |
},
|
| 234 |
{
|
| 235 |
-
"id": "feedback.
|
| 236 |
"video_file": "feedback.mp4",
|
| 237 |
-
"start":
|
| 238 |
-
"end":
|
| 239 |
-
"text": "
|
| 240 |
-
"category": "
|
|
|
|
| 241 |
},
|
| 242 |
{
|
| 243 |
-
"id": "feedback.
|
| 244 |
"video_file": "feedback.mp4",
|
| 245 |
-
"start":
|
| 246 |
-
"end":
|
| 247 |
-
"text": "
|
| 248 |
-
"category": "
|
|
|
|
| 249 |
},
|
| 250 |
{
|
| 251 |
-
"id": "feedback.
|
| 252 |
"video_file": "feedback.mp4",
|
| 253 |
-
"start":
|
| 254 |
-
"end":
|
| 255 |
-
"text": "
|
| 256 |
-
"category": "
|
|
|
|
| 257 |
},
|
| 258 |
{
|
| 259 |
-
"id": "feedback.
|
| 260 |
"video_file": "feedback.mp4",
|
| 261 |
-
"start":
|
| 262 |
-
"end":
|
| 263 |
-
"text": "
|
| 264 |
-
"category": "
|
|
|
|
| 265 |
},
|
| 266 |
{
|
| 267 |
-
"id": "feedback.
|
| 268 |
"video_file": "feedback.mp4",
|
| 269 |
-
"start":
|
| 270 |
-
"end":
|
| 271 |
-
"text": "sound
|
| 272 |
-
"category": "
|
|
|
|
| 273 |
},
|
|
|
|
| 274 |
{
|
| 275 |
-
"id": "feedback.
|
| 276 |
"video_file": "feedback.mp4",
|
| 277 |
-
"start":
|
| 278 |
-
"end":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
"text": "I could not hear your voice.",
|
| 280 |
"category": "silence"
|
| 281 |
},
|
| 282 |
{
|
| 283 |
-
"id": "feedback.
|
| 284 |
"video_file": "feedback.mp4",
|
| 285 |
-
"start":
|
| 286 |
-
"end":
|
| 287 |
"text": "Please hold the record button and say the word.",
|
| 288 |
"category": "silence"
|
| 289 |
},
|
| 290 |
{
|
| 291 |
-
"id": "feedback.
|
| 292 |
"video_file": "feedback.mp4",
|
| 293 |
-
"start":
|
| 294 |
-
"end":
|
| 295 |
"text": "It was very quiet.",
|
| 296 |
"category": "silence"
|
| 297 |
},
|
| 298 |
{
|
| 299 |
-
"id": "feedback.
|
| 300 |
"video_file": "feedback.mp4",
|
| 301 |
-
"start":
|
| 302 |
-
"end":
|
| 303 |
"text": "Speak a little louder and try again.",
|
| 304 |
"category": "silence"
|
| 305 |
},
|
| 306 |
{
|
| 307 |
-
"id": "feedback.
|
| 308 |
"video_file": "feedback.mp4",
|
| 309 |
-
"start":
|
| 310 |
-
"end":
|
| 311 |
"text": "No sound was recorded.",
|
| 312 |
"category": "silence"
|
| 313 |
},
|
| 314 |
{
|
| 315 |
-
"id": "feedback.
|
| 316 |
"video_file": "feedback.mp4",
|
| 317 |
-
"start":
|
| 318 |
-
"end":
|
| 319 |
"text": "Check your microphone and say the word again.",
|
| 320 |
"category": "silence"
|
| 321 |
},
|
| 322 |
{
|
| 323 |
-
"id": "feedback.
|
| 324 |
"video_file": "feedback.mp4",
|
| 325 |
-
"start":
|
| 326 |
-
"end":
|
| 327 |
"text": "I think you whispered.",
|
| 328 |
"category": "silence"
|
| 329 |
},
|
| 330 |
{
|
| 331 |
-
"id": "feedback.
|
| 332 |
"video_file": "feedback.mp4",
|
| 333 |
-
"start":
|
| 334 |
-
"end":
|
| 335 |
"text": "Use your clear classroom voice.",
|
| 336 |
"category": "silence"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 337 |
}
|
| 338 |
-
|
|
|
|
|
|
| 1 |
[
|
| 2 |
{
|
| 3 |
+
"id": "feedback.mp4_sent_000",
|
| 4 |
"video_file": "feedback.mp4",
|
| 5 |
+
"start": "000.000",
|
| 6 |
+
"end": "002.171",
|
| 7 |
+
"text": "Good job, keep it up.",
|
| 8 |
+
"category": "success"
|
| 9 |
},
|
| 10 |
{
|
| 11 |
+
"id": "feedback.mp4_sent_001",
|
| 12 |
"video_file": "feedback.mp4",
|
| 13 |
+
"start": "002.897",
|
| 14 |
+
"end": "006.374",
|
| 15 |
+
"text": "Perfect pronunciation.",
|
| 16 |
+
"category": "success"
|
| 17 |
},
|
| 18 |
{
|
| 19 |
+
"id": "feedback.mp4_sent_002",
|
| 20 |
"video_file": "feedback.mp4",
|
| 21 |
+
"start": "006.953",
|
| 22 |
+
"end": "011.010",
|
| 23 |
+
"text": "Great, your pronunciation is clear.",
|
| 24 |
+
"category": "success"
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"id": "feedback.mp4_sent_003",
|
| 28 |
+
"video_file": "feedback.mp4",
|
| 29 |
+
"start": "011.589",
|
| 30 |
+
"end": "014.200",
|
| 31 |
+
"text": "Move to the next word.",
|
| 32 |
+
"category": "success"
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
"id": "feedback.mp4_sent_004",
|
| 36 |
+
"video_file": "feedback.mp4",
|
| 37 |
+
"start": "014.766",
|
| 38 |
+
"end": "017.060",
|
| 39 |
+
"text": "Let's move on.",
|
| 40 |
+
"category": "success"
|
| 41 |
},
|
| 42 |
{
|
| 43 |
+
"id": "feedback.mp4_sent_005",
|
| 44 |
"video_file": "feedback.mp4",
|
| 45 |
+
"start": "017.098",
|
| 46 |
+
"end": "019.700",
|
| 47 |
+
"text": "Ready for the next word?",
|
| 48 |
+
"category": "success"
|
| 49 |
+
},
|
| 50 |
+
|
| 51 |
+
{
|
| 52 |
+
"id": "feedback.mp4_sent_006",
|
| 53 |
+
"video_file": "feedback.mp4",
|
| 54 |
+
"start": "019.992",
|
| 55 |
+
"end": "023.175",
|
| 56 |
+
"text": "You said a different word.",
|
| 57 |
"category": "wrong_word"
|
| 58 |
},
|
| 59 |
{
|
| 60 |
+
"id": "feedback.mp4_sent_007",
|
| 61 |
"video_file": "feedback.mp4",
|
| 62 |
+
"start": "023.178",
|
| 63 |
+
"end": "026.365",
|
| 64 |
+
"text": "Oops! That’s not the word.",
|
| 65 |
"category": "wrong_word"
|
| 66 |
},
|
| 67 |
{
|
| 68 |
+
"id": "feedback.mp4_sent_008",
|
| 69 |
"video_file": "feedback.mp4",
|
| 70 |
+
"start": "026.660",
|
| 71 |
+
"end": "029.240",
|
| 72 |
+
"text": "I heard a different word.",
|
| 73 |
"category": "wrong_word"
|
| 74 |
},
|
| 75 |
{
|
| 76 |
+
"id": "feedback.mp4_sent_009",
|
| 77 |
"video_file": "feedback.mp4",
|
| 78 |
+
"start": "030.134",
|
| 79 |
+
"end": "033.135",
|
| 80 |
+
"text": "Let’s focus on the correct one.",
|
| 81 |
+
"category": "wrong_word"
|
| 82 |
},
|
| 83 |
{
|
| 84 |
+
"id": "feedback.mp4_sent_010",
|
| 85 |
"video_file": "feedback.mp4",
|
| 86 |
+
"start": "033.615",
|
| 87 |
+
"end": "037.085",
|
| 88 |
+
"text": "Let’s practice the word once more.",
|
| 89 |
+
"category": "wrong_word"
|
| 90 |
},
|
| 91 |
{
|
| 92 |
+
"id": "feedback.mp4_sent_011",
|
| 93 |
"video_file": "feedback.mp4",
|
| 94 |
+
"start": "037.380",
|
| 95 |
+
"end": "040.270",
|
| 96 |
+
"text": "Can you say the correct word again?",
|
| 97 |
+
"category": "wrong_word"
|
| 98 |
},
|
| 99 |
+
|
| 100 |
{
|
| 101 |
+
"id": "feedback.mp4_sent_012",
|
| 102 |
"video_file": "feedback.mp4",
|
| 103 |
+
"start": "041.412",
|
| 104 |
+
"end": "044.325",
|
| 105 |
+
"text": "I heard more than one word.",
|
| 106 |
+
"category": "multiple_words"
|
| 107 |
},
|
| 108 |
{
|
| 109 |
+
"id": "feedback.mp4_sent_013",
|
| 110 |
"video_file": "feedback.mp4",
|
| 111 |
+
"start": "044.330",
|
| 112 |
+
"end": "047.220",
|
| 113 |
+
"text": "You said multiple words.",
|
| 114 |
+
"category": "multiple_words"
|
| 115 |
},
|
| 116 |
{
|
| 117 |
+
"id": "feedback.mp4_sent_014",
|
| 118 |
"video_file": "feedback.mp4",
|
| 119 |
+
"start": "047.230",
|
| 120 |
+
"end": "050.700",
|
| 121 |
+
"text": "Let’s focus on just one.",
|
| 122 |
+
"category": "multiple_words"
|
| 123 |
},
|
| 124 |
{
|
| 125 |
+
"id": "feedback.mp4_sent_015",
|
| 126 |
"video_file": "feedback.mp4",
|
| 127 |
+
"start": "050.993",
|
| 128 |
+
"end": "054.175",
|
| 129 |
+
"text": "Please say only one word.",
|
| 130 |
+
"category": "multiple_words"
|
| 131 |
},
|
| 132 |
+
|
| 133 |
{
|
| 134 |
+
"id": "feedback.mp4_sent_016",
|
| 135 |
"video_file": "feedback.mp4",
|
| 136 |
+
"start": "055.091",
|
| 137 |
+
"end": "059.041",
|
| 138 |
+
"text": "Oops! I did not hear all the parts of the word.",
|
| 139 |
+
"category": "syllable"
|
| 140 |
},
|
| 141 |
{
|
| 142 |
+
"id": "feedback.mp4_sent_017",
|
| 143 |
"video_file": "feedback.mp4",
|
| 144 |
+
"start": "059.665",
|
| 145 |
+
"end": "062.783",
|
| 146 |
+
"text": "It sounded like the word was missing a part.",
|
| 147 |
+
"category": "syllable"
|
| 148 |
},
|
| 149 |
{
|
| 150 |
+
"id": "feedback.mp4_sent_018",
|
| 151 |
"video_file": "feedback.mp4",
|
| 152 |
+
"start": "063.615",
|
| 153 |
+
"end": "067.980",
|
| 154 |
+
"text": "No problem—try again, slowly and clearly.",
|
| 155 |
+
"category": "syllable"
|
| 156 |
},
|
| 157 |
{
|
| 158 |
+
"id": "feedback.mp4_sent_019",
|
| 159 |
"video_file": "feedback.mp4",
|
| 160 |
+
"start": "068.812",
|
| 161 |
+
"end": "073.178",
|
| 162 |
+
"text": "Nice try—let’s say every part this time.",
|
| 163 |
+
"category": "syllable"
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"id": "feedback.mp4_sent_020",
|
| 167 |
+
"video_file": "feedback.mp4",
|
| 168 |
+
"start": "074.010",
|
| 169 |
+
"end": "077.750",
|
| 170 |
+
"text": "Tap once for each part and say it again.",
|
| 171 |
+
"category": "syllable"
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"id": "feedback.mp4_sent_021",
|
| 175 |
+
"video_file": "feedback.mp4",
|
| 176 |
+
"start": "078.583",
|
| 177 |
+
"end": "083.572",
|
| 178 |
+
"text": "Listen, then say it in parts, not too fast.",
|
| 179 |
+
"category": "syllable"
|
| 180 |
},
|
| 181 |
+
|
| 182 |
{
|
| 183 |
+
"id": "feedback.mp4_sent_022",
|
| 184 |
"video_file": "feedback.mp4",
|
| 185 |
+
"start": "084.815",
|
| 186 |
+
"end": "088.355",
|
| 187 |
+
"text": "Oops! The ending sound was too soft.",
|
| 188 |
"category": "ending"
|
| 189 |
},
|
| 190 |
{
|
| 191 |
+
"id": "feedback.mp4_sent_023",
|
| 192 |
"video_file": "feedback.mp4",
|
| 193 |
+
"start": "088.769",
|
| 194 |
+
"end": "092.719",
|
| 195 |
+
"text": "It sounds like the last sound got skipped.",
|
| 196 |
"category": "ending"
|
| 197 |
},
|
| 198 |
{
|
| 199 |
+
"id": "feedback.mp4_sent_024",
|
| 200 |
"video_file": "feedback.mp4",
|
| 201 |
+
"start": "093.135",
|
| 202 |
+
"end": "096.877",
|
| 203 |
+
"text": "You missed the ending sound, try again.",
|
| 204 |
"category": "ending"
|
| 205 |
},
|
| 206 |
{
|
| 207 |
+
"id": "feedback.mp4_sent_025",
|
| 208 |
"video_file": "feedback.mp4",
|
| 209 |
+
"start": "097.915",
|
| 210 |
+
"end": "102.074",
|
| 211 |
+
"text": "The last sound wasn’t clear. Please say it again.",
|
| 212 |
+
"category": "ending"
|
| 213 |
},
|
| 214 |
{
|
| 215 |
+
"id": "feedback.mp4_sent_026",
|
| 216 |
"video_file": "feedback.mp4",
|
| 217 |
+
"start": "102.698",
|
| 218 |
+
"end": "107.687",
|
| 219 |
+
"text": "No worries—try again and say the ending clearly.",
|
| 220 |
+
"category": "ending"
|
| 221 |
},
|
| 222 |
{
|
| 223 |
+
"id": "feedback.mp4_sent_027",
|
| 224 |
"video_file": "feedback.mp4",
|
| 225 |
+
"start": "108.310",
|
| 226 |
+
"end": "113.716",
|
| 227 |
+
"text": "Say the word again and make the last sound loud and clear.",
|
| 228 |
+
"category": "ending"
|
| 229 |
},
|
| 230 |
{
|
| 231 |
+
"id": "feedback.mp4_sent_028",
|
| 232 |
"video_file": "feedback.mp4",
|
| 233 |
+
"start": "114.130",
|
| 234 |
+
"end": "119.121",
|
| 235 |
+
"text": "You added an extra sound at the end. Let’s correct that.",
|
| 236 |
+
"category": "ending"
|
| 237 |
},
|
| 238 |
{
|
| 239 |
+
"id": "feedback.mp4_sent_029",
|
| 240 |
"video_file": "feedback.mp4",
|
| 241 |
+
"start": "119.537",
|
| 242 |
+
"end": "124.942",
|
| 243 |
+
"text": "The ending sound was too long. Try saying it shorter.",
|
| 244 |
+
"category": "ending"
|
| 245 |
},
|
| 246 |
+
|
| 247 |
{
|
| 248 |
+
"id": "feedback.mp4_sent_030",
|
| 249 |
"video_file": "feedback.mp4",
|
| 250 |
+
"start": 124.944,
|
| 251 |
+
"end": 127.645,
|
| 252 |
+
"text": "Check your vowel sound /æ/.",
|
| 253 |
+
"category": "vowel",
|
| 254 |
+
"phoneme": "æ"
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"id": "feedback.mp4_sent_031",
|
| 258 |
+
"video_file": "feedback.mp4",
|
| 259 |
+
"start": 130.550,
|
| 260 |
+
"end": 133.674,
|
| 261 |
+
"text": "Check your vowel sound /e/.",
|
| 262 |
+
"category": "vowel",
|
| 263 |
+
"phoneme": "e"
|
| 264 |
+
},
|
| 265 |
+
{
|
| 266 |
+
"id": "feedback.mp4_sent_032",
|
| 267 |
+
"video_file": "feedback.mp4",
|
| 268 |
+
"start": 135.960,
|
| 269 |
+
"end": 139.287,
|
| 270 |
+
"text": "Check your vowel sound /ɪ/.",
|
| 271 |
+
"category": "vowel",
|
| 272 |
+
"phoneme": "ɪ"
|
| 273 |
},
|
| 274 |
{
|
| 275 |
+
"id": "feedback.mp4_sent_033",
|
| 276 |
"video_file": "feedback.mp4",
|
| 277 |
+
"start": 141.366,
|
| 278 |
+
"end": 146.355,
|
| 279 |
+
"text": "Check your vowel sound /Ι/ or /Ι/.",
|
| 280 |
+
"category": "vowel",
|
| 281 |
+
"phoneme": "Ι"
|
| 282 |
},
|
| 283 |
{
|
| 284 |
+
"id": "feedback.mp4_sent_034",
|
| 285 |
"video_file": "feedback.mp4",
|
| 286 |
+
"start": 148.266,
|
| 287 |
+
"end": 151.344,
|
| 288 |
+
"text": "Check your vowel sound /Κ/.",
|
| 289 |
+
"category": "vowel",
|
| 290 |
+
"phoneme": "Κ"
|
| 291 |
},
|
| 292 |
{
|
| 293 |
+
"id": "feedback.mp4_sent_035",
|
| 294 |
"video_file": "feedback.mp4",
|
| 295 |
+
"start": 151.345,
|
| 296 |
+
"end": 154.255,
|
| 297 |
+
"text": "Check your vowel sound /iː/.",
|
| 298 |
+
"category": "vowel",
|
| 299 |
+
"phoneme": "iː"
|
| 300 |
},
|
| 301 |
{
|
| 302 |
+
"id": "feedback.mp4_sent_036",
|
| 303 |
"video_file": "feedback.mp4",
|
| 304 |
+
"start": 158.622,
|
| 305 |
+
"end": 161.947,
|
| 306 |
+
"text": "Check your vowel sound /uː/.",
|
| 307 |
+
"category": "vowel",
|
| 308 |
+
"phoneme": "uː"
|
| 309 |
},
|
| 310 |
{
|
| 311 |
+
"id": "feedback.mp4_sent_037",
|
| 312 |
"video_file": "feedback.mp4",
|
| 313 |
+
"start": 166.936,
|
| 314 |
+
"end": 170.262,
|
| 315 |
+
"text": "Check your vowel sound /eɪ/.",
|
| 316 |
+
"category": "vowel",
|
| 317 |
+
"phoneme": "eɪ"
|
| 318 |
},
|
| 319 |
{
|
| 320 |
+
"id": "feedback.mp4_sent_038",
|
| 321 |
"video_file": "feedback.mp4",
|
| 322 |
+
"start": 181.489,
|
| 323 |
+
"end": 184.607,
|
| 324 |
+
"text": "Check your vowel sound /ΙΛ/.",
|
| 325 |
+
"category": "vowel",
|
| 326 |
+
"phoneme": "ΙΛ"
|
| 327 |
},
|
| 328 |
{
|
| 329 |
+
"id": "feedback.mp4_sent_039",
|
| 330 |
"video_file": "feedback.mp4",
|
| 331 |
+
"start": 187.725,
|
| 332 |
+
"end": 190.844,
|
| 333 |
+
"text": "Check your vowel sound /ΙΛ/.",
|
| 334 |
+
"category": "vowel",
|
| 335 |
+
"phoneme": "ΙΛ"
|
| 336 |
},
|
| 337 |
{
|
| 338 |
+
"id": "feedback.mp4_sent_040",
|
| 339 |
"video_file": "feedback.mp4",
|
| 340 |
+
"start": 193.335,
|
| 341 |
+
"end": 196.460,
|
| 342 |
+
"text": "Check your vowel sound /aʊ/.",
|
| 343 |
+
"category": "vowel",
|
| 344 |
+
"phoneme": "aʊ"
|
| 345 |
},
|
| 346 |
+
|
| 347 |
{
|
| 348 |
+
"id": "feedback.mp4_sent_041",
|
| 349 |
"video_file": "feedback.mp4",
|
| 350 |
+
"start": "197.285",
|
| 351 |
+
"end": "202.280",
|
| 352 |
+
"text": "Check your vowel sound, make sure it's pronounced clearly.",
|
| 353 |
+
"category": "vowel"
|
| 354 |
+
},
|
| 355 |
+
{
|
| 356 |
+
"id": "feedback.mp4_sent_042",
|
| 357 |
+
"video_file": "feedback.mp4",
|
| 358 |
+
"start": "204.359",
|
| 359 |
+
"end": "210.175",
|
| 360 |
+
"text": "Listen closely to the vowel sound, it needs to be clearer.",
|
| 361 |
+
"category": "vowel"
|
| 362 |
+
},
|
| 363 |
+
{
|
| 364 |
+
"id": "feedback.mp4_sent_043",
|
| 365 |
+
"video_file": "feedback.mp4",
|
| 366 |
+
"start": "212.465",
|
| 367 |
+
"end": "217.670",
|
| 368 |
+
"text": "Try saying the vowel sound a little longer and more clearly.",
|
| 369 |
+
"category": "vowel"
|
| 370 |
+
},
|
| 371 |
+
|
| 372 |
+
{
|
| 373 |
+
"id": "feedback.mp4_sent_044",
|
| 374 |
+
"video_file": "feedback.mp4",
|
| 375 |
+
"start": "242.608",
|
| 376 |
+
"end": "245.519",
|
| 377 |
"text": "I could not hear your voice.",
|
| 378 |
"category": "silence"
|
| 379 |
},
|
| 380 |
{
|
| 381 |
+
"id": "feedback.mp4_sent_045",
|
| 382 |
"video_file": "feedback.mp4",
|
| 383 |
+
"start": "245.935",
|
| 384 |
+
"end": "250.092",
|
| 385 |
"text": "Please hold the record button and say the word.",
|
| 386 |
"category": "silence"
|
| 387 |
},
|
| 388 |
{
|
| 389 |
+
"id": "feedback.mp4_sent_046",
|
| 390 |
"video_file": "feedback.mp4",
|
| 391 |
+
"start": "251.756",
|
| 392 |
+
"end": "254.250",
|
| 393 |
"text": "It was very quiet.",
|
| 394 |
"category": "silence"
|
| 395 |
},
|
| 396 |
{
|
| 397 |
+
"id": "feedback.mp4_sent_047",
|
| 398 |
"video_file": "feedback.mp4",
|
| 399 |
+
"start": "254.455",
|
| 400 |
+
"end": "258.408",
|
| 401 |
"text": "Speak a little louder and try again.",
|
| 402 |
"category": "silence"
|
| 403 |
},
|
| 404 |
{
|
| 405 |
+
"id": "feedback.mp4_sent_048",
|
| 406 |
"video_file": "feedback.mp4",
|
| 407 |
+
"start": "259.235",
|
| 408 |
+
"end": "262.155",
|
| 409 |
"text": "No sound was recorded.",
|
| 410 |
"category": "silence"
|
| 411 |
},
|
| 412 |
{
|
| 413 |
+
"id": "feedback.mp4_sent_049",
|
| 414 |
"video_file": "feedback.mp4",
|
| 415 |
+
"start": "262.566",
|
| 416 |
+
"end": "266.724",
|
| 417 |
"text": "Check your microphone and say the word again.",
|
| 418 |
"category": "silence"
|
| 419 |
},
|
| 420 |
{
|
| 421 |
+
"id": "feedback.mp4_sent_050",
|
| 422 |
"video_file": "feedback.mp4",
|
| 423 |
+
"start": "267.345",
|
| 424 |
+
"end": "270.050",
|
| 425 |
"text": "I think you whispered.",
|
| 426 |
"category": "silence"
|
| 427 |
},
|
| 428 |
{
|
| 429 |
+
"id": "feedback.mp4_sent_051",
|
| 430 |
"video_file": "feedback.mp4",
|
| 431 |
+
"start": "270.260",
|
| 432 |
+
"end": "274.625",
|
| 433 |
"text": "Use your clear classroom voice.",
|
| 434 |
"category": "silence"
|
| 435 |
+
},
|
| 436 |
+
{
|
| 437 |
+
"id": "feedback.mp4_sent_052",
|
| 438 |
+
"video_file": "feedback.mp4",
|
| 439 |
+
"start": "219.533",
|
| 440 |
+
"end": "224.314",
|
| 441 |
+
"text": "Check your consonant sound, it should be sharper.",
|
| 442 |
+
"category": "consonant"
|
| 443 |
+
},
|
| 444 |
+
{
|
| 445 |
+
"id": "feedback.mp4_sent_053",
|
| 446 |
+
"video_file": "feedback.mp4",
|
| 447 |
+
"start": "226.394",
|
| 448 |
+
"end": "232.838",
|
| 449 |
+
"text": "Make sure to pronounce the consonant clearly, it’s important for clarity.",
|
| 450 |
+
"category": "consonant"
|
| 451 |
+
},
|
| 452 |
+
{
|
| 453 |
+
"id": "feedback.mp4_sent_054",
|
| 454 |
+
"video_file": "feedback.mp4",
|
| 455 |
+
"start": "234.196",
|
| 456 |
+
"end": "240.737",
|
| 457 |
+
"text": "Focus on the consonant sound, it needs to be more distinct.",
|
| 458 |
+
"category": "consonant"
|
| 459 |
+
},
|
| 460 |
+
|
| 461 |
+
{
|
| 462 |
+
"id": "feedback.mp4_sent_055",
|
| 463 |
+
"video_file": "feedback.mp4",
|
| 464 |
+
"start": 273.684,
|
| 465 |
+
"end": 277.460,
|
| 466 |
+
"text": "Check your consonant sound /b/.",
|
| 467 |
+
"category": "consonant",
|
| 468 |
+
"phoneme": "b"
|
| 469 |
+
},
|
| 470 |
+
{
|
| 471 |
+
"id": "feedback.mp4_sent_056",
|
| 472 |
+
"video_file": "feedback.mp4",
|
| 473 |
+
"start": 277.465,
|
| 474 |
+
"end": 281.820,
|
| 475 |
+
"text": "Check your consonant sound /ch/.",
|
| 476 |
+
"category": "consonant",
|
| 477 |
+
"phoneme": "tʃ"
|
| 478 |
+
},
|
| 479 |
+
{
|
| 480 |
+
"id": "feedback.mp4_sent_057",
|
| 481 |
+
"video_file": "feedback.mp4",
|
| 482 |
+
"start": 282.404,
|
| 483 |
+
"end": 286.760,
|
| 484 |
+
"text": "Check your consonant sound /d/.",
|
| 485 |
+
"category": "consonant",
|
| 486 |
+
"phoneme": "d"
|
| 487 |
+
},
|
| 488 |
+
{
|
| 489 |
+
"id": "feedback.mp4_sent_058",
|
| 490 |
+
"video_file": "feedback.mp4",
|
| 491 |
+
"start": 287.049,
|
| 492 |
+
"end": 291.407,
|
| 493 |
+
"text": "Check your consonant sound /dh/.",
|
| 494 |
+
"category": "consonant",
|
| 495 |
+
"phoneme": "ð"
|
| 496 |
+
},
|
| 497 |
+
{
|
| 498 |
+
"id": "feedback.mp4_sent_059",
|
| 499 |
+
"video_file": "feedback.mp4",
|
| 500 |
+
"start": 291.697,
|
| 501 |
+
"end": 296.055,
|
| 502 |
+
"text": "Check your consonant sound /f/.",
|
| 503 |
+
"category": "consonant",
|
| 504 |
+
"phoneme": "f"
|
| 505 |
+
},
|
| 506 |
+
{
|
| 507 |
+
"id": "feedback.mp4_sent_060",
|
| 508 |
+
"video_file": "feedback.mp4",
|
| 509 |
+
"start": 296.635,
|
| 510 |
+
"end": 300.704,
|
| 511 |
+
"text": "Check your consonant sound /g/.",
|
| 512 |
+
"category": "consonant",
|
| 513 |
+
"phoneme": "ɡ"
|
| 514 |
+
},
|
| 515 |
+
{
|
| 516 |
+
"id": "feedback.mp4_sent_061",
|
| 517 |
+
"video_file": "feedback.mp4",
|
| 518 |
+
"start": 301.285,
|
| 519 |
+
"end": 305.352,
|
| 520 |
+
"text": "Check your consonant sound /h/.",
|
| 521 |
+
"category": "consonant",
|
| 522 |
+
"phoneme": "h"
|
| 523 |
+
},
|
| 524 |
+
{
|
| 525 |
+
"id": "feedback.mp4_sent_062",
|
| 526 |
+
"video_file": "feedback.mp4",
|
| 527 |
+
"start": 305.643,
|
| 528 |
+
"end": 310.001,
|
| 529 |
+
"text": "Check your consonant sound /j/.",
|
| 530 |
+
"category": "consonant",
|
| 531 |
+
"phoneme": "j"
|
| 532 |
+
},
|
| 533 |
+
{
|
| 534 |
+
"id": "feedback.mp4_sent_063",
|
| 535 |
+
"video_file": "feedback.mp4",
|
| 536 |
+
"start": 310.290,
|
| 537 |
+
"end": 314.649,
|
| 538 |
+
"text": "Check your consonant sound /k/.",
|
| 539 |
+
"category": "consonant",
|
| 540 |
+
"phoneme": "k"
|
| 541 |
+
},
|
| 542 |
+
{
|
| 543 |
+
"id": "feedback.mp4_sent_064",
|
| 544 |
+
"video_file": "feedback.mp4",
|
| 545 |
+
"start": 314.935,
|
| 546 |
+
"end": 319.295,
|
| 547 |
+
"text": "Check your consonant sound /l/.",
|
| 548 |
+
"category": "consonant",
|
| 549 |
+
"phoneme": "l"
|
| 550 |
+
},
|
| 551 |
+
{
|
| 552 |
+
"id": "feedback.mp4_sent_065",
|
| 553 |
+
"video_file": "feedback.mp4",
|
| 554 |
+
"start": 319.300,
|
| 555 |
+
"end": 323.945,
|
| 556 |
+
"text": "Check your consonant sound /m/.",
|
| 557 |
+
"category": "consonant",
|
| 558 |
+
"phoneme": "m"
|
| 559 |
+
},
|
| 560 |
+
{
|
| 561 |
+
"id": "feedback.mp4_sent_066",
|
| 562 |
+
"video_file": "feedback.mp4",
|
| 563 |
+
"start": 323.948,
|
| 564 |
+
"end": 328.595,
|
| 565 |
+
"text": "Check your consonant sound /n/.",
|
| 566 |
+
"category": "consonant",
|
| 567 |
+
"phoneme": "n"
|
| 568 |
+
},
|
| 569 |
+
{
|
| 570 |
+
"id": "feedback.mp4_sent_067",
|
| 571 |
+
"video_file": "feedback.mp4",
|
| 572 |
+
"start": 328.597,
|
| 573 |
+
"end": 333.245,
|
| 574 |
+
"text": "Check your consonant sound /ng/.",
|
| 575 |
+
"category": "consonant",
|
| 576 |
+
"phoneme": "ŋ"
|
| 577 |
+
},
|
| 578 |
+
{
|
| 579 |
+
"id": "feedback.mp4_sent_068",
|
| 580 |
+
"video_file": "feedback.mp4",
|
| 581 |
+
"start": 333.820,
|
| 582 |
+
"end": 337.604,
|
| 583 |
+
"text": "Check your consonant sound /p/.",
|
| 584 |
+
"category": "consonant",
|
| 585 |
+
"phoneme": "p"
|
| 586 |
+
},
|
| 587 |
+
{
|
| 588 |
+
"id": "feedback.mp4_sent_069",
|
| 589 |
+
"video_file": "feedback.mp4",
|
| 590 |
+
"start": 338.180,
|
| 591 |
+
"end": 342.539,
|
| 592 |
+
"text": "Check your consonant sound /r/.",
|
| 593 |
+
"category": "consonant",
|
| 594 |
+
"phoneme": "r"
|
| 595 |
+
},
|
| 596 |
+
{
|
| 597 |
+
"id": "feedback.mp4_sent_070",
|
| 598 |
+
"video_file": "feedback.mp4",
|
| 599 |
+
"start": 342.541,
|
| 600 |
+
"end": 346.899,
|
| 601 |
+
"text": "Check your consonant sound /s/.",
|
| 602 |
+
"category": "consonant",
|
| 603 |
+
"phoneme": "s"
|
| 604 |
+
},
|
| 605 |
+
{
|
| 606 |
+
"id": "feedback.mp4_sent_071",
|
| 607 |
+
"video_file": "feedback.mp4",
|
| 608 |
+
"start": 346.902,
|
| 609 |
+
"end": 351.547,
|
| 610 |
+
"text": "Check your consonant sound /sh/.",
|
| 611 |
+
"category": "consonant",
|
| 612 |
+
"phoneme": "ʃ"
|
| 613 |
+
},
|
| 614 |
+
{
|
| 615 |
+
"id": "feedback.mp4_sent_072",
|
| 616 |
+
"video_file": "feedback.mp4",
|
| 617 |
+
"start": 351.830,
|
| 618 |
+
"end": 355.904,
|
| 619 |
+
"text": "Check your consonant sound /t/.",
|
| 620 |
+
"category": "consonant",
|
| 621 |
+
"phoneme": "t"
|
| 622 |
+
},
|
| 623 |
+
{
|
| 624 |
+
"id": "feedback.mp4_sent_073",
|
| 625 |
+
"video_file": "feedback.mp4",
|
| 626 |
+
"start": 356.196,
|
| 627 |
+
"end": 360.263,
|
| 628 |
+
"text": "Check your consonant sound /th/.",
|
| 629 |
+
"category": "consonant",
|
| 630 |
+
"phoneme": "θ"
|
| 631 |
+
},
|
| 632 |
+
{
|
| 633 |
+
"id": "feedback.mp4_sent_074",
|
| 634 |
+
"video_file": "feedback.mp4",
|
| 635 |
+
"start": 360.554,
|
| 636 |
+
"end": 364.912,
|
| 637 |
+
"text": "Check your consonant sound /v/.",
|
| 638 |
+
"category": "consonant",
|
| 639 |
+
"phoneme": "v"
|
| 640 |
+
},
|
| 641 |
+
{
|
| 642 |
+
"id": "feedback.mp4_sent_075",
|
| 643 |
+
"video_file": "feedback.mp4",
|
| 644 |
+
"start": 365.490,
|
| 645 |
+
"end": 369.560,
|
| 646 |
+
"text": "Check your consonant sound /w/.",
|
| 647 |
+
"category": "consonant",
|
| 648 |
+
"phoneme": "w"
|
| 649 |
+
},
|
| 650 |
+
{
|
| 651 |
+
"id": "feedback.mp4_sent_076",
|
| 652 |
+
"video_file": "feedback.mp4",
|
| 653 |
+
"start": 369.562,
|
| 654 |
+
"end": 373.919,
|
| 655 |
+
"text": "Check your consonant sound /y/.",
|
| 656 |
+
"category": "consonant",
|
| 657 |
+
"phoneme": "j"
|
| 658 |
+
},
|
| 659 |
+
{
|
| 660 |
+
"id": "feedback.mp4_sent_077",
|
| 661 |
+
"video_file": "feedback.mp4",
|
| 662 |
+
"start": 374.205,
|
| 663 |
+
"end": 378.570,
|
| 664 |
+
"text": "Check your consonant sound /z/.",
|
| 665 |
+
"category": "consonant",
|
| 666 |
+
"phoneme": "z"
|
| 667 |
+
},
|
| 668 |
+
{
|
| 669 |
+
"id": "feedback.mp4_sent_078",
|
| 670 |
+
"video_file": "feedback.mp4",
|
| 671 |
+
"start": 378.855,
|
| 672 |
+
"end": 382.635,
|
| 673 |
+
"text": "Check your consonant sound /zh/.",
|
| 674 |
+
"category": "consonant",
|
| 675 |
+
"phoneme": "ʒ"
|
| 676 |
}
|
| 677 |
+
|
| 678 |
+
]
|
verification.py
CHANGED
|
@@ -498,6 +498,7 @@ from ragg.app import rag_bp
|
|
| 498 |
from pron import pron_bp
|
| 499 |
from pronvideo import pronvideo_bp
|
| 500 |
from pronragg import pronragg_bp
|
|
|
|
| 501 |
from ragg.ingest_trigger import ingest_trigger_bp
|
| 502 |
app.register_blueprint(movie_bp, url_prefix="/media")
|
| 503 |
app.register_blueprint(questions_bp, url_prefix="/media")
|
|
@@ -511,6 +512,7 @@ app.register_blueprint(ingest_trigger_bp, url_prefix="/rag")
|
|
| 511 |
app.register_blueprint(pron_bp, url_prefix="/pron")
|
| 512 |
app.register_blueprint(pronvideo_bp, url_prefix="/pronvideo")
|
| 513 |
app.register_blueprint(pronragg_bp, url_prefix="/pronragg")
|
|
|
|
| 514 |
# app.register_blueprint(questions_bp, url_prefix="/media") # <-- add this
|
| 515 |
# ------------------------------------------------------------------------------
|
| 516 |
# Local run (Gunicorn will import `verification:app` on Spaces)
|
|
|
|
| 498 |
from pron import pron_bp
|
| 499 |
from pronvideo import pronvideo_bp
|
| 500 |
from pronragg import pronragg_bp
|
| 501 |
+
from pronragupgrade import pronragupgrade_bp
|
| 502 |
from ragg.ingest_trigger import ingest_trigger_bp
|
| 503 |
app.register_blueprint(movie_bp, url_prefix="/media")
|
| 504 |
app.register_blueprint(questions_bp, url_prefix="/media")
|
|
|
|
| 512 |
app.register_blueprint(pron_bp, url_prefix="/pron")
|
| 513 |
app.register_blueprint(pronvideo_bp, url_prefix="/pronvideo")
|
| 514 |
app.register_blueprint(pronragg_bp, url_prefix="/pronragg")
|
| 515 |
+
app.register_blueprint(pronragupgrade_bp, url_prefix="/pronragupgrade")
|
| 516 |
# app.register_blueprint(questions_bp, url_prefix="/media") # <-- add this
|
| 517 |
# ------------------------------------------------------------------------------
|
| 518 |
# Local run (Gunicorn will import `verification:app` on Spaces)
|