Spaces:
Paused
Paused
Upload 3 files
#149
by kagankeskin - opened
- app.py +166 -108
- packages.txt +8 -1
- requirements.txt +14 -3
app.py
CHANGED
|
@@ -1,108 +1,166 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
import
|
| 10 |
-
import
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
)
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
return
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
f
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{\rtf1\ansi\ansicpg1254\cocoartf2868
|
| 2 |
+
\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
|
| 3 |
+
{\colortbl;\red255\green255\blue255;}
|
| 4 |
+
{\*\expandedcolortbl;;}
|
| 5 |
+
\paperw11900\paperh16840\margl1440\margr1440\vieww11520\viewh8400\viewkind0
|
| 6 |
+
\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\pardirnatural\partightenfactor0
|
| 7 |
+
|
| 8 |
+
\f0\fs24 \cf0 # -*- coding: utf-8 -*-\
|
| 9 |
+
import streamlit as st\
|
| 10 |
+
import whisper\
|
| 11 |
+
import os\
|
| 12 |
+
import re\
|
| 13 |
+
import json\
|
| 14 |
+
import pandas as pd\
|
| 15 |
+
from datetime import datetime\
|
| 16 |
+
from difflib import SequenceMatcher\
|
| 17 |
+
\
|
| 18 |
+
# --- DOSYA VE VER\uc0\u304 Y\'d6NET\u304 M\u304 ---\
|
| 19 |
+
DATA_FILE = "akademik_kayitlar.csv"\
|
| 20 |
+
EXCEPTION_FILE = "istisnalar.json"\
|
| 21 |
+
\
|
| 22 |
+
def clean_text(text):\
|
| 23 |
+
text = text.lower().replace('\uc0\u105 \u775 ', 'i').replace('I', '\u305 ')\
|
| 24 |
+
return re.sub(r'[^a-z\'e7\uc0\u287 \u305 \u351 \'f6\'fc\\s]', '', text)\
|
| 25 |
+
\
|
| 26 |
+
def load_exceptions():\
|
| 27 |
+
if os.path.exists(EXCEPTION_FILE):\
|
| 28 |
+
try:\
|
| 29 |
+
with open(EXCEPTION_FILE, "r", encoding="utf-8") as f:\
|
| 30 |
+
return json.load(f)\
|
| 31 |
+
except: return \{\}\
|
| 32 |
+
return \{\}\
|
| 33 |
+
\
|
| 34 |
+
def save_exception(wrong, correct):\
|
| 35 |
+
ex = load_exceptions()\
|
| 36 |
+
ex[clean_text(wrong).strip()] = clean_text(correct).strip()\
|
| 37 |
+
with open(EXCEPTION_FILE, "w", encoding="utf-8") as f:\
|
| 38 |
+
json.dump(ex, f, ensure_ascii=False)\
|
| 39 |
+
\
|
| 40 |
+
def delete_exception(word):\
|
| 41 |
+
ex = load_exceptions()\
|
| 42 |
+
if word in ex:\
|
| 43 |
+
del ex[word]\
|
| 44 |
+
with open(EXCEPTION_FILE, "w", encoding="utf-8") as f:\
|
| 45 |
+
json.dump(ex, f, ensure_ascii=False)\
|
| 46 |
+
\
|
| 47 |
+
# --- SAYFA AYARLARI ---\
|
| 48 |
+
st.set_page_config(page_title="HFS Reading Fluency Lab", layout="wide")\
|
| 49 |
+
st.markdown("<div id='top'></div>", unsafe_allow_html=True)\
|
| 50 |
+
\
|
| 51 |
+
@st.cache_resource\
|
| 52 |
+
def load_model():\
|
| 53 |
+
# Sizin be\uc0\u287 endi\u287 iniz en g\'fc\'e7l\'fc model: Large-V3\
|
| 54 |
+
return whisper.load_model("large-v3")\
|
| 55 |
+
\
|
| 56 |
+
model = load_model()\
|
| 57 |
+
\
|
| 58 |
+
if 'results' not in st.session_state: st.session_state.results = []\
|
| 59 |
+
if 'duration' not in st.session_state: st.session_state.duration = 1\
|
| 60 |
+
\
|
| 61 |
+
# --- ARAY\'dcZ ---\
|
| 62 |
+
st.title("\uc0\u55357 \u56538 Reading Fluency & Accuracy Lab (HFS Edition)")\
|
| 63 |
+
st.info("Bu sistem Whisper Large V3 motoru ve Global Alignment algoritmas\uc0\u305 ile \'e7al\u305 \u351 maktad\u305 r.")\
|
| 64 |
+
\
|
| 65 |
+
tab1, tab2, tab3 = st.tabs(["\uc0\u55357 \u56589 Analiz Paneli", "\u55357 \u56540 Ar\u351 iv", "\u9881 \u65039 S\'f6zl\'fck"])\
|
| 66 |
+
\
|
| 67 |
+
with tab1:\
|
| 68 |
+
st.sidebar.title("\uc0\u55357 \u56424 \u8205 \u55356 \u57323 Akademik Panel")\
|
| 69 |
+
st_name = st.sidebar.text_input("\'d6\uc0\u287 renci Ad\u305 Soyad\u305 :", placeholder="\'d6rn: Ahmet Y\u305 lmaz")\
|
| 70 |
+
strictness = st.sidebar.slider("Hassasiyet (Strictness)", 0.70, 1.0, 0.90)\
|
| 71 |
+
\
|
| 72 |
+
col1, col2 = st.columns(2)\
|
| 73 |
+
with col1:\
|
| 74 |
+
orig_text = st.text_area("Orijinal Metin:", height=150)\
|
| 75 |
+
with col2:\
|
| 76 |
+
audio_file = st.file_uploader("MP4/MP3 Kayd\uc0\u305 Y\'fckle:", type=["wav", "mp3", "m4a", "mp4"])\
|
| 77 |
+
if audio_file: st.audio(audio_file)\
|
| 78 |
+
\
|
| 79 |
+
if st.button("\uc0\u55357 \u56589 Analizi Ba\u351 lat", use_container_width=True):\
|
| 80 |
+
if audio_file and orig_text:\
|
| 81 |
+
ext = audio_file.name.split(".")[-1]\
|
| 82 |
+
temp_name = f"temp_analysis.\{ext\}"\
|
| 83 |
+
with open(temp_name, "wb") as f: f.write(audio_file.getbuffer())\
|
| 84 |
+
\
|
| 85 |
+
with st.spinner("Whisper Large V3 sesi i\uc0\u351 liyor..."):\
|
| 86 |
+
result = model.transcribe(temp_name, language="tr", initial_prompt=orig_text, temperature=0.0)\
|
| 87 |
+
st.session_state.duration = result.get("segments", [0])[-1].get("end", 1) if result.get("segments") else 1\
|
| 88 |
+
\
|
| 89 |
+
orig_words = [clean_text(w) for w in orig_text.split() if clean_text(w)]\
|
| 90 |
+
trans_words = [clean_text(w) for w in result["text"].split() if clean_text(w)]\
|
| 91 |
+
\
|
| 92 |
+
ex_dict = load_exceptions()\
|
| 93 |
+
matcher = SequenceMatcher(None, orig_words, trans_words)\
|
| 94 |
+
aligned = []\
|
| 95 |
+
for tag, i1, i2, j1, j2 in matcher.get_opcodes():\
|
| 96 |
+
if tag == 'equal':\
|
| 97 |
+
for i in range(i1, i2):\
|
| 98 |
+
aligned.append(\{"h": orig_words[i], "d": trans_words[j1+(i-i1)], "s": True\})\
|
| 99 |
+
elif tag in ['replace', 'delete']:\
|
| 100 |
+
for i in range(i1, i2):\
|
| 101 |
+
t_w = trans_words[j1+(i-i1)] if j1+(i-i1) < j2 else "---"\
|
| 102 |
+
processed_tw = ex_dict.get(t_w, t_w)\
|
| 103 |
+
sim = SequenceMatcher(None, processed_tw, orig_words[i]).ratio()\
|
| 104 |
+
aligned.append(\{"h": orig_words[i], "d": t_w, "s": sim >= strictness\})\
|
| 105 |
+
\
|
| 106 |
+
st.session_state.results = aligned\
|
| 107 |
+
if os.path.exists(temp_name): os.remove(temp_name)\
|
| 108 |
+
\
|
| 109 |
+
if st.session_state.results:\
|
| 110 |
+
total_words = len(st.session_state.results)\
|
| 111 |
+
correct_count = sum(1 for r in st.session_state.results if r['s'])\
|
| 112 |
+
accuracy = (correct_count / total_words) * 100 if total_words > 0 else 0\
|
| 113 |
+
wcpm = (correct_count / st.session_state.duration) * 60\
|
| 114 |
+
errors = total_words - correct_count\
|
| 115 |
+
\
|
| 116 |
+
st.divider()\
|
| 117 |
+
m1, m2, m3, m4 = st.columns(4)\
|
| 118 |
+
m1.metric("Reading Accuracy", f"%\{accuracy:.1f\}")\
|
| 119 |
+
m2.metric("Reading Rate (WCPM)", int(wcpm))\
|
| 120 |
+
m3.metric("Correct Words", correct_count)\
|
| 121 |
+
m4.metric("Errors", errors)\
|
| 122 |
+
\
|
| 123 |
+
for i, res in enumerate(st.session_state.results):\
|
| 124 |
+
r1, r2, r3, r4, r5 = st.columns([0.5, 2, 2, 1, 2])\
|
| 125 |
+
r1.write(i+1); r2.write(res['h'])\
|
| 126 |
+
color = "green" if res['s'] else "red"\
|
| 127 |
+
r3.markdown(f"<span style='color:\{color\}; font-weight:bold;'>\{res['d']\}</span>", unsafe_allow_html=True)\
|
| 128 |
+
r4.write("\uc0\u9989 " if res['s'] else "\u10060 ")\
|
| 129 |
+
if not res['s'] and res['d'] != "---":\
|
| 130 |
+
if r5.button(f"\'d6\uc0\u287 ret", key=f"p_\{i\}"):\
|
| 131 |
+
save_exception(res['d'], res['h']); st.session_state.results[i]['s'] = True; st.rerun()\
|
| 132 |
+
elif r5.button("D\'fczelt", key=f"f_\{i\}"):\
|
| 133 |
+
st.session_state.results[i]['s'] = not res['s']; st.rerun()\
|
| 134 |
+
\
|
| 135 |
+
st.divider()\
|
| 136 |
+
save_col, top_col = st.columns([4, 1])\
|
| 137 |
+
with save_col:\
|
| 138 |
+
if st.button("\uc0\u55357 \u56510 Analizi Ar\u351 ive Kaydet", use_container_width=True):\
|
| 139 |
+
if not st_name.strip(): st.error("\uc0\u304 sim giriniz!")\
|
| 140 |
+
else:\
|
| 141 |
+
is_duplicate = False\
|
| 142 |
+
if os.path.exists(DATA_FILE):\
|
| 143 |
+
check_df = pd.read_csv(DATA_FILE)\
|
| 144 |
+
if 'Student' in check_df.columns and st_name.strip() in check_df['Student'].astype(str).values:\
|
| 145 |
+
is_duplicate = True\
|
| 146 |
+
\
|
| 147 |
+
if is_duplicate: st.warning("Bu isim zaten var.")\
|
| 148 |
+
else:\
|
| 149 |
+
record = \{"Date": datetime.now().strftime("%Y-%m-%d %H:%M"), "Student": st_name, "Accuracy (%)": f"%\{accuracy:.1f\}", "WCPM": int(wcpm), "Errors": errors, "Total Words": total_words\}\
|
| 150 |
+
pd.DataFrame([record]).to_csv(DATA_FILE, mode='a', index=False, header=not os.path.exists(DATA_FILE), encoding='utf-8-sig')\
|
| 151 |
+
st.success("Kaydedildi!"); st.balloons()\
|
| 152 |
+
with top_col:\
|
| 153 |
+
st.markdown("<a href='#top' style='text-decoration:none;'><div style='background-color:#262730; color:white; padding:10px; border-radius:5px; text-align:center; font-weight:bold;'>\uc0\u11014 \u65039 \'dcst</div></a>", unsafe_allow_html=True)\
|
| 154 |
+
\
|
| 155 |
+
with tab2:\
|
| 156 |
+
if os.path.exists(DATA_FILE):\
|
| 157 |
+
history_df = pd.read_csv(DATA_FILE)\
|
| 158 |
+
st.dataframe(history_df, use_container_width=True)\
|
| 159 |
+
st.download_button("\uc0\u55357 \u56549 \u304 ndir", history_df.to_csv(index=False).encode('utf-8-sig'), "arsiv.csv", "text/csv")\
|
| 160 |
+
\
|
| 161 |
+
with tab3:\
|
| 162 |
+
current_ex = load_exceptions()\
|
| 163 |
+
for wrong, correct in current_ex.items():\
|
| 164 |
+
c_w, c_c, c_d = st.columns([3, 3, 2])\
|
| 165 |
+
c_w.write(wrong); c_c.write(correct)\
|
| 166 |
+
if c_d.button("Sil", key=f"del_\{wrong\}"): delete_exception(wrong); st.rerun()}
|
packages.txt
CHANGED
|
@@ -1 +1,8 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{\rtf1\ansi\ansicpg1254\cocoartf2868
|
| 2 |
+
\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
|
| 3 |
+
{\colortbl;\red255\green255\blue255;}
|
| 4 |
+
{\*\expandedcolortbl;;}
|
| 5 |
+
\paperw11900\paperh16840\margl1440\margr1440\vieww11520\viewh8400\viewkind0
|
| 6 |
+
\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\pardirnatural\partightenfactor0
|
| 7 |
+
|
| 8 |
+
\f0\fs24 \cf0 ffmpeg}
|
requirements.txt
CHANGED
|
@@ -1,3 +1,14 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{\rtf1\ansi\ansicpg1254\cocoartf2868
|
| 2 |
+
\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
|
| 3 |
+
{\colortbl;\red255\green255\blue255;}
|
| 4 |
+
{\*\expandedcolortbl;;}
|
| 5 |
+
\paperw11900\paperh16840\margl1440\margr1440\vieww11520\viewh8400\viewkind0
|
| 6 |
+
\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\pardirnatural\partightenfactor0
|
| 7 |
+
|
| 8 |
+
\f0\fs24 \cf0 openai-whisper\
|
| 9 |
+
streamlit\
|
| 10 |
+
pandas\
|
| 11 |
+
setuptools\
|
| 12 |
+
jinja2\
|
| 13 |
+
torch\
|
| 14 |
+
numpy}
|