Files changed (3) hide show
  1. app.py +166 -108
  2. packages.txt +8 -1
  3. requirements.txt +14 -3
app.py CHANGED
@@ -1,108 +1,166 @@
1
- import torch
2
-
3
- import spaces
4
- import gradio as gr
5
- import yt_dlp as youtube_dl
6
- from transformers import pipeline
7
- from transformers.pipelines.audio_utils import ffmpeg_read
8
-
9
- import tempfile
10
- import os
11
-
12
- MODEL_NAME = "openai/whisper-large-v3"
13
- BATCH_SIZE = 8
14
- FILE_LIMIT_MB = 1000
15
- YT_LENGTH_LIMIT_S = 3600 # limit to 1 hour YouTube files
16
-
17
- device = 0 if torch.cuda.is_available() else "cpu"
18
-
19
- pipe = pipeline(
20
- task="automatic-speech-recognition",
21
- model=MODEL_NAME,
22
- chunk_length_s=30,
23
- device=device,
24
- )
25
-
26
@spaces.GPU
def transcribe(inputs, task):
    """Run the module-level Whisper pipeline on an audio input and return its text.

    Args:
        inputs: Audio input forwarded unchanged to ``pipe``; ``None`` means
            nothing was submitted.
        task: Value forwarded to generation as ``generate_kwargs["task"]``.

    Returns:
        The ``"text"`` entry of the pipeline output.

    Raises:
        gr.Error: If ``inputs`` is ``None``.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    generation_options = {"task": task}
    prediction = pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs=generation_options,
        return_timestamps=True,
    )
    return prediction["text"]
33
-
34
-
35
- def _return_yt_html_embed(yt_url):
36
- video_id = yt_url.split("?v=")[-1]
37
- HTML_str = (
38
- f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
39
- " </center>"
40
- )
41
- return HTML_str
42
-
43
def download_yt_audio(yt_url, filename):
    """Download the media behind a YouTube URL, enforcing YT_LENGTH_LIMIT_S.

    Args:
        yt_url: URL handed to yt-dlp.
        filename: Value passed to yt-dlp as the ``outtmpl`` option.

    Raises:
        gr.Error: If the video info cannot be extracted, if the video is
            longer than ``YT_LENGTH_LIMIT_S`` seconds, or if the download
            fails.
    """
    # Local import: the over-length branch formats durations with `time`,
    # which is not among the imports visible at the top of this file.
    import time

    info_loader = youtube_dl.YoutubeDL()

    try:
        info = info_loader.extract_info(yt_url, download=False)
    except youtube_dl.utils.DownloadError as err:
        raise gr.Error(str(err)) from err

    # duration_string is colon-separated; left-pad with zeros to [h, m, s].
    file_h_m_s = [int(sub_length) for sub_length in info["duration_string"].split(":")]
    while len(file_h_m_s) < 3:
        file_h_m_s.insert(0, 0)
    file_length_s = file_h_m_s[0] * 3600 + file_h_m_s[1] * 60 + file_h_m_s[2]

    if file_length_s > YT_LENGTH_LIMIT_S:
        yt_length_limit_hms = time.strftime("%HH:%MM:%SS", time.gmtime(YT_LENGTH_LIMIT_S))
        file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
        raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.")

    ydl_opts = {"outtmpl": filename, "format": "worstvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"}

    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        try:
            ydl.download([yt_url])
        # NOTE(review): download failures appear to surface as DownloadError
        # in yt-dlp; ExtractorError is still caught for backward compatibility.
        except (youtube_dl.utils.DownloadError, youtube_dl.utils.ExtractorError) as err:
            raise gr.Error(str(err)) from err
73
-
74
@spaces.GPU
def yt_transcribe(yt_url, task, max_filesize=75.0):
    """Download a YouTube video, transcribe its audio, and return embed HTML plus text.

    Args:
        yt_url: URL of the video to fetch.
        task: Value forwarded to generation as ``generate_kwargs["task"]``.
        max_filesize: Accepted for interface compatibility; not read by
            this function.

    Returns:
        A ``(html_embed, text)`` tuple: the iframe snippet for the video and
        the ``"text"`` entry of the pipeline output.
    """
    embed_html = _return_yt_html_embed(yt_url)
    sampling_rate = pipe.feature_extractor.sampling_rate

    # The download lives in a temporary directory; only its raw bytes are
    # kept once the directory is cleaned up.
    with tempfile.TemporaryDirectory() as workdir:
        video_path = os.path.join(workdir, "video.mp4")
        download_yt_audio(yt_url, video_path)
        with open(video_path, "rb") as handle:
            raw_bytes = handle.read()

    waveform = ffmpeg_read(raw_bytes, sampling_rate)
    model_input = {"array": waveform, "sampling_rate": sampling_rate}

    prediction = pipe(
        model_input,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )

    return embed_html, prediction["text"]
90
-
91
-
92
- demo = gr.Interface(
93
- fn=transcribe,
94
- inputs=[
95
- gr.Audio(type="filepath"),
96
- gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
97
- ],
98
- outputs=gr.Textbox(lines=3),
99
- title="Whisper Large V3: Transcribe Audio",
100
- description=(
101
- "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the OpenAI Whisper"
102
- f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
103
- " of arbitrary length."
104
- ),
105
- allow_flagging="never",
106
- )
107
-
108
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {\rtf1\ansi\ansicpg1254\cocoartf2868
2
+ \cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
3
+ {\colortbl;\red255\green255\blue255;}
4
+ {\*\expandedcolortbl;;}
5
+ \paperw11900\paperh16840\margl1440\margr1440\vieww11520\viewh8400\viewkind0
6
+ \pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\pardirnatural\partightenfactor0
7
+
8
+ \f0\fs24 \cf0 # -*- coding: utf-8 -*-\
9
+ import streamlit as st\
10
+ import whisper\
11
+ import os\
12
+ import re\
13
+ import json\
14
+ import pandas as pd\
15
+ from datetime import datetime\
16
+ from difflib import SequenceMatcher\
17
+ \
18
+ # --- DOSYA VE VER\uc0\u304 Y\'d6NET\u304 M\u304 ---\
19
+ DATA_FILE = "akademik_kayitlar.csv"\
20
+ EXCEPTION_FILE = "istisnalar.json"\
21
+ \
22
def clean_text(text):
    """Normalise *text* for word comparison using Turkish casing rules.

    The text is lower-cased, then every character that is not an ASCII
    letter, one of ``ç ğ ı ş ö ü``, or whitespace is removed.

    Args:
        text: Raw word or sentence.

    Returns:
        The normalised string (possibly empty).
    """
    # Map the two Turkish capitals by hand *before* lower(): str.lower() is
    # locale-independent, so it would turn "I" into "i" (instead of "ı") and
    # "İ" into "i" followed by a combining dot (U+0307).
    text = text.replace('I', '\u0131').replace('\u0130', 'i').lower()
    # Collapse any "i" + combining dot that was already present in the input.
    text = text.replace('i\u0307', 'i')
    return re.sub(r'[^a-zçğışöü\s]', '', text)
25
+ \
26
def load_exceptions():
    """Load the word-exception mapping stored in ``EXCEPTION_FILE``.

    Returns:
        The JSON object stored in the file as a dict, or an empty dict when
        the file is missing, unreadable, not valid JSON, or does not contain
        a JSON object.
    """
    try:
        with open(EXCEPTION_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file; ValueError covers
        # malformed JSON and invalid UTF-8.
        return {}
    # Callers index the result like a dict, so reject any other JSON value.
    return data if isinstance(data, dict) else {}
33
+ \
34
def save_exception(wrong, correct):
    """Persist a mapping from a mis-transcribed word to its intended word.

    Both words are normalised with ``clean_text`` and stripped before being
    stored in ``EXCEPTION_FILE``; an existing entry for the same key is
    overwritten.

    Args:
        wrong: The word as it was transcribed.
        correct: The word it should be treated as.
    """
    key = clean_text(wrong).strip()
    if not key:
        # Nothing is left after normalisation; do not persist an empty key.
        return
    ex = load_exceptions()
    ex[key] = clean_text(correct).strip()
    with open(EXCEPTION_FILE, "w", encoding="utf-8") as f:
        json.dump(ex, f, ensure_ascii=False)
39
+ \
40
def delete_exception(word):
    """Remove *word* from the mapping stored in ``EXCEPTION_FILE``.

    The file is rewritten only when *word* was actually present.

    Args:
        word: Key of the entry to remove, exactly as stored.
    """
    ex = load_exceptions()
    if word not in ex:
        return
    del ex[word]
    with open(EXCEPTION_FILE, "w", encoding="utf-8") as f:
        json.dump(ex, f, ensure_ascii=False)
46
+ \
47
+ # --- SAYFA AYARLARI ---\
48
+ st.set_page_config(page_title="HFS Reading Fluency Lab", layout="wide")\
49
+ st.markdown("<div id='top'></div>", unsafe_allow_html=True)\
50
+ \
51
@st.cache_resource
def load_model():
    """Load and return the Whisper "large-v3" model.

    The function is wrapped in ``st.cache_resource``.
    """
    # The most powerful model, the one you preferred: Large-V3
    return whisper.load_model("large-v3")
55
+ \
56
+ model = load_model()\
57
+ \
58
+ if 'results' not in st.session_state: st.session_state.results = []\
59
+ if 'duration' not in st.session_state: st.session_state.duration = 1\
60
+ \
61
+ # --- ARAY\'dcZ ---\
62
+ st.title("\uc0\u55357 \u56538 Reading Fluency & Accuracy Lab (HFS Edition)")\
63
+ st.info("Bu sistem Whisper Large V3 motoru ve Global Alignment algoritmas\uc0\u305 ile \'e7al\u305 \u351 maktad\u305 r.")\
64
+ \
65
+ tab1, tab2, tab3 = st.tabs(["\uc0\u55357 \u56589 Analiz Paneli", "\u55357 \u56540 Ar\u351 iv", "\u9881 \u65039 S\'f6zl\'fck"])\
66
+ \
67
+ with tab1:\
68
+ st.sidebar.title("\uc0\u55357 \u56424 \u8205 \u55356 \u57323 Akademik Panel")\
69
+ st_name = st.sidebar.text_input("\'d6\uc0\u287 renci Ad\u305 Soyad\u305 :", placeholder="\'d6rn: Ahmet Y\u305 lmaz")\
70
+ strictness = st.sidebar.slider("Hassasiyet (Strictness)", 0.70, 1.0, 0.90)\
71
+ \
72
+ col1, col2 = st.columns(2)\
73
+ with col1:\
74
+ orig_text = st.text_area("Orijinal Metin:", height=150)\
75
+ with col2:\
76
+ audio_file = st.file_uploader("MP4/MP3 Kayd\uc0\u305 Y\'fckle:", type=["wav", "mp3", "m4a", "mp4"])\
77
+ if audio_file: st.audio(audio_file)\
78
+ \
79
+ if st.button("\uc0\u55357 \u56589 Analizi Ba\u351 lat", use_container_width=True):\
80
+ if audio_file and orig_text:\
81
+ ext = audio_file.name.split(".")[-1]\
82
+ temp_name = f"temp_analysis.\{ext\}"\
83
+ with open(temp_name, "wb") as f: f.write(audio_file.getbuffer())\
84
+ \
85
+ with st.spinner("Whisper Large V3 sesi i\uc0\u351 liyor..."):\
86
+ result = model.transcribe(temp_name, language="tr", initial_prompt=orig_text, temperature=0.0)\
87
+ st.session_state.duration = result.get("segments", [0])[-1].get("end", 1) if result.get("segments") else 1\
88
+ \
89
+ orig_words = [clean_text(w) for w in orig_text.split() if clean_text(w)]\
90
+ trans_words = [clean_text(w) for w in result["text"].split() if clean_text(w)]\
91
+ \
92
+ ex_dict = load_exceptions()\
93
+ matcher = SequenceMatcher(None, orig_words, trans_words)\
94
+ aligned = []\
95
+ for tag, i1, i2, j1, j2 in matcher.get_opcodes():\
96
+ if tag == 'equal':\
97
+ for i in range(i1, i2):\
98
+ aligned.append(\{"h": orig_words[i], "d": trans_words[j1+(i-i1)], "s": True\})\
99
+ elif tag in ['replace', 'delete']:\
100
+ for i in range(i1, i2):\
101
+ t_w = trans_words[j1+(i-i1)] if j1+(i-i1) < j2 else "---"\
102
+ processed_tw = ex_dict.get(t_w, t_w)\
103
+ sim = SequenceMatcher(None, processed_tw, orig_words[i]).ratio()\
104
+ aligned.append(\{"h": orig_words[i], "d": t_w, "s": sim >= strictness\})\
105
+ \
106
+ st.session_state.results = aligned\
107
+ if os.path.exists(temp_name): os.remove(temp_name)\
108
+ \
109
+ if st.session_state.results:\
110
+ total_words = len(st.session_state.results)\
111
+ correct_count = sum(1 for r in st.session_state.results if r['s'])\
112
+ accuracy = (correct_count / total_words) * 100 if total_words > 0 else 0\
113
+ wcpm = (correct_count / st.session_state.duration) * 60\
114
+ errors = total_words - correct_count\
115
+ \
116
+ st.divider()\
117
+ m1, m2, m3, m4 = st.columns(4)\
118
+ m1.metric("Reading Accuracy", f"%\{accuracy:.1f\}")\
119
+ m2.metric("Reading Rate (WCPM)", int(wcpm))\
120
+ m3.metric("Correct Words", correct_count)\
121
+ m4.metric("Errors", errors)\
122
+ \
123
+ for i, res in enumerate(st.session_state.results):\
124
+ r1, r2, r3, r4, r5 = st.columns([0.5, 2, 2, 1, 2])\
125
+ r1.write(i+1); r2.write(res['h'])\
126
+ color = "green" if res['s'] else "red"\
127
+ r3.markdown(f"<span style='color:\{color\}; font-weight:bold;'>\{res['d']\}</span>", unsafe_allow_html=True)\
128
+ r4.write("\uc0\u9989 " if res['s'] else "\u10060 ")\
129
+ if not res['s'] and res['d'] != "---":\
130
+ if r5.button(f"\'d6\uc0\u287 ret", key=f"p_\{i\}"):\
131
+ save_exception(res['d'], res['h']); st.session_state.results[i]['s'] = True; st.rerun()\
132
+ elif r5.button("D\'fczelt", key=f"f_\{i\}"):\
133
+ st.session_state.results[i]['s'] = not res['s']; st.rerun()\
134
+ \
135
+ st.divider()\
136
+ save_col, top_col = st.columns([4, 1])\
137
+ with save_col:\
138
+ if st.button("\uc0\u55357 \u56510 Analizi Ar\u351 ive Kaydet", use_container_width=True):\
139
+ if not st_name.strip(): st.error("\uc0\u304 sim giriniz!")\
140
+ else:\
141
+ is_duplicate = False\
142
+ if os.path.exists(DATA_FILE):\
143
+ check_df = pd.read_csv(DATA_FILE)\
144
+ if 'Student' in check_df.columns and st_name.strip() in check_df['Student'].astype(str).values:\
145
+ is_duplicate = True\
146
+ \
147
+ if is_duplicate: st.warning("Bu isim zaten var.")\
148
+ else:\
149
+ record = \{"Date": datetime.now().strftime("%Y-%m-%d %H:%M"), "Student": st_name, "Accuracy (%)": f"%\{accuracy:.1f\}", "WCPM": int(wcpm), "Errors": errors, "Total Words": total_words\}\
150
+ pd.DataFrame([record]).to_csv(DATA_FILE, mode='a', index=False, header=not os.path.exists(DATA_FILE), encoding='utf-8-sig')\
151
+ st.success("Kaydedildi!"); st.balloons()\
152
+ with top_col:\
153
+ st.markdown("<a href='#top' style='text-decoration:none;'><div style='background-color:#262730; color:white; padding:10px; border-radius:5px; text-align:center; font-weight:bold;'>\uc0\u11014 \u65039 \'dcst</div></a>", unsafe_allow_html=True)\
154
+ \
155
+ with tab2:\
156
+ if os.path.exists(DATA_FILE):\
157
+ history_df = pd.read_csv(DATA_FILE)\
158
+ st.dataframe(history_df, use_container_width=True)\
159
+ st.download_button("\uc0\u55357 \u56549 \u304 ndir", history_df.to_csv(index=False).encode('utf-8-sig'), "arsiv.csv", "text/csv")\
160
+ \
161
+ with tab3:\
162
+ current_ex = load_exceptions()\
163
+ for wrong, correct in current_ex.items():\
164
+ c_w, c_c, c_d = st.columns([3, 3, 2])\
165
+ c_w.write(wrong); c_c.write(correct)\
166
+ if c_d.button("Sil", key=f"del_\{wrong\}"): delete_exception(wrong); st.rerun()}
packages.txt CHANGED
@@ -1 +1,8 @@
1
- ffmpeg
 
 
 
 
 
 
 
 
1
+ ffmpeg
requirements.txt CHANGED
@@ -1,3 +1,14 @@
1
- git+https://github.com/huggingface/transformers
2
- torch
3
- yt-dlp
 
 
 
 
 
 
 
 
 
 
 
 
1
+ openai-whisper
2
+ streamlit
3
+ pandas
4
+ setuptools
5
+ jinja2
6
+ torch
7
+ numpy