Spaces:

openai
/

whisper

Paused

App Files Community

182

Upload 3 files

#149

by kagankeskin - opened Mar 5

base: refs/heads/main

←

from: refs/pr/149

Discussion Files changed

+188

-112

Files changed (3) hide show

app.py +166 -108
packages.txt +8 -1
requirements.txt +14 -3

app.py CHANGED Viewed

@@ -1,108 +1,166 @@
-import torch
-import spaces
-import gradio as gr
-import yt_dlp as youtube_dl
-from transformers import pipeline
-from transformers.pipelines.audio_utils import ffmpeg_read
-import tempfile
-import os
-MODEL_NAME = "openai/whisper-large-v3"
-BATCH_SIZE = 8
-FILE_LIMIT_MB = 1000
-YT_LENGTH_LIMIT_S = 3600  # limit to 1 hour YouTube files
-device = 0 if torch.cuda.is_available() else "cpu"
-pipe = pipeline(
-    task="automatic-speech-recognition",
-    model=MODEL_NAME,
-    chunk_length_s=30,
-    device=device,
-)
-@spaces.GPU
-def transcribe(inputs, task):
-    if inputs is None:
-        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
-    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
-    return  text
-def _return_yt_html_embed(yt_url):
-    video_id = yt_url.split("?v=")[-1]
-    HTML_str = (
-        f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
-        " </center>"
-    )
-    return HTML_str
-def download_yt_audio(yt_url, filename):
-    info_loader = youtube_dl.YoutubeDL()
-    try:
-        info = info_loader.extract_info(yt_url, download=False)
-    except youtube_dl.utils.DownloadError as err:
-        raise gr.Error(str(err))
-    file_length = info["duration_string"]
-    file_h_m_s = file_length.split(":")
-    file_h_m_s = [int(sub_length) for sub_length in file_h_m_s]
-    if len(file_h_m_s) == 1:
-        file_h_m_s.insert(0, 0)
-    if len(file_h_m_s) == 2:
-        file_h_m_s.insert(0, 0)
-    file_length_s = file_h_m_s[0] * 3600 + file_h_m_s[1] * 60 + file_h_m_s[2]
-    if file_length_s > YT_LENGTH_LIMIT_S:
-        yt_length_limit_hms = time.strftime("%HH:%MM:%SS", time.gmtime(YT_LENGTH_LIMIT_S))
-        file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
-        raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.")
-    ydl_opts = {"outtmpl": filename, "format": "worstvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"}
-    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-        try:
-            ydl.download([yt_url])
-        except youtube_dl.utils.ExtractorError as err:
-            raise gr.Error(str(err))
-@spaces.GPU
-def yt_transcribe(yt_url, task, max_filesize=75.0):
-    html_embed_str = _return_yt_html_embed(yt_url)
-    with tempfile.TemporaryDirectory() as tmpdirname:
-        filepath = os.path.join(tmpdirname, "video.mp4")
-        download_yt_audio(yt_url, filepath)
-        with open(filepath, "rb") as f:
-            inputs = f.read()
-    inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
-    inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
-    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
-    return html_embed_str, text
-demo = gr.Interface(
-    fn=transcribe,
-    inputs=[
-        gr.Audio(type="filepath"),
-        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
-    ],
-    outputs=gr.Textbox(lines=3),
-    title="Whisper Large V3: Transcribe Audio",
-    description=(
-        "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the OpenAI Whisper"
-        f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
-        " of arbitrary length."
-    ),
-    allow_flagging="never",
-)
-demo.launch()

+{\rtf1\ansi\ansicpg1254\cocoartf2868
+\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
+{\colortbl;\red255\green255\blue255;}
+{\*\expandedcolortbl;;}
+\paperw11900\paperh16840\margl1440\margr1440\vieww11520\viewh8400\viewkind0
+\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\pardirnatural\partightenfactor0
+\f0\fs24 \cf0 # -*- coding: utf-8 -*-\
+import streamlit as st\
+import whisper\
+import os\
+import re\
+import json\
+import pandas as pd\
+from datetime import datetime\
+from difflib import SequenceMatcher\
+\
+# --- DOSYA VE VER\uc0\u304  Y\'d6NET\u304 M\u304  ---\
+DATA_FILE = "akademik_kayitlar.csv"\
+EXCEPTION_FILE = "istisnalar.json"\
+\
+def clean_text(text):\
+    text = text.lower().replace('\uc0\u105 \u775 ', 'i').replace('I', '\u305 ')\
+    return re.sub(r'[^a-z\'e7\uc0\u287 \u305 \u351 \'f6\'fc\\s]', '', text)\
+\
+def load_exceptions():\
+    if os.path.exists(EXCEPTION_FILE):\
+        try:\
+            with open(EXCEPTION_FILE, "r", encoding="utf-8") as f:\
+                return json.load(f)\
+        except: return \{\}\
+    return \{\}\
+\
+def save_exception(wrong, correct):\
+    ex = load_exceptions()\
+    ex[clean_text(wrong).strip()] = clean_text(correct).strip()\
+    with open(EXCEPTION_FILE, "w", encoding="utf-8") as f:\
+        json.dump(ex, f, ensure_ascii=False)\
+\
+def delete_exception(word):\
+    ex = load_exceptions()\
+    if word in ex:\
+        del ex[word]\
+        with open(EXCEPTION_FILE, "w", encoding="utf-8") as f:\
+            json.dump(ex, f, ensure_ascii=False)\
+\
+# --- SAYFA AYARLARI ---\
+st.set_page_config(page_title="HFS Reading Fluency Lab", layout="wide")\
+st.markdown("<div id='top'></div>", unsafe_allow_html=True)\
+\
+@st.cache_resource\
+def load_model():\
+    # Sizin be\uc0\u287 endi\u287 iniz en g\'fc\'e7l\'fc model: Large-V3\
+    return whisper.load_model("large-v3")\
+\
+model = load_model()\
+\
+if 'results' not in st.session_state: st.session_state.results = []\
+if 'duration' not in st.session_state: st.session_state.duration = 1\
+\
+# --- ARAY\'dcZ ---\
+st.title("\uc0\u55357 \u56538  Reading Fluency & Accuracy Lab (HFS Edition)")\
+st.info("Bu sistem Whisper Large V3 motoru ve Global Alignment algoritmas\uc0\u305  ile \'e7al\u305 \u351 maktad\u305 r.")\
+\
+tab1, tab2, tab3 = st.tabs(["\uc0\u55357 \u56589  Analiz Paneli", "\u55357 \u56540  Ar\u351 iv", "\u9881 \u65039  S\'f6zl\'fck"])\
+\
+with tab1:\
+    st.sidebar.title("\uc0\u55357 \u56424 \u8205 \u55356 \u57323  Akademik Panel")\
+    st_name = st.sidebar.text_input("\'d6\uc0\u287 renci Ad\u305  Soyad\u305 :", placeholder="\'d6rn: Ahmet Y\u305 lmaz")\
+    strictness = st.sidebar.slider("Hassasiyet (Strictness)", 0.70, 1.0, 0.90)\
+\
+    col1, col2 = st.columns(2)\
+    with col1:\
+        orig_text = st.text_area("Orijinal Metin:", height=150)\
+    with col2:\
+        audio_file = st.file_uploader("MP4/MP3 Kayd\uc0\u305  Y\'fckle:", type=["wav", "mp3", "m4a", "mp4"])\
+        if audio_file: st.audio(audio_file)\
+\
+    if st.button("\uc0\u55357 \u56589  Analizi Ba\u351 lat", use_container_width=True):\
+        if audio_file and orig_text:\
+            ext = audio_file.name.split(".")[-1]\
+            temp_name = f"temp_analysis.\{ext\}"\
+            with open(temp_name, "wb") as f: f.write(audio_file.getbuffer())\
+            \
+            with st.spinner("Whisper Large V3 sesi i\uc0\u351 liyor..."):\
+                result = model.transcribe(temp_name, language="tr", initial_prompt=orig_text, temperature=0.0)\
+                st.session_state.duration = result.get("segments", [0])[-1].get("end", 1) if result.get("segments") else 1\
+                \
+                orig_words = [clean_text(w) for w in orig_text.split() if clean_text(w)]\
+                trans_words = [clean_text(w) for w in result["text"].split() if clean_text(w)]\
+                \
+                ex_dict = load_exceptions()\
+                matcher = SequenceMatcher(None, orig_words, trans_words)\
+                aligned = []\
+                for tag, i1, i2, j1, j2 in matcher.get_opcodes():\
+                    if tag == 'equal':\
+                        for i in range(i1, i2):\
+                            aligned.append(\{"h": orig_words[i], "d": trans_words[j1+(i-i1)], "s": True\})\
+                    elif tag in ['replace', 'delete']:\
+                        for i in range(i1, i2):\
+                            t_w = trans_words[j1+(i-i1)] if j1+(i-i1) < j2 else "---"\
+                            processed_tw = ex_dict.get(t_w, t_w)\
+                            sim = SequenceMatcher(None, processed_tw, orig_words[i]).ratio()\
+                            aligned.append(\{"h": orig_words[i], "d": t_w, "s": sim >= strictness\})\
+                \
+                st.session_state.results = aligned\
+                if os.path.exists(temp_name): os.remove(temp_name)\
+\
+    if st.session_state.results:\
+        total_words = len(st.session_state.results)\
+        correct_count = sum(1 for r in st.session_state.results if r['s'])\
+        accuracy = (correct_count / total_words) * 100 if total_words > 0 else 0\
+        wcpm = (correct_count / st.session_state.duration) * 60\
+        errors = total_words - correct_count\
+\
+        st.divider()\
+        m1, m2, m3, m4 = st.columns(4)\
+        m1.metric("Reading Accuracy", f"%\{accuracy:.1f\}")\
+        m2.metric("Reading Rate (WCPM)", int(wcpm))\
+        m3.metric("Correct Words", correct_count)\
+        m4.metric("Errors", errors)\
+        \
+        for i, res in enumerate(st.session_state.results):\
+            r1, r2, r3, r4, r5 = st.columns([0.5, 2, 2, 1, 2])\
+            r1.write(i+1); r2.write(res['h'])\
+            color = "green" if res['s'] else "red"\
+            r3.markdown(f"<span style='color:\{color\}; font-weight:bold;'>\{res['d']\}</span>", unsafe_allow_html=True)\
+            r4.write("\uc0\u9989 " if res['s'] else "\u10060 ")\
+            if not res['s'] and res['d'] != "---":\
+                if r5.button(f"\'d6\uc0\u287 ret", key=f"p_\{i\}"):\
+                    save_exception(res['d'], res['h']); st.session_state.results[i]['s'] = True; st.rerun()\
+            elif r5.button("D\'fczelt", key=f"f_\{i\}"):\
+                st.session_state.results[i]['s'] = not res['s']; st.rerun()\
+\
+        st.divider()\
+        save_col, top_col = st.columns([4, 1])\
+        with save_col:\
+            if st.button("\uc0\u55357 \u56510  Analizi Ar\u351 ive Kaydet", use_container_width=True):\
+                if not st_name.strip(): st.error("\uc0\u304 sim giriniz!")\
+                else:\
+                    is_duplicate = False\
+                    if os.path.exists(DATA_FILE):\
+                        check_df = pd.read_csv(DATA_FILE)\
+                        if 'Student' in check_df.columns and st_name.strip() in check_df['Student'].astype(str).values:\
+                            is_duplicate = True\
+                    \
+                    if is_duplicate: st.warning("Bu isim zaten var.")\
+                    else:\
+                        record = \{"Date": datetime.now().strftime("%Y-%m-%d %H:%M"), "Student": st_name, "Accuracy (%)": f"%\{accuracy:.1f\}", "WCPM": int(wcpm), "Errors": errors, "Total Words": total_words\}\
+                        pd.DataFrame([record]).to_csv(DATA_FILE, mode='a', index=False, header=not os.path.exists(DATA_FILE), encoding='utf-8-sig')\
+                        st.success("Kaydedildi!"); st.balloons()\
+        with top_col:\
+            st.markdown("<a href='#top' style='text-decoration:none;'><div style='background-color:#262730; color:white; padding:10px; border-radius:5px; text-align:center; font-weight:bold;'>\uc0\u11014 \u65039  \'dcst</div></a>", unsafe_allow_html=True)\
+\
+with tab2:\
+    if os.path.exists(DATA_FILE):\
+        history_df = pd.read_csv(DATA_FILE)\
+        st.dataframe(history_df, use_container_width=True)\
+        st.download_button("\uc0\u55357 \u56549  \u304 ndir", history_df.to_csv(index=False).encode('utf-8-sig'), "arsiv.csv", "text/csv")\
+\
+with tab3:\
+    current_ex = load_exceptions()\
+    for wrong, correct in current_ex.items():\
+        c_w, c_c, c_d = st.columns([3, 3, 2])\
+        c_w.write(wrong); c_c.write(correct)\
+        if c_d.button("Sil", key=f"del_\{wrong\}"): delete_exception(wrong); st.rerun()}

packages.txt CHANGED Viewed

@@ -1 +1,8 @@
-ffmpeg

+{\rtf1\ansi\ansicpg1254\cocoartf2868
+\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
+{\colortbl;\red255\green255\blue255;}
+{\*\expandedcolortbl;;}
+\paperw11900\paperh16840\margl1440\margr1440\vieww11520\viewh8400\viewkind0
+\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\pardirnatural\partightenfactor0
+\f0\fs24 \cf0 ffmpeg}

requirements.txt CHANGED Viewed

@@ -1,3 +1,14 @@
-git+https://github.com/huggingface/transformers
-torch
-yt-dlp

+{\rtf1\ansi\ansicpg1254\cocoartf2868
+\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
+{\colortbl;\red255\green255\blue255;}
+{\*\expandedcolortbl;;}
+\paperw11900\paperh16840\margl1440\margr1440\vieww11520\viewh8400\viewkind0
+\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\pardirnatural\partightenfactor0
+\f0\fs24 \cf0 openai-whisper\
+streamlit\
+pandas\
+setuptools\
+jinja2\
+torch\
+numpy}