Spaces:

Toya0421
/

Prompt_Test

Sleeping

App Files Files Community

Toya0421 committed on Dec 25, 2025

Commit

40925de

verified ·

1 Parent(s): cb9f047

Create app.py

Browse files

Files changed (1) hide show

app.py +280 -0

app.py ADDED Viewed

	@@ -0,0 +1,280 @@

+import os
+import re
+import glob
+import csv
+import threading
+from datetime import datetime, timedelta
+import gradio as gr
+import textstat
+from openai import OpenAI
+# =========================
+# 設定（元コード踏襲）
+# =========================
# Connection settings for the OpenAI-compatible endpoint. Each value is read
# from the environment; the base URL and the model have defaults.
API_KEY = os.getenv("API_KEY")
BASE_URL = os.getenv("BASE_URL", "https://openrouter.ai/api/v1")
MODEL = os.getenv("MODEL", "google/gemini-2.5-flash")

# Directory for the score CSV. The default, /data, assumes that persistent
# storage has been enabled for the Space.
OUT_DIR = os.getenv("OUT_DIR", "/data")
os.makedirs(OUT_DIR, exist_ok=True)
CSV_PATH = os.path.join(OUT_DIR, "rewrite_scores.csv")
PASSAGES_DIR = os.getenv("PASSAGES_DIR", "passages")

# Fail at import time instead of on the first rewrite request.
if not API_KEY:
    raise RuntimeError("API_KEY is not set (env: API_KEY)")
client = OpenAI(base_url=BASE_URL, api_key=API_KEY)

# Upper bound on simultaneous rewrite requests. The value is clamped to at
# least 1: Semaphore(0) would block every rewrite forever, and a negative
# value makes the Semaphore constructor raise ValueError.
REWRITE_CONCURRENCY = max(1, int(os.getenv("REWRITE_CONCURRENCY", "2")))
_rewrite_sem = threading.Semaphore(REWRITE_CONCURRENCY)

# Stop flag polled by the batch handler, plus the lock guarding it.
_stop_flag_lock = threading.Lock()
_stop_flag = False
+# =========================
+# passages の列挙
+# =========================
def list_passage_files_sorted(passages_dir: str) -> list[tuple[int, str]]:
    """Return ``(number, path)`` pairs for the ``pg<number>.txt`` files in a directory.

    Candidates are collected with the glob ``pg*.txt``. Only file names made
    of ``pg``, one or more digits, then ``.txt`` are kept. The pairs are
    ordered by ascending number.
    """
    name_pattern = re.compile(r"pg(\d+)\.txt$")
    candidates = glob.glob(os.path.join(passages_dir, "pg*.txt"))
    numbered = [
        (int(hit.group(1)), candidate)
        for candidate in candidates
        if (hit := name_pattern.match(os.path.basename(candidate)))
    ]
    # Sort on the number only, so equal numbers keep their glob order.
    return sorted(numbered, key=lambda pair: pair[0])
def load_text(path: str) -> str:
    """Read the file at ``path`` as UTF-8 text and return its whole contents."""
    with open(path, mode="r", encoding="utf-8") as handle:
        contents = handle.read()
    return contents
+# =========================
+# 書き換え（プロンプト同一）
+# =========================
def rewrite_level(text: str, target_level: int) -> str:
    """Ask the chat model to rewrite ``text`` toward a target readability.

    ``target_level`` (1-5) is mapped to a target Flesch Reading Ease score
    (90, 70, 55, 40, 25), which is embedded in the prompt together with the
    passage. The request is sent with temperature 0.4 and a 5000-token cap.

    Args:
        text: The passage to rewrite.
        target_level: Level from 1 to 5; it is converted with ``int()``.

    Returns:
        The model's reply with leading and trailing whitespace removed.

    Raises:
        KeyError: If ``target_level`` is not one of 1-5.
        RuntimeError: If the response has no choices or no text content.
    """
    level_to_flesch = {1: 90, 2: 70, 3: 55, 4: 40, 5: 25}
    target_flesch = level_to_flesch[int(target_level)]
    prompt = f"""
Rewrite the following passage so it fits about {target_flesch} Flesch Reading Ease Score
- Extract only the portions of the text that should be read as the main body,
excluding the title, author name, source information, chapter number, annotations, and footers.
- When outputting, make sure sections divided by chapters, etc., are clearly distinguishable by leaving a blank line between them.
- Preserve the original meaning faithfully.
- Do not add new information or remove essential information.
- Output only the rewritten passage. Do not include explanations.
{text}
"""
    # The semaphore is held only for the network call, so it bounds the
    # number of in-flight requests and nothing else.
    with _rewrite_sem:
        resp = client.chat.completions.create(
            model=MODEL,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.4,
            max_tokens=5000
        )
    # A reply can come back without choices or with ``content`` set to None;
    # report that explicitly instead of failing on ``None.strip()``.
    choices = resp.choices
    content = choices[0].message.content if choices else None
    if content is None:
        raise RuntimeError("Rewrite request returned no text content")
    return content.strip()
+# =========================
+# 指標（FRE + 単語数）
+# =========================
_word_re = re.compile(r"[A-Za-z]+(?:'[A-Za-z]+)?")


def count_words_english(text: str) -> int:
    """Count the English-style words in ``text``.

    A word is a run of ASCII letters, optionally followed by one apostrophe
    and a second run of letters (for example ``don't``).
    """
    return sum(1 for _ in _word_re.finditer(text))
def compute_metrics(text: str) -> tuple[float, int]:
    """Return ``(Flesch Reading Ease, word count)`` for ``text``.

    The score comes from ``textstat.flesch_reading_ease`` and is converted to
    ``float``; the word count comes from ``count_words_english``.
    """
    reading_ease = textstat.flesch_reading_ease(text)
    return float(reading_ease), count_words_english(text)
+# =========================
+# CSV追記（軽量・永続）
+# =========================
_csv_lock = threading.Lock()


def append_csv_row(row: dict, csv_path: "str | None" = None) -> None:
    """Append one row to the score CSV, writing the header row first if needed.

    The header is written when the target file is missing or is zero bytes
    long, so a pre-created empty file still ends up with column names.

    Args:
        row: Values keyed by column name. Missing columns are written as
            empty strings; keys that are not columns are ignored.
        csv_path: File to append to. Defaults to the module-level
            ``CSV_PATH``.
    """
    fieldnames = ["timestamp_jst", "Text#", "target_level", "flesch_reading_ease", "word_count", "rewritten_text"]
    target = CSV_PATH if csv_path is None else csv_path
    # The emptiness check and the write happen under one lock so that two
    # concurrent callers cannot both decide to write the header.
    with _csv_lock:
        needs_header = not os.path.exists(target) or os.path.getsize(target) == 0
        with open(target, "a", encoding="utf-8", newline="") as f:
            w = csv.DictWriter(f, fieldnames=fieldnames)
            if needs_header:
                w.writeheader()
            w.writerow({k: row.get(k, "") for k in fieldnames})
+# =========================
+# 停止フラグ
+# =========================
def set_stop(flag: bool) -> None:
    """Store ``flag`` in the module-level stop flag while holding its lock."""
    global _stop_flag
    _stop_flag_lock.acquire()
    try:
        _stop_flag = flag
    finally:
        _stop_flag_lock.release()
def get_stop() -> bool:
    """Return the current value of the module-level stop flag, read under its lock."""
    with _stop_flag_lock:
        current = _stop_flag
    return current
+# =========================
+# UIロジック
+# =========================
def init_state():
    """Build a fresh state dict from the passage files found in ``PASSAGES_DIR``.

    Keys:
        files: ``(text_id, path)`` pairs as returned by
            ``list_passage_files_sorted``.
        idx: Position in ``files`` of the next passage to process; starts at 0.
    """
    passage_files = list_passage_files_sorted(PASSAGES_DIR)
    return dict(files=passage_files, idx=0)
def start(level: int):
    """Clear the stop flag, rescan the passage files, and report readiness.

    ``level` is accepted but not used here.

    Returns:
        A 6-tuple ``(state, status message, "", "", "", visibility update)``.
        The visibility update is ``visible=False`` when no passage file was
        found and ``visible=True`` otherwise.
    """
    set_stop(False)
    st = init_state()
    total = len(st["files"])
    if total > 0:
        msg = f"準備完了: {total}件。次に処理するのは #Text {st['files'][0][0]} です。"
        note_update = gr.update(visible=True)
    else:
        msg = "passages/pg*.txt が見つかりません"
        note_update = gr.update(visible=False)
    return st, msg, "", "", "", note_update
def _rewrite_and_record(level: int, text_id: int, path: str) -> tuple[str, str]:
    """Rewrite one passage, append its scores to the CSV, and return ``(header, rewritten)``."""
    # Local import: the file-level import brings in only datetime and timedelta.
    from datetime import timezone

    rewritten = rewrite_level(load_text(path), target_level=level)
    fre, wc = compute_metrics(rewritten)
    # JST is a fixed UTC+9 offset. An aware "now" replaces the deprecated
    # naive datetime.utcnow() and produces the same formatted string.
    ts = datetime.now(timezone(timedelta(hours=9))).strftime("%Y-%m-%d %H:%M:%S")
    append_csv_row({
        "timestamp_jst": ts,
        "Text#": text_id,
        "target_level": level,
        "flesch_reading_ease": f"{fre:.2f}",
        "word_count": wc,
        "rewritten_text": rewritten
    })
    header = f"#Text {text_id}\nTarget Level: {level}\nFlesch Reading Ease: {fre:.2f}\nWord Count: {wc}\nSaved: {CSV_PATH}"
    return header, rewritten


def run_one(level: int, state: dict):
    """Process the next unprocessed passage and advance ``state["idx"]`` by one.

    Args:
        level: Target level passed through to the rewrite.
        state: Dict with ``files`` (``(text_id, path)`` pairs) and ``idx``
            (position of the next passage). It is updated in place.

    Returns:
        A 6-tuple ``(state, status message, header, progress, rewritten
        text, visibility update)``. When nothing is left to process, the
        header, progress and text are empty strings.
    """
    set_stop(False)
    files = state.get("files", [])
    idx = int(state.get("idx", 0))
    total = len(files)
    if idx >= total:
        return state, "全て処理済みです。", "", "", "", gr.update(visible=True)
    text_id, path = files[idx]
    header, rewritten = _rewrite_and_record(level, text_id, path)
    state["idx"] = idx + 1
    progress = f"{state['idx']} / {total}"
    return state, "1件処理しました。", header, progress, rewritten, gr.update(visible=True)


def run_all(level: int, state: dict):
    """Process every remaining passage in order, stopping early on request.

    The stop flag is cleared on entry and checked before each passage, so a
    stop request takes effect once the passage in progress has finished.

    Args:
        level: Target level passed through to the rewrite.
        state: Dict with ``files`` (``(text_id, path)`` pairs) and ``idx``
            (position of the next passage). It is updated in place.

    Returns:
        A 6-tuple ``(state, status message, header, progress, rewritten
        text, visibility update)``. The header and text belong to the last
        passage processed in this call (empty strings if none was).
    """
    set_stop(False)
    files = state.get("files", [])
    idx = int(state.get("idx", 0))
    total = len(files)
    if idx >= total:
        return state, "全て処理済みです。", "", f"{idx} / {total}", "", gr.update(visible=True)
    last_header = ""
    last_text = ""
    while idx < total:
        if get_stop():
            state["idx"] = idx
            return state, "停止しました。", last_header, f"{idx} / {total}", last_text, gr.update(visible=True)
        text_id, path = files[idx]
        last_header, last_text = _rewrite_and_record(level, text_id, path)
        idx += 1
        # Persist progress after every passage so an interruption keeps it.
        state["idx"] = idx
    return state, "全件処理が完了しました。", last_header, f"{idx} / {total}", last_text, gr.update(visible=True)
def stop():
    """Raise the stop flag and return the acknowledgement message."""
    set_stop(True)
    acknowledgement = "停止要求を受け付けました（処理中の1件が終わったタイミングで止まります）。"
    return acknowledgement
def reset_csv():
    """Delete the score CSV if it exists and return a confirmation message.

    The existence check and the removal run under ``_csv_lock``. The same
    message is returned whether or not a file was actually removed.
    """
    with _csv_lock:
        csv_present = os.path.exists(CSV_PATH)
        if csv_present:
            os.remove(CSV_PATH)
    message = f"CSVを削除しました: {CSV_PATH}"
    return message
+# =========================
+# Gradio UI（Spaces向け）
+# =========================
with gr.Blocks() as demo:
    gr.Markdown("# 🔁 Passage Rewrite + FRE Scoring (HF Spaces)")

    # State (file list and next index) and the read-only result widgets.
    state = gr.State(init_state())
    level = gr.Dropdown(choices=[1, 2, 3, 4, 5], value=1, label="Target Level (1..5)")
    status = gr.Textbox(label="Status", interactive=False)
    header = gr.Textbox(label="Result Header (#Text / FRE / Words)", lines=5, interactive=False)
    progress = gr.Textbox(label="Progress", interactive=False)
    output_text = gr.Textbox(label="Rewritten Text", lines=18, interactive=False)

    # Processing controls.
    with gr.Row():
        start_btn = gr.Button("開始（ファイル読み込み）")
        one_btn = gr.Button("次へ（1件処理）")
        all_btn = gr.Button("全件実行（残りを処理）")
        stop_btn = gr.Button("停止")

    # CSV maintenance.
    with gr.Row():
        reset_btn = gr.Button("CSVリセット（削除）")
        csv_hint = gr.Markdown(f"📄 CSV保存先: `{CSV_PATH}`（SpacesのFilesに出ます）")

    # Hidden Markdown element whose visibility the processing handlers update.
    stop_note = gr.Markdown(visible=False)

    # The three processing handlers each return the same six values, in the
    # order of the outputs list below.
    for _button, _handler, _handler_inputs in (
        (start_btn, start, [level]),
        (one_btn, run_one, [level, state]),
        (all_btn, run_all, [level, state]),
    ):
        _button.click(
            fn=_handler,
            inputs=_handler_inputs,
            outputs=[state, status, header, progress, output_text, stop_note],
        )

    # Stop and reset only report a message in the status box.
    for _button, _handler in ((stop_btn, stop), (reset_btn, reset_csv)):
        _button.click(fn=_handler, inputs=[], outputs=[status])

demo.queue(max_size=64)
demo.launch()