WordsStory / app.py
redstoneleo's picture
Upload 5 files
4946780 verified
import functools
import re
import gradio as gr
try:
import pandas as pd
USE_PANDAS = True
except ImportError:
USE_PANDAS = False
from WordStoryAudioMaker import audioStoryMaker
def parse_phrases_text(phrases_text: str):
    """Split raw text into a list of trimmed, non-empty phrases.

    Commas and newlines both act as separators; a run of consecutive
    separators counts as one, and entries that are empty after trimming
    are dropped.

    Args:
        phrases_text: Text holding phrases separated by commas and/or
            newlines.

    Returns:
        The phrases in input order, each stripped of surrounding whitespace.
    """
    phrases = []
    for chunk in re.split(r'[,\n]+', phrases_text):
        cleaned = chunk.strip()
        if cleaned:
            phrases.append(cleaned)
    return phrases
def bold_to_html(text: str) -> str:
    """Convert Markdown-style ``**bold**`` spans into HTML ``<b>`` tags.

    Args:
        text: Text that may contain ``**...**`` markers. Falsy values
            (empty string or ``None``) are accepted.

    Returns:
        The text with every ``**...**`` pair replaced by ``<b>...</b>``,
        or an empty string when the input is falsy.
    """
    if text:
        # Non-greedy group so each pair of markers is converted on its own.
        return re.sub(r"\*\*(.+?)\*\*", r"<b>\1</b>", text)
    return ""
def dict_list_to_html(dict_list):
    """Render dictionary lookup results as an HTML table.

    The table columns are the keys of the first entry, in order, with the
    "dictName" key left out. Known keys get a Chinese column title from
    ``col_map``; unknown keys are shown as-is. Newlines inside a string
    "definition" value become ``<br>`` so multi-line definitions keep their
    line breaks in the rendered cell.

    The input is never modified: entries whose definition needs rewriting
    are shallow-copied first. This matters because the caller may hand in
    objects that are shared (for example, values held in a cache).

    Args:
        dict_list: List of dicts, one per dictionary entry. May be empty
            or None.

    Returns:
        An HTML string: a placeholder paragraph when there is no data,
        otherwise the table wrapped in a ``<div>``. Cell values are
        inserted without HTML escaping.
    """
    if not dict_list:
        return "<p><i>(没有词典数据)</i></p>"

    col_map = {
        "dictKeyText": "单词或短语",
        "ukPhoneticSymbol": "英式音标",
        "usPhoneticSymbol": "美式音标",
        "definition": "释义",
        "example": "例句",
        "pos": "词性",
    }
    keys = [k for k in dict_list[0] if k != "dictName"]

    # Build the rows to render without touching the caller's dicts.
    entries = []
    for entry in dict_list:
        definition = entry.get("definition")
        if isinstance(definition, str):
            # Rebinding an existing key keeps its position in the dict.
            entry = {**entry, "definition": definition.replace("\n", "<br>")}
        entries.append(entry)

    if USE_PANDAS:
        df = pd.DataFrame(entries)[keys]
        df.columns = [col_map.get(c, c) for c in df.columns]
        # escape=False so the <br> tags inserted above are kept as markup.
        html = df.to_html(escape=False, index=False)
    else:
        header = "".join(f"<th>{col_map.get(k, k)}</th>" for k in keys)
        body = "\n".join(
            "<tr>" + "".join(f"<td>{entry.get(k, '')}</td>" for k in keys) + "</tr>"
            for entry in entries
        )
        html = (
            "\n<table border='1' cellspacing='0' cellpadding='6' "
            "style='border-collapse:collapse;'>\n"
            f"<thead style='background:#f5f5f5;'><tr>{header}</tr></thead>\n"
            f"<tbody>{body}</tbody>\n"
            "</table>\n"
        )

    return f"<div style='line-height:1.6;'>{html}</div>"
# ---------------------------
# Caching
# ---------------------------
def _normalize_input(phrases_text: str) -> str:
if phrases_text is None:
return ""
s = phrases_text.strip()
s = re.sub(r'\r\n', '\n', s)
s = re.sub(r'\n+', '\n', s)
s = re.sub(r'[,,]+', '\n', s)
return s
@functools.lru_cache(maxsize=128)
def _cached_audio_for_input(norm_phrases_text: str):
    """Generate (or fetch from cache) the story data for a phrase list.

    Results are memoized on the exact input string, keeping up to 128
    entries, so repeated submissions of the same normalized text skip the
    call to ``audioStoryMaker``. Repeat calls return the very same objects,
    so callers should treat them as read-only.

    Args:
        norm_phrases_text: Normalized phrase text (hashable cache key).

    Returns:
        The four values produced by ``audioStoryMaker``, as a tuple:
        phrase-list string, audio file path, dictionary results, story data.
    """
    phrase_list = parse_phrases_text(norm_phrases_text)
    # Unpack into exactly four values so an unexpected shape fails here.
    phrases_str, audio_path, dict_results, story = audioStoryMaker(phrase_list)
    return phrases_str, audio_path, dict_results, story
def wrapper_run(phrases_text: str):
    """Gradio click handler: turn the input text into the five UI outputs.

    Args:
        phrases_text: Raw contents of the phrase input box.

    Returns:
        A 5-tuple in the order the UI expects: phrase-list HTML, audio
        file path (or None), Chinese story HTML, English story HTML, and
        dictionary table HTML. When the normalized input is empty, a
        placeholder message is returned with no audio and empty strings
        for the rest.
    """
    normalized = _normalize_input(phrases_text)
    if not normalized:
        return "<p>(没有输入短语)</p>", None, "", "", ""

    phrases_str, audio_path, dict_results, story = _cached_audio_for_input(normalized)

    # Only a dict-shaped story carries the "en"/"zh" texts; anything else
    # renders as empty story panels.
    if isinstance(story, dict):
        en_body = bold_to_html(story.get("en", ""))
        zh_body = bold_to_html(story.get("zh", ""))
    else:
        en_body = ""
        zh_body = ""

    dict_html = dict_list_to_html(dict_results)

    story_en_html = f"<div style='line-height:1.6;'>{en_body}</div>"
    story_zh_html = f"<div style='line-height:1.6;'>{zh_body}</div>"
    phrases_html = f"<p><b>单词或短语:</b> {phrases_str}</p>"

    return phrases_html, audio_path, story_zh_html, story_en_html, dict_html
# ---------------------------
# Gradio UI
# ---------------------------
# Sample phrases pre-filled in the input box so the demo can be tried at once.
default_phrases = """nobility
underperform
insane
liable
conspiracy
proprietary
ditch"""

# Introductory Markdown rendered at the top of the page.
_INTRO_MARKDOWN = """## 单词故事:听故事 = 复习 + 记忆单词!
单词故事利用 AI 将用户提供的一组单词或短语编成一个有趣的小故事,然后合成音频,使用户可以通过听故事来复习之前记过的单词——将枯燥的单词记忆变得轻松有趣起来!"""

# Label shown on the phrase input box.
_INPUT_LABEL = """输入单词或短语(每行一个),建议每次提交7个,这是由George A. Miller (1956) 的论文The Magical Number Seven, Plus or Minus Two所揭示的记忆规律。
下面默认的内容是为了方便测试用,您可以删除后使用你的"""

with gr.Blocks(title="单词故事") as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    inp = gr.Textbox(
        value=default_phrases,
        label=_INPUT_LABEL,
        placeholder="在这里输入英文单词或短语",
        lines=7,
    )
    btn = gr.Button("生成")

    # Output components, created in the order they appear on the page.
    out_phrasesListStr = gr.HTML(label="短语列表")
    out_audio = gr.Audio(label="单词故事音频(播放)", type="filepath", autoplay=True)
    out_storyZh = gr.HTML(label="中文故事")
    out_storyEn = gr.HTML(label="英文故事")
    out_dictTable = gr.HTML(label="词典释义表格")

    # Order must match the tuple returned by wrapper_run.
    result_components = [
        out_phrasesListStr,
        out_audio,
        out_storyZh,
        out_storyEn,
        out_dictTable,
    ]
    btn.click(fn=wrapper_run, inputs=[inp], outputs=result_components)

if __name__ == "__main__":
    demo.launch()