# Yasu777's picture
# Update app.py
# a060cee verified
import gradio as gr
import os
import subprocess
import re
from tinydb import TinyDB, Query
import datetime
from datetime import timedelta
from bs4 import BeautifulSoup
import warnings
from article_generator import generate_article
from tavily_search import create_tavily_search_ui, tavily_search_interface
import time
def is_valid_html(html):
    """Report whether parsing *html* with BeautifulSoup emits no warnings.

    The markup is parsed with the ``html.parser`` backend while every warning
    is recorded; the result is True only when nothing was recorded.
    """
    with warnings.catch_warnings(record=True) as caught:
        # Record every warning, including ones that would normally be shown once.
        warnings.simplefilter('always')
        BeautifulSoup(html, 'html.parser')
    return not caught
# Initialise the database, then purge every record whose timestamp is one day old or older.
db = TinyDB("db.json")
current_time = datetime.datetime.now()
one_day_ago = current_time - timedelta(days=1)


def _is_expired(timestamp):
    """Return True when the ISO-format *timestamp* is at or before ``one_day_ago``."""
    return datetime.datetime.fromisoformat(timestamp) <= one_day_ago


_expired_records = Query().timestamp.test(_is_expired)
db.remove(_expired_records)
# Upper bound (seconds) on how long to wait for output3.txt to appear.
_OUTPUT_FILE_TIMEOUT_SECONDS = 30.0


def process_article(new_keyword, other_keywords, action, edited_article_structure):
    """Run the step selected by *action* and return the values for the UI.

    Args:
        new_keyword: Main keyword, passed to keywords_processor.py and
            headline_generator.py.
        other_keywords: Additional keywords, passed to keywords_processor.py.
        action: "構成作成" to build the article structure, "本文作成" to
            generate the article body from the edited structure.
        edited_article_structure: Structure text handed to ``generate_article``.

    Returns:
        A ``(response_text, final_article_content, final_article_html)`` tuple.
        For "構成作成" the last two items are empty strings. For "本文作成"
        the first item is *edited_article_structure* unchanged, the second is
        the text read from output3.txt (or an error message), and the third is
        a ``gr.HTML`` object. Any other action yields three empty strings.
    """
    if action == "構成作成":
        # Only the editable structure is filled in; the article outputs stay empty.
        return _create_article_structure(new_keyword, other_keywords), "", ""
    if action == "本文作成":
        article_content, article_html = _create_article_body(edited_article_structure)
        return edited_article_structure, article_content, article_html
    return "", "", ""


def _create_article_structure(new_keyword, other_keywords):
    """Run the keyword and headline scripts and return their combined output."""
    # subprocess.run rejects None arguments, so treat missing keywords as empty.
    new_keyword = new_keyword or ""
    other_keywords = other_keywords or ""

    print(f"Running keywords_processor.py with arguments: {new_keyword}, {other_keywords}")
    result = subprocess.run(["python3", "keywords_processor.py", new_keyword, other_keywords], capture_output=True, text=True)
    print(f"Return code: {result.returncode}")
    if result.returncode != 0:
        response_text = "Keywords Processor Error: " + result.stderr
        print("Keywords Processor Error:", result.stderr)
    else:
        response_text = result.stdout
        print("Keywords Processor Output:", result.stdout)

    # Run headline_generator.py; its output is appended only on success.
    print("Running headline_generator.py")
    headline_result = subprocess.run(["python3", "headline_generator.py", new_keyword], capture_output=True, text=True)
    print(f"Headline Generator Return code: {headline_result.returncode}")
    if headline_result.returncode != 0:
        print("Headline Generator Error:", headline_result.stderr)
    else:
        response_text += headline_result.stdout
        print("Headline Generator Output:", headline_result.stdout)
    return response_text


def _create_article_body(edited_article_structure):
    """Generate the article and return ``(content, html_component)``."""
    try:
        final_article_content, final_article_html = generate_article(edited_article_structure)
        # Wait for output3.txt to be created, but give up after a bounded time
        # instead of blocking the request forever.
        # NOTE(review): a stale output3.txt from an earlier run would be read
        # immediately — confirm that generate_article always rewrites it.
        deadline = time.monotonic() + _OUTPUT_FILE_TIMEOUT_SECONDS
        while not os.path.exists("output3.txt"):
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"output3.txt が {_OUTPUT_FILE_TIMEOUT_SECONDS:g} 秒以内に作成されませんでした"
                )
            time.sleep(0.1)
        with open("output3.txt", "r", encoding="utf-8") as f:
            final_article_content = f.read()
        # Re-create final_article_html as a gr.HTML object holding the file content.
        final_article_html = gr.HTML(value=final_article_content, visible=True)
        # Debug output.
        print("After updating from output3.txt - final_article_html value:", final_article_html)
        print("After updating from output3.txt - final_article_html type:", type(final_article_html))
    except Exception as e:
        final_article_content = f"予期せぬエラーが発生しました:{str(e)}"
        # Still hand back a proper (hidden) HTML object on failure.
        final_article_html = gr.HTML(visible=False)
        print(f"予期せぬエラーが発生しました:{str(e)}")
        print("final_article_html after error:", final_article_html)
        print("final_article_html type after error:", type(final_article_html))
    return final_article_content, final_article_html
def custom_html_to_markdown(html):
    """Convert an HTML fragment into simple Markdown text.

    ``<script>`` and ``<style>`` elements are removed, ``<h1>``–``<h3>``
    become ``#``–``###`` headings, ``<p>`` elements become plain paragraphs,
    and the text of whatever remains is returned.
    """
    document = BeautifulSoup(html, 'html.parser')

    # Remove elements whose contents must not appear in the output.
    for unwanted in document.find_all(['script', 'style']):
        unwanted.decompose()

    # Headings, processed level by level; each is followed by a blank line.
    for level in (1, 2, 3):
        marker = "#" * level
        for heading in document.find_all(f"h{level}"):
            heading.replace_with(f"{marker} {heading.get_text().strip()}\n\n")

    # Paragraphs keep only their stripped text.
    for paragraph in document.find_all('p'):
        paragraph.replace_with(f"{paragraph.get_text().strip()}\n\n")

    # The text of the rewritten tree is the final Markdown.
    return document.get_text()
def display_content(content, formatted_final_article_md, formatted_final_article_html):
    """Convert HTML *content* to Markdown and return updates that display it.

    Args:
        content: HTML text passed to ``custom_html_to_markdown``.
        formatted_final_article_md: Markdown component for the article.
        formatted_final_article_html: HTML component for the article; must be
            a ``gr.components.HTML`` instance.

    Returns:
        A two-item tuple. When *formatted_final_article_html* is not an HTML
        component, both arguments are returned unchanged. Otherwise the tuple
        holds an update that shows the converted Markdown and an update that
        hides the HTML component.
    """
    print("Type of formatted_final_article_html before processing in display_content:", type(formatted_final_article_html))
    if not isinstance(formatted_final_article_html, gr.components.HTML):
        print("Error: formatted_final_article_html is not a gr.HTML object")
        return formatted_final_article_md, formatted_final_article_html  # Bail out early on a wrong type.
    markdown_content = custom_html_to_markdown(content)
    # Calling .update() on a component and discarding the result changes
    # nothing, so return explicit update payloads instead.
    return gr.update(value=markdown_content, visible=True), gr.update(visible=False)
# Gradio application setup.
_APP_CSS = '''
.gr-markdown h1, .gr-markdown h2, .gr-markdown h3 {
word-wrap: break-word;
overflow-wrap: break-word;
}
'''


def switch_search_display_format(display_format):
    """Return visibility updates for the (Markdown, HTML) search result panes."""
    show_markdown = display_format == "Markdown"
    return gr.update(visible=show_markdown), gr.update(visible=not show_markdown)


with gr.Blocks(css=_APP_CSS) as app:
    with gr.Row():
        # Left column: article structure and article body generation.
        with gr.Column():
            gr.Markdown("### BabyWriterPRO")
            new_keyword = gr.Textbox(label="キーワード")
            other_keywords = gr.Textbox(label="その他のキーワード", lines=4)
            create_structure_button = gr.Button("構成作成")
            editable_output2 = gr.Textbox(
                label="編集可能な記事構成",
                lines=10,
                placeholder="記事構成がここに表示されます",
            )
            create_article_button = gr.Button("本文作成")
            formatted_final_article_md = gr.Markdown(value="", visible=True, label="最終的な記事本文")
            formatted_final_article_html = gr.HTML(value="", visible=False, label="最終的な記事本文(HTML)")

            # Both buttons call process_article; only the action label differs.
            # NOTE(review): process_article returns three values but only two
            # outputs are wired here — confirm whether
            # formatted_final_article_html should be a third output.
            for trigger, action_name in (
                (create_structure_button, "構成作成"),
                (create_article_button, "本文作成"),
            ):
                trigger.click(
                    fn=process_article,
                    inputs=[new_keyword, other_keywords, gr.State(value=action_name), editable_output2],
                    outputs=[editable_output2, formatted_final_article_md],
                )
            print("Setup change event for display_format with MD and HTML components")

        # Right column: search UI.
        with gr.Column():
            gr.Markdown("&nbsp;", elem_id="spacer")  # Invisible Markdown used as a spacer.
            selected_text = gr.Textbox(label="検索キーワード", lines=3, elem_classes="right-align")
            domain_filter = gr.Radio(
                label="検索フィルタ(all:全検索、official:公式、企業、政府系)",
                choices=["all", "official"],
                value="all",
                elem_classes="right-align",
            )
            search_button = gr.Button("検索", elem_classes="right-align")
            search_display_format = gr.Radio(
                label="表示形式",
                choices=["Markdown", "HTML"],
                value="Markdown",
                elem_classes="right-align",
            )
            search_results_md = gr.Markdown(elem_classes="right-align")
            search_results_html = gr.HTML(elem_classes="right-align", visible=False)

            search_button.click(
                tavily_search_interface,
                inputs=[selected_text, domain_filter],
                outputs=[search_results_md, search_results_html],
            )
            search_display_format.change(
                switch_search_display_format,
                inputs=[search_display_format],
                outputs=[search_results_md, search_results_html],
            )

app.launch()