BabyWriterPro

Sleeping

App Files Files Community

Yasu777 committed on Oct 8, 2023

Commit

38ce015

1 Parent(s): 0ec16c8

Update app.py

Browse files

Files changed (1) hide show

app.py +80 -78

app.py CHANGED Viewed

@@ -2,7 +2,6 @@ import streamlit as st
 import os
 import subprocess
 from urllib.parse import urlparse
-import warnings
 from bs4 import BeautifulSoup
 import re
 from tinydb import TinyDB, Query
@@ -11,11 +10,13 @@ import urllib.request
 import urllib.error
 import datetime
-@st.cache_resource
 def get_top_urls_and_keyword(keyword):
     GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
     CUSTOM_SEARCH_ENGINE_ID = os.getenv("CUSTOM_SEARCH_ENGINE_ID")
     service = build("customsearch", "v1", developerKey=GOOGLE_API_KEY)
     response = service.cse().list(
         q=keyword,
@@ -25,9 +26,12 @@ def get_top_urls_and_keyword(keyword):
         start=1
     ).execute()
     urls = [item['link'] for item in response["items"][:3]]
     return urls, keyword
 def get_valid_url(urls):
     for url in urls:
         try:
@@ -35,6 +39,7 @@ def get_valid_url(urls):
             charset = response.headers.get_content_charset()
             html = response.read().decode(charset)
             soup = BeautifulSoup(html, "html.parser")
             if is_japanese_text(soup.get_text()):
                 return url
         except urllib.error.URLError as e:
@@ -43,69 +48,85 @@ def get_valid_url(urls):
             print(f"HTTPエラー: {e.code}")
         except:
             print("予期せぬエラーが発生しました。")
     return None
 def is_japanese_text(text):
     """Return True if *text* contains at least one Japanese character.

     A character counts as Japanese when it is hiragana, katakana, a CJK
     ideograph (kanji), or one of the Japanese punctuation marks listed in
     the pattern below.

     Args:
         text: The string to inspect.

     Returns:
         bool: True when any matching character is found, otherwise False.
     """
     # The stdlib ``re`` module has no support for Unicode script properties
     # (\p{Hiragana}, \p{Katakana}, \p{Han}) and raises ``re.error`` on them,
     # so the scripts are spelled out as explicit code-point ranges instead.
     japanese_pattern = (
         r"["
         r"\u3040-\u309F"  # Hiragana
         r"\u30A0-\u30FF"  # Katakana (includes the long-vowel mark)
         r"\u31F0-\u31FF"  # Katakana phonetic extensions
         r"\uFF66-\uFF9F"  # Half-width katakana
         r"\u3400-\u4DBF"  # CJK unified ideographs, extension A
         r"\u4E00-\u9FFF"  # CJK unified ideographs
         r"\uF900-\uFAFF"  # CJK compatibility ideographs
         r"々〇"  # Iteration mark and ideographic zero (Han script)
         r"ー〜、。「」【】"  # Punctuation kept from the original pattern
         r"]"
     )
     return bool(re.search(japanese_pattern, text))
-def is_valid_html(html):
-    with warnings.catch_warnings(record=True) as w:
-        warnings.simplefilter('always')
-        BeautifulSoup(html, 'html.parser')
-        return len(w) == 0
 db = TinyDB("db.json")
 current_time = datetime.datetime.now()
 one_day_ago = current_time - datetime.timedelta(days=1)
 db.remove(Query().timestamp.test(lambda x: datetime.datetime.fromisoformat(x) <= one_day_ago))
-st.title("記事生成ウェブアプリ")
-st.write("このアプリは、与えられたキーワードを使用して記事を生成します。")
 new_keyword = st.text_input("キーワード:")
 keyword_id = re.sub(r"\W+", "", new_keyword) if new_keyword else None
 last_keyword = db.search(Query().keyword_id.exists())
 if new_keyword and (not last_keyword or last_keyword[0]['keyword_id'] != keyword_id):
     if last_keyword:
         db.remove(Query().keyword_id == last_keyword[0]['keyword_id'])
     with open("output1.txt", "w") as f:
         f.write("")
     with open("output2.txt", "w") as f:
         f.write("")
     with open("output3.txt", "w") as f:
         f.write("")
     for i in range(1, 4):
         filename = f"output0-{i}.txt"
         if os.path.exists(filename):
             os.remove(filename)
 if new_keyword:
     urls, keyword = get_top_urls_and_keyword(new_keyword)
-    if len(urls) < 3:
         st.error("Google検索の結果が3つ未満です。別のキーワードを試してみてください。")
     else:
         url1, url2, url3 = urls
-if keyword_id:
-    output1 = st.empty()
-    output2 = st.empty()
-    output3 = st.empty()
-    result = db.search((Query().name == "output2.txt") & (Query().keyword_id == keyword_id))
-    if result:
-        editable_output2 = result[0]["content"]
-    else:
-        editable_output2 = ""
-    if st.button("記事構成作成", key=f"run_button_{keyword_id}"):
-        try:
             with st.spinner("タイトル・見出し作成中..."):
                 urls, keyword = get_top_urls_and_keyword(new_keyword)
                 url1, url2, url3 = urls
                 parsed_urls = [urlparse(url) for url in urls]
                 if len(urls) != len(set(urls)):
                     st.error("異なるURLを入力してください。")
@@ -114,20 +135,17 @@ if keyword_id:
                     st.error("異なるサイトのURLを入力してください。")
                     st.stop()
-                process = subprocess.Popen(["python3", "first.py", url1, url2, url3])
-                process.wait()
             with open("output1.txt", "r", encoding="utf-8") as f:
                 content = f.read()
             content = re.sub(r"\n関連するテキスト部分:.*", "", content, flags=re.DOTALL)
             output1.text(content)
             db.upsert({"name": "output1.txt", "content": content, "keyword_id": keyword_id},
-                      (Query().name == "output1.txt") & (Query().keyword_id == keyword_id))
-            with st.spinner("その他の処理を実行中..."):
-                process = subprocess.Popen(["python3", "second.py", keyword])
-                process.wait()
             with open("output2.txt", "r", encoding="utf-8") as f:
                 editable_output2 = f.read()
                 soup = BeautifulSoup(editable_output2, "html.parser")
@@ -146,63 +164,47 @@ if keyword_id:
                     )
             st.success("処理が完了しました。")
-        except subprocess.CalledProcessError:
-            st.error("記事の構成作成中にエラーが発生しました。もう一度お試しください。")
-        except Exception as e:
-            st.error(f"予期せぬエラーが発生しました：{str(e)}")
-    editable_output2 = st.text_area("output2.txtを編集してください:", value=editable_output2)
-    if st.button("記事作成"):
-        with st.spinner("記事作成中..."):
-            # 編集済みの "output2.txt" の内容をデータベースに保存
-            existing_docs = db.search((Query().name == "output2.txt") & (Query().keyword_id == keyword_id))
-            if existing_docs:
-                db.update({"content": editable_output2}, doc_ids=[doc.doc_id for doc in existing_docs])
-            # データベースから読み込んだ "output2.txt" の内容をファイルに書き込む処理
-            with open("output2.txt", "w", encoding="utf-8") as f:
-                f.write(editable_output2)
-            process = subprocess.Popen(
-                ["python3", "third.py"], stdout=subprocess.PIPE, stderr=subprocess.PIPE
-            )
-            stdout, stderr = process.communicate()
-            process.wait()
-            if stderr:
-                st.error(f"エラー: {stderr.decode('utf-8')}")
-            if stdout:  # この部分を追加してください。
-                st.info(f"情報: {stdout.decode('utf-8')}")
-            if os.path.exists("output3.txt"):
-                with open("output3.txt", "r", encoding="utf-8") as f:
-                    output3_content = f.read()  # output3.txt の内容を読み込み
-                    output3.text(output3_content)  # 元のテキスト表示
-                    st.text_area("output3.txtの内容（コピー用）:", value=output3_content, height=200, key="output3_text_area")  # テキストエリアに表示
-            else:
-                st.write("output3.txt が見つかりません。third.py が正常に実行されていることを確認してください。")
-    if st.button("保存"):
-        h2_limit = 5
-        h3_limit = 10
-        soup = BeautifulSoup(editable_output2, "html.parser")
-        h2_count = len(soup.find_all("h2"))
-        h3_count = len(soup.find_all("h3"))
-        if h2_count > h2_limit or h3_count > h3_limit:
-            st.error(f"h2タグの数が{h2_limit}を、h3タグの数が{h3_limit}を超えています。")
-        elif not is_valid_html(editable_output2):
-            st.error("入力されたテキストは正しいHTML形式ではありません。")
-        else:
             content = editable_output2
             with open("output2.txt", "w", encoding="utf-8") as f:
                 f.write(content)
             db.upsert({"name": "output2.txt", "content": content, "timestamp": current_time.isoformat(),
-                       "keyword_id": keyword_id}, (Query().name == "output2.txt") & (Query().keyword_id == keyword_id))
             st.write("output2.txt に変更が保存されました。")
-    if st.button("データクリア"):
-        db.remove(Query().keyword_id == keyword_id)
-        st.write("データベースがクリアされました。")
 else:
     st.warning("キーワードを入力してください。")

 import os
 import subprocess
 from urllib.parse import urlparse
 from bs4 import BeautifulSoup
 import re
 from tinydb import TinyDB, Query
 import urllib.error
 import datetime
+@st.cache
 def get_top_urls_and_keyword(keyword):
+    # SecretsからGoogle APIキーとカスタム検索エンジンIDを取得
     GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
     CUSTOM_SEARCH_ENGINE_ID = os.getenv("CUSTOM_SEARCH_ENGINE_ID")
+    # Google Customサーチ結果を取得
     service = build("customsearch", "v1", developerKey=GOOGLE_API_KEY)
     response = service.cse().list(
         q=keyword,
         start=1
     ).execute()
+    # 上位3つのサイトURLを取得
     urls = [item['link'] for item in response["items"][:3]]
     return urls, keyword
 def get_valid_url(urls):
     for url in urls:
         try:
             charset = response.headers.get_content_charset()
             html = response.read().decode(charset)
             soup = BeautifulSoup(html, "html.parser")
+            # 抽出したテキストが日本語であるかどうかを判定
             if is_japanese_text(soup.get_text()):
                 return url
         except urllib.error.URLError as e:
             print(f"HTTPエラー: {e.code}")
         except:
             print("予期せぬエラーが発生しました。")
     return None
 def is_japanese_text(text):
     """Return True if *text* contains at least one Japanese character.

     A character counts as Japanese when it is hiragana, katakana, a CJK
     ideograph (kanji), or one of the Japanese punctuation marks listed in
     the pattern below.

     Args:
         text: The string to inspect.

     Returns:
         bool: True when any matching character is found, otherwise False.
     """
     # The stdlib ``re`` module has no support for Unicode script properties
     # (\p{Hiragana}, \p{Katakana}, \p{Han}) and raises ``re.error`` on them,
     # so the scripts are spelled out as explicit code-point ranges instead.
     japanese_pattern = (
         r"["
         r"\u3040-\u309F"  # Hiragana
         r"\u30A0-\u30FF"  # Katakana (includes the long-vowel mark)
         r"\u31F0-\u31FF"  # Katakana phonetic extensions
         r"\uFF66-\uFF9F"  # Half-width katakana
         r"\u3400-\u4DBF"  # CJK unified ideographs, extension A
         r"\u4E00-\u9FFF"  # CJK unified ideographs
         r"\uF900-\uFAFF"  # CJK compatibility ideographs
         r"々〇"  # Iteration mark and ideographic zero (Han script)
         r"ー〜、。「」【】"  # Punctuation kept from the original pattern
         r"]"
     )
     return bool(re.search(japanese_pattern, text))
+# データベースへの接続を確立
 db = TinyDB("db.json")
+# 1日前のタイムスタンプを取得
 current_time = datetime.datetime.now()
 one_day_ago = current_time - datetime.timedelta(days=1)
+# データベースから1日前のタイムスタンプ以前のログを削除
 db.remove(Query().timestamp.test(lambda x: datetime.datetime.fromisoformat(x) <= one_day_ago))
+# タイトルと説明
+st.title("Baby Writer")
+st.write("これは、与えられたキーワードを使用して生成します。")
+# キーワード入力
 new_keyword = st.text_input("キーワード:")
+# キーワードごとにデータを保存するための識別子
 keyword_id = re.sub(r"\W+", "", new_keyword) if new_keyword else None
+# データベースから前回のキーワードを取得
 last_keyword = db.search(Query().keyword_id.exists())
 if new_keyword and (not last_keyword or last_keyword[0]['keyword_id'] != keyword_id):
+    # キーワードが変更された場合は、データベースから前回の結果を削除
     if last_keyword:
         db.remove(Query().keyword_id == last_keyword[0]['keyword_id'])
+    # output1.txt、output2.txt、output3.txtの内容をクリアする
     with open("output1.txt", "w") as f:
         f.write("")
     with open("output2.txt", "w") as f:
         f.write("")
     with open("output3.txt", "w") as f:
         f.write("")
+    # output0-1.txt、output0-2.txt、output0-3.txtを削除する
     for i in range(1, 4):
         filename = f"output0-{i}.txt"
         if os.path.exists(filename):
             os.remove(filename)
+# 新しいキーワードが入力されたときにGoogle検索を行う
 if new_keyword:
     urls, keyword = get_top_urls_and_keyword(new_keyword)
+    if len(urls) < 3:  # Google検索の結果が3つ以上であることを確認
         st.error("Google検索の結果が3つ未満です。別のキーワードを試してみてください。")
     else:
         url1, url2, url3 = urls
+if keyword_id:  # キーワードIDが存在することを確認
+        # 出力欄
+        output1 = st.empty()
+        output2 = st.empty()
+        output3 = st.empty()
+        # データベースから編集済みの "output2.txt" を読み込む処理を追加
+        result = db.search((Query().name == "output2.txt") & (Query().keyword_id == keyword_id))
+        if result:
+            editable_output2 = result[0]["content"]
+        else:
+            editable_output2 = ""
+        # runボタン
+        if st.button("構成作成", key=f"run_button_{keyword_id}"):
             with st.spinner("タイトル・見出し作成中..."):
                 urls, keyword = get_top_urls_and_keyword(new_keyword)
                 url1, url2, url3 = urls
+                # 重複チェックと同じサイト内のページチェック
                 parsed_urls = [urlparse(url) for url in urls]
                 if len(urls) != len(set(urls)):
                     st.error("異なるURLを入力してください。")
                     st.error("異なるサイトのURLを入力してください。")
                     st.stop()
+                subprocess.run(["python3", "first.py", url1, url2, url3])
             with open("output1.txt", "r", encoding="utf-8") as f:
                 content = f.read()
+            # "関連するテキスト部分:"とそれ以降の部分を削除
             content = re.sub(r"\n関連するテキスト部分:.*", "", content, flags=re.DOTALL)
             output1.text(content)
             db.upsert({"name": "output1.txt", "content": content, "keyword_id": keyword_id},
+                      (Query().name == "output1.txt") & (Query().keyword_id == keyword_id))  # データベースに結果を保存
+            subprocess.run(["python3", "second.py", keyword])
             with open("output2.txt", "r", encoding="utf-8") as f:
                 editable_output2 = f.read()
                 soup = BeautifulSoup(editable_output2, "html.parser")
                     )
             st.success("処理が完了しました。")
+        # 編集欄を表示し、編集後の内容をeditable_output2に更新
+        editable_output2 = st.text_area("output2.txtを編集してください:", value=editable_output2)
+        # run2ボタン
+        if st.button("本文作成"):
+            with st.spinner("本文作成中..."):
+                # 編集済みの "output2.txt" の内容をデータベースに保存
+                existing_docs = db.search((Query().name == "output2.txt") & (Query().keyword_id == keyword_id))
+                if existing_docs:
+                    db.update({"content": editable_output2}, doc_ids=[doc.doc_id for doc in existing_docs])
+                # データベースから読み込んだ "output2.txt" の内容をファイルに書き込む処理
+                with open("output2.txt", "w", encoding="utf-8") as f:
+                    f.write(editable_output2)
+                process = subprocess.Popen(
+                    ["python3", "third.py"], stdout=subprocess.PIPE, stderr=subprocess.PIPE
+                )
+                stdout, stderr = process.communicate()
+                process.wait()
+                if stderr:
+                    st.write(f"エラー: {stderr.decode('utf-8')}")
+                if os.path.exists("output3.txt"):
+                    with open("output3.txt", "r", encoding="utf-8") as f:
+                        output3.text(f.read())
+                else:
+                    st.write("output3.txt が見つかりません。third.py が正常に実行されていることを確認してください。")
+        # 保存ボタン
+        if st.button("保存"):
             content = editable_output2
             with open("output2.txt", "w", encoding="utf-8") as f:
                 f.write(content)
             db.upsert({"name": "output2.txt", "content": content, "timestamp": current_time.isoformat(),
+                       "keyword_id": keyword_id}, (Query().name == "output2.txt") & (Query().keyword_id == keyword_id))  # データベースに変更を保存
             st.write("output2.txt に変更が保存されました。")
+        # クリアボタン
+        if st.button("データクリア"):
+            db.remove(Query().keyword_id == keyword_id)
+            st.write("データベースがクリアされました。")
 else:
     st.warning("キーワードを入力してください。")