Spaces:
Build error
Build error
Initial Commit
Browse files- .gitignore +1 -0
- app.py +121 -0
- novel2vec_skipgram_gensim4_100dim.model +3 -0
- requirements.txt +1 -0
- tests.py +4 -0
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
__pycache__/*
|
app.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from gradio import (
|
| 2 |
+
Blocks,
|
| 3 |
+
Tabs,
|
| 4 |
+
TabItem,
|
| 5 |
+
Textbox,
|
| 6 |
+
Markdown,
|
| 7 |
+
Button,
|
| 8 |
+
Group,
|
| 9 |
+
Label,
|
| 10 |
+
Row,
|
| 11 |
+
Examples,
|
| 12 |
+
)
|
| 13 |
+
from gensim.models import Word2Vec
|
| 14 |
+
import numpy as np
|
| 15 |
+
import re
|
| 16 |
+
|
| 17 |
+
# Load the Word2Vec model once at import time so every handler below shares
# it. The path is relative to the current working directory.
novel2vec: Word2Vec = Word2Vec.load("./novel2vec_skipgram_gensim4_100dim.model")
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def on_show_novel_vector_click(input_novel_title):
    """Render the vector stored for one novel title as text.

    Args:
        input_novel_title: Title to look up in ``novel2vec.wv``.

    Returns:
        The vector components, each formatted to three decimals and joined
        by single spaces. If the title is not a key of ``novel2vec.wv``, a
        Japanese "not in the database" message is returned instead.
    """
    word_vectors = novel2vec.wv
    if input_novel_title in word_vectors:
        components = word_vectors.get_vector(input_novel_title)
        return " ".join(f"{component:.3f}" for component in components)

    return f"『{input_novel_title}』はデータベースにありません"
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def create_show_novel_vector_tab():
    """Build the tab that shows the raw vector of a single novel.

    The tab contains a button and a one-line title textbox in a row, plus a
    Markdown area that receives the text returned by
    ``on_show_novel_vector_click`` when the button is clicked.

    NOTE(review): the nesting of the ``with`` blocks was reconstructed from a
    flattened diff view — confirm it against the intended layout.
    """
    with TabItem("小説ベクトルを見る"):
        with Row():
            # Creation order is kept: button first, then the textbox.
            show_button = Button("ベクトルを見る")
            title_box = Textbox(label="小説の題名", max_lines=1)
        vector_view = Markdown()

        show_button.click(
            fn=on_show_novel_vector_click,
            inputs=title_box,
            outputs=vector_view,
        )
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def parse_novel_formula(novel_formula):
    """Split a "novel formula" into titles to add and titles to subtract.

    The formula is a run of titles separated by ``+`` and ``-``. A title goes
    to the negative list when the closest operator before it is ``-``;
    otherwise (``+`` or no operator at all) it goes to the positive list.
    ASCII spaces are removed from every token and empty tokens are dropped,
    so repeated operators such as ``++`` are harmless.

    Args:
        novel_formula: Formula text, e.g. ``"A + B - C"``.

    Returns:
        A ``(positive, negative)`` tuple of lists of title strings, each in
        order of appearance.
    """
    positive = []
    negative = []
    target = positive  # titles before any operator count as positive

    # The capturing group makes re.split keep the operators as tokens.
    for raw_token in re.split(r"([+-])", novel_formula):
        token = raw_token.replace(" ", "")
        if not token:
            continue
        if token == "+":
            target = positive
        elif token == "-":
            target = negative
        else:
            target.append(token)

    return positive, negative
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def on_start_calc_click(input_novel_formula):
    """Evaluate a novel formula and return the most similar novels.

    Args:
        input_novel_formula: A single title, or titles joined by ``+`` and
            ``-``, as accepted by ``parse_novel_formula``.

    Returns:
        A ``(label_data, message)`` pair. On success ``label_data`` maps up
        to 20 titles to their ``most_similar_cosmul`` score and ``message``
        is ``""``. On failure ``label_data`` is ``None`` and ``message`` is a
        Markdown error text.
    """
    positive, negative = parse_novel_formula(input_novel_formula)

    unknown_titles = [
        title for title in positive + negative if title not in novel2vec.wv
    ]
    if unknown_titles:
        message_lines = ["** 以下の題名がデータベースにありません **"]
        message_lines.extend(f" * {title}" for title in unknown_titles)
        return None, "\n".join(message_lines)

    # most_similar_cosmul raises ValueError when it gets no positive input
    # (empty textbox, or a formula made only of subtracted titles). Report
    # that to the user instead of letting the handler crash.
    if not positive:
        return None, "** 足し算する小説の題名を1つ以上入力してください **"

    result = novel2vec.wv.most_similar_cosmul(
        positive=positive, negative=negative, topn=20
    )
    return dict(result), ""
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def create_calc_novels_tab():
    """Build the tab that finds similar novels or evaluates a novel formula.

    The tab holds a submit button with a formula textbox, a result label with
    a Markdown message area, and a set of examples followed by usage notes.
    Both the button and the examples are wired to ``on_start_calc_click``.

    NOTE(review): the nesting of the ``with`` blocks was reconstructed from a
    flattened diff view — confirm it against the intended layout.
    """
    example_formulas = [
        "世界から猫が消えたなら",
        "君の膵臓を食べたい + 涼宮ハルヒの憂鬱 - 三日間の幸福",
    ]
    # Built from flush-left pieces so no source indentation leaks into the
    # Markdown text.
    usage_notes = (
        "\n"
        "* 小説の題名を1つ入力→ **類似した小説を検索** できます。\n"
        "* 小説の題名を足し算したり引き算する→ **小説を演算** できます。\n"
    )

    with TabItem("小説の演算を行う"):
        with Group():
            with Row():
                calc_button = Button("演算する", variant="primary")
                formula_box = Textbox(label="小説の題名または小説式")

        with Group():
            result_label = Label(label="演算結果")
            message_view = Markdown("")

        with Group():
            Examples(
                examples=example_formulas,
                inputs=formula_box,
                outputs=[result_label, message_view],
                fn=on_start_calc_click,
            )
            Markdown(usage_notes)

        calc_button.click(
            fn=on_start_calc_click,
            inputs=formula_box,
            outputs=[result_label, message_view],
        )
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def main():
    """Assemble the Novel2Vec console (header plus two tabs) and launch it."""
    # Built from flush-left pieces so no source indentation leaks into the
    # Markdown text.
    header_text = (
        "\n"
        "## Novel2Vec Console\n"
        "Twitterハッシュタグ`名刺代わりの小説10選`をWord2Vecで学習させ、小説をベクトル化しました。\n"
        "ツイートデータは[GINK03様が公開されているデータ](https://github.com/GINK03/novel_recommend/blob/master/var/shosetsu_dataset.csv)を用いました。\n"
        "詳しくは[私の記事を参照](https://note.com/omiyayimo/n/n0301112dbcc7)ください。\n"
    )

    with Blocks() as interface:
        Markdown(header_text)
        with Tabs():
            # Tab order: formula calculator first, raw vector viewer second.
            create_calc_novels_tab()
            create_show_novel_vector_tab()

    interface.launch()


# Only start the UI when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()
|
novel2vec_skipgram_gensim4_100dim.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4cc9693de2040a63333ec5220fe6ff39adceff24f239e8d5a8b7b6d2f647cdb
|
| 3 |
+
size 3680495
|
requirements.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
gensim
|
tests.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def parse_novel_formula_test():
    """Check ``parse_novel_formula`` on a formula with stray spaces and operators.

    Raises:
        AssertionError: If the parsed result differs from the expected split.
    """
    # Imported inside the function, as before; importing ``app`` loads the
    # model file at module level.
    from app import parse_novel_formula

    result = parse_novel_formula("-A+ B ++C-D+E")
    print(result)

    # Printing alone can never fail, so pin the expected split to make a
    # parser regression visible.
    expected = (["B", "C", "E"], ["A", "D"])
    assert result == expected, f"expected {expected}, got {result}"


parse_novel_formula_test()
|