Spaces:
Runtime error
Runtime error
Nick Canu committed on
Commit ·
a79e4c0
1
Parent(s): abb14f6
report button and tab update
Browse files
.vscode/launch.json
DELETED
|
@@ -1,16 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
// Use IntelliSense to learn about possible attributes.
|
| 3 |
-
// Hover to view descriptions of existing attributes.
|
| 4 |
-
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
| 5 |
-
"version": "0.2.0",
|
| 6 |
-
"configurations": [
|
| 7 |
-
{
|
| 8 |
-
"name": "Python: Module",
|
| 9 |
-
"type": "python",
|
| 10 |
-
"request": "launch",
|
| 11 |
-
"module": "streamlit",
|
| 12 |
-
"args": ["run", "Home.py"],
|
| 13 |
-
"justMyCode": true
|
| 14 |
-
}
|
| 15 |
-
]
|
| 16 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Alternate Class Files for Appendix/Community Aggregation - Input Manager.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#Alternative input manager for description generator
class input_manager:
    """Maps free-form category inputs onto the known feature-vector key space
    and retrieves nearest-neighbor game communities for text generation.
    """

    #initialize key dictionary from vector data frame and set community top N
    def __init__(self, key_df, slim_df, search_tokens, top_n=10):
        self.key_df = key_df
        self.slim_df = slim_df
        # search_tokens: per-feature-class iterables of spaCy tokens, indexed
        # 0=game_type, 1=mechanic, 2=category, 3=family
        self.search_tokens = search_tokens
        # one 0.0 slot per feature column; copied and "activated" per query
        self.key = dict(zip(list(key_df.columns), np.zeros(len(key_df.columns))))
        self.top_n = top_n
        self.nlp = spacy.load("en_core_web_md")

    #translate input text to vector
    def set_input(self, input_cats):
        """Return a copy of the key dict with the given categories set to 1.0.

        Known categories are activated directly; unknown ones are resolved to
        the most similar known token within their feature class (by word-vector
        similarity). Unknowns with no positive similarity are discarded —
        other solutions were performance prohibitive.
        """
        #separate known/unknown features
        k_flags = [cat for cat in input_cats if cat in self.key]
        unk_flags = [cat for cat in input_cats if cat not in self.key]

        #process within-feature-class similarity for each unknown input
        if unk_flags:
            # prefix -> index into self.search_tokens
            classes = [("game_type_", 0), ("mechanic_", 1),
                       ("category_", 2), ("family_", 3)]
            outs = []
            for word in unk_flags:
                for prefix, idx in classes:
                    if word.startswith(prefix):
                        tok = self.nlp(word.split("_")[-1])
                        mtch = max(((key, key.similarity(tok))
                                    for key in self.search_tokens[idx]),
                                   key=itemgetter(1))
                        #if no known match is found (model doesn't recognize
                        #input word), discard it
                        if mtch[1] > 0:
                            # BUG FIX: original concatenated a spaCy token to a
                            # str without str() in 3 of 4 branches (TypeError)
                            outs.append(prefix + str(mtch[0]))
                        break

            #rejoin nearest matches to known flags (dedup via set)
            k_flags = list(set(k_flags + outs))

        #preserve global key and output a copy w/ input keys activated to 1
        d = self.key.copy()
        for cat in k_flags:
            d[cat] = 1.0
        return d

    def input_parser(self, in_vec):
        """Return (active keys, candidate slim rows, raw vector values).

        Candidates are rows sharing >=3 hot columns with the input; if fewer
        than top_n qualify, the threshold relaxes to >=1 match.
        """
        #extracting keys from processed vector
        ks = [k for k, v in in_vec.items() if v == 1]

        #raw "total" match score - how many input columns are hot per row
        inter = self.key_df[ks].sum(axis=1)

        #minimum match check: drop rows without 3 matches
        # BUG FIX: fallback previously indexed self.key_df, returning vector
        # columns instead of the slim (text) frame; dead local cand_vec removed
        cand_slim = self.slim_df.iloc[list(inter[inter >= 3].index)]
        #if fewer ranked matches than the specified top n, relax to 1 match
        if len(cand_slim) < self.top_n:
            cand_slim = self.slim_df.iloc[list(inter[inter >= 1].index)]

        return ks, cand_slim, in_vec.values()

    #per-community pairwise jaccard similarity to input, split by feature class
    def ret_jaccard(self, in_vec, t_vec):
        """Mean Jaccard similarity across feature classes plus co-op match.

        NOTE(review): slice bounds are hard-coded column ranges of the feature
        vector (1:9 game type, 9:192 mechanic, 192:276 category, 276:3901
        family, [0] co-op flag) — confirm against the vector layout if columns
        change.
        """
        gt_score = sklearn.metrics.jaccard_score(in_vec[1:9], t_vec[1:9], zero_division=0)
        cat_score = sklearn.metrics.jaccard_score(in_vec[192:276], t_vec[192:276], zero_division=0)
        mech_score = sklearn.metrics.jaccard_score(in_vec[9:192], t_vec[9:192], zero_division=0)
        fam_score = sklearn.metrics.jaccard_score(in_vec[276:3901], t_vec[276:3901], zero_division=0)
        coop_score = 1 if in_vec[0] == t_vec[0] else 0

        #initial weighting treats all feature classes as equal - looking into
        #updating this as a feedback mechanism
        return np.mean([gt_score, cat_score, mech_score, fam_score, coop_score])

    #return community neighbors
    def n_neighbors(self, in_data):
        """Score every candidate vector against the input and return the
        top-ranked slim rows (rank strictly below top_n, ties kept together).
        """
        #applies jaccard func to each row using vectors, maps to slim df w/text
        slim, vec, in_vec = in_data
        vec['score'] = vec.apply(lambda x: self.ret_jaccard(in_vec, x), raw=True, axis=1)
        slim['score'] = vec['score']

        #convert to rank - avoids splitting equal-scoring groups inappropriately
        slim['rank'] = slim['score'].rank(ascending=False)
        return slim[slim['rank'] < self.top_n].sort_values(by=['rank'])

    def query_score(self, outframe, gen_text):
        """Similarity-weighted mean score of generated text vs. the community.

        Requires the text-preprocessing function, the nearest-neighbor
        community dataframe (with 'score' and 'cleaned_descriptions'), and one
        piece of generated text.
        """
        query = doc_text_preprocessing(pd.Series(gen_text))
        desc_tokens = pd.concat([outframe['cleaned_descriptions'], pd.Series(query)])
        desc_dict = corpora.Dictionary()
        desc_corpus = [desc_dict.doc2bow(doc, allow_update=True) for doc in desc_tokens]
        temp_index = get_tmpfile("index")
        index = similarities.Similarity(temp_index, desc_corpus,
                                        num_features=len(desc_dict.token2id))

        sim_stack = [sims for sims in index]

        # BUG FIX: original referenced undefined name `out`; the community
        # frame parameter is `outframe`
        return (gen_text, np.mean(np.multiply(outframe['score'], sim_stack[-1][:-1])))
|
Home.py
CHANGED
|
@@ -2,6 +2,8 @@ import streamlit as st
|
|
| 2 |
|
| 3 |
st.set_page_config(page_title='Auto-BG: The Game Concept Generator', layout='wide')
|
| 4 |
|
|
|
|
|
|
|
| 5 |
def application():
|
| 6 |
###Imports
|
| 7 |
import pandas as pd
|
|
@@ -14,6 +16,7 @@ def application():
|
|
| 14 |
from title_generator import Title_Generator
|
| 15 |
import gzip
|
| 16 |
import io
|
|
|
|
| 17 |
from description_generator import input_manager, model_control
|
| 18 |
from pathlib import Path
|
| 19 |
|
|
@@ -39,7 +42,7 @@ def application():
|
|
| 39 |
if 'coop_d' not in st.session_state:
|
| 40 |
st.session_state.coop_d = 0
|
| 41 |
|
| 42 |
-
#
|
| 43 |
#reader code extended from https://gist.github.com/thearn/5424244 for alternate load format
|
| 44 |
def reader(path):
|
| 45 |
f = gzip.GzipFile(filename=path)
|
|
@@ -81,7 +84,7 @@ def application():
|
|
| 81 |
inter_pair = Tgen.candidate_generator(clean_desc)
|
| 82 |
out = Tgen.candidate_score(inter_pair,ex_check)
|
| 83 |
descs.append(out)
|
| 84 |
-
|
| 85 |
st.session_state.output_dict = {0:descs[0],1:descs[1],2:descs[2]}
|
| 86 |
|
| 87 |
def title_check(next=0):
|
|
@@ -131,8 +134,23 @@ def application():
|
|
| 131 |
st.session_state.title_iter = 0
|
| 132 |
show_title(0)
|
| 133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
|
| 135 |
-
|
| 136 |
###Variables
|
| 137 |
|
| 138 |
###Data
|
|
@@ -161,8 +179,6 @@ def application():
|
|
| 161 |
|
| 162 |
Tgen, iman, mctrl = setup_models()
|
| 163 |
|
| 164 |
-
|
| 165 |
-
|
| 166 |
#UI
|
| 167 |
|
| 168 |
#Application
|
|
@@ -336,7 +352,9 @@ def application():
|
|
| 336 |
|
| 337 |
with d_col2:
|
| 338 |
st.button("See Next Description", on_click=ND_button_clicked, use_container_width=True)
|
| 339 |
-
|
|
|
|
|
|
|
| 340 |
def blog():
|
| 341 |
"""
|
| 342 |
Blog describing the Auto-BG project
|
|
@@ -386,12 +404,11 @@ def about_us():
|
|
| 386 |
*MADS (Master of Applied Data Science)*\n
|
| 387 |
""")
|
| 388 |
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
"Blog": blog,
|
| 392 |
-
"About Us": about_us,
|
| 393 |
-
}
|
| 394 |
|
| 395 |
-
|
| 396 |
-
|
| 397 |
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
st.set_page_config(page_title='Auto-BG: The Game Concept Generator', layout='wide')
|
| 4 |
|
| 5 |
+
tab1, tab2, tab3 = st.tabs(['App', 'Blog', 'About Us'])
|
| 6 |
+
|
| 7 |
def application():
|
| 8 |
###Imports
|
| 9 |
import pandas as pd
|
|
|
|
| 16 |
from title_generator import Title_Generator
|
| 17 |
import gzip
|
| 18 |
import io
|
| 19 |
+
from datetime import date
|
| 20 |
from description_generator import input_manager, model_control
|
| 21 |
from pathlib import Path
|
| 22 |
|
|
|
|
| 42 |
if 'coop_d' not in st.session_state:
|
| 43 |
st.session_state.coop_d = 0
|
| 44 |
|
| 45 |
+
#helper functions
|
| 46 |
#reader code extended from https://gist.github.com/thearn/5424244 for alternate load format
|
| 47 |
def reader(path):
|
| 48 |
f = gzip.GzipFile(filename=path)
|
|
|
|
| 84 |
inter_pair = Tgen.candidate_generator(clean_desc)
|
| 85 |
out = Tgen.candidate_score(inter_pair,ex_check)
|
| 86 |
descs.append(out)
|
| 87 |
+
results.success("Prompt " +str(status+1)+ "/3 Generated!")
|
| 88 |
st.session_state.output_dict = {0:descs[0],1:descs[1],2:descs[2]}
|
| 89 |
|
| 90 |
def title_check(next=0):
|
|
|
|
| 134 |
st.session_state.title_iter = 0
|
| 135 |
show_title(0)
|
| 136 |
|
| 137 |
+
def report():
    """Persist the currently displayed concept as a user-reported output.

    Appends one row (report date, pipe-joined inputs, current title, current
    description) to Persistent_Data/reported_df.PICKLE, creating the file on
    first use and dropping duplicate reports.
    """
    inputs = '|'.join(str(x) for x in st.session_state.inputs)
    data = {
        'rprtd': date.today(),
        'inpts': inputs,
        'title': st.session_state.output_dict[st.session_state.desc_iter]['titles'][st.session_state.title_iter][0],
        'desc': st.session_state.output_dict[st.session_state.desc_iter]['text'],
    }
    # BUG FIX: build the new row and path outside the try block — the original
    # constructed r_df inside `try` yet referenced it in `except`, so an early
    # failure would raise NameError; debug prints removed
    r_df = pd.DataFrame(data, index=[0])
    pickle_path = Path(__file__).parent / "Persistent_Data/reported_df.PICKLE"
    try:
        # merge with prior reports, dropping exact duplicates
        prior = pd.read_pickle(pickle_path)
        w_p = pd.concat([r_df, prior]).drop_duplicates()
    except (FileNotFoundError, EOFError):
        # first report ever (or empty file): start a fresh frame
        # NOTE(review): original used a bare `except:`; narrowed to the
        # expected missing/empty-file cases — confirm no other failure modes
        # were being relied on
        w_p = r_df
    w_p.to_pickle(pickle_path)
|
| 152 |
|
| 153 |
+
|
| 154 |
###Variables
|
| 155 |
|
| 156 |
###Data
|
|
|
|
| 179 |
|
| 180 |
Tgen, iman, mctrl = setup_models()
|
| 181 |
|
|
|
|
|
|
|
| 182 |
#UI
|
| 183 |
|
| 184 |
#Application
|
|
|
|
| 352 |
|
| 353 |
with d_col2:
|
| 354 |
st.button("See Next Description", on_click=ND_button_clicked, use_container_width=True)
|
| 355 |
+
|
| 356 |
+
st.button('Report', on_click=report, use_container_width=True)
|
| 357 |
+
|
| 358 |
def blog():
|
| 359 |
"""
|
| 360 |
Blog describing the Auto-BG project
|
|
|
|
| 404 |
*MADS (Master of Applied Data Science)*\n
|
| 405 |
""")
|
| 406 |
|
| 407 |
+
with tab1:
|
| 408 |
+
application()
|
|
|
|
|
|
|
|
|
|
| 409 |
|
| 410 |
+
with tab2:
|
| 411 |
+
blog()
|
| 412 |
|
| 413 |
+
with tab3:
|
| 414 |
+
about_us()
|