Add files using upload-large-folder tool
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- dir/__pycache__/DCS.cpython-310.pyc +0 -0
- dir/__pycache__/DCS.cpython-36.pyc +0 -0
- dir/__pycache__/ECL_MST.cpython-310.pyc +0 -0
- dir/__pycache__/ECL_MST.cpython-36.pyc +0 -0
- dir/__pycache__/MatDB.cpython-310.pyc +0 -0
- dir/__pycache__/MatDB.cpython-36.pyc +0 -0
- dir/__pycache__/TestPool_Unit_clique.cpython-310.pyc +0 -0
- dir/__pycache__/Train_clique.cpython-310.pyc +0 -0
- dir/__pycache__/Train_clique.cpython-36.pyc +0 -0
- dir/__pycache__/heap_n_clique.cpython-310.pyc +0 -0
- dir/__pycache__/nnet.cpython-310.pyc +0 -0
- dir/__pycache__/nnet.cpython-36.pyc +0 -0
- dir/__pycache__/romtoslp.cpython-310.pyc +0 -0
- dir/__pycache__/romtoslp.cpython-36.pyc +0 -0
- dir/__pycache__/sentences.cpython-310.pyc +0 -0
- dir/__pycache__/sentences.cpython-36.pyc +0 -0
- dir/__pycache__/utilities.cpython-310.pyc +0 -0
- dir/__pycache__/utilities.cpython-36.pyc +0 -0
- dir/__pycache__/wordTypeCheckFunction.cpython-310.pyc +0 -0
- dir/__pycache__/wordTypeCheckFunction.cpython-36.pyc +0 -0
- dir/__pycache__/word_definite.cpython-310.pyc +0 -0
- dir/__pycache__/word_definite.cpython-36.pyc +0 -0
- dir/bucket_by_conflicting_nodes_5.py +73 -0
- dir/outputs/BM2_NLoss_proc0.csv +0 -0
- dir/outputs/BM2_NLoss_proc1.csv +0 -0
- dir/outputs/BM2_NLoss_proc2.csv +0 -0
- dir/outputs/BM2_NLoss_proc3.csv +0 -0
- dir/outputs/nnet_LOGS.csv +13 -0
- dir/outputs/train_t7978754709018/nnet_e0_i250.p +3 -0
- dir/sh_TestPool_MP.py +168 -0
- wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/11095.ds.bz2 +3 -0
- wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/132845.ds.bz2 +3 -0
- wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/13718.ds.bz2 +3 -0
- wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/13978.ds.bz2 +3 -0
- wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/150705.ds.bz2 +3 -0
- wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/155358.ds.bz2 +3 -0
- wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/160568.ds.bz2 +3 -0
- wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/1618.ds.bz2 +3 -0
- wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/161938.ds.bz2 +3 -0
- wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/164353.ds.bz2 +3 -0
- wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/166502.ds.bz2 +3 -0
- wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/185308.ds.bz2 +0 -0
- wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/20753.ds.bz2 +3 -0
- wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/222129.ds.bz2 +3 -0
- wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/224349.ds.bz2 +3 -0
- wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/231785.ds.bz2 +3 -0
- wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/236526.ds.bz2 +3 -0
- wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/24384.ds.bz2 +3 -0
- wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/258696.ds.bz2 +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
dir/outputs/train_t7978754709018/nnet_e0_i250.p filter=lfs diff=lfs merge=lfs -text
|
dir/__pycache__/DCS.cpython-310.pyc
ADDED
|
Binary file (957 Bytes). View file
|
|
|
dir/__pycache__/DCS.cpython-36.pyc
ADDED
|
Binary file (951 Bytes). View file
|
|
|
dir/__pycache__/ECL_MST.cpython-310.pyc
ADDED
|
Binary file (3.32 kB). View file
|
|
|
dir/__pycache__/ECL_MST.cpython-36.pyc
ADDED
|
Binary file (3.33 kB). View file
|
|
|
dir/__pycache__/MatDB.cpython-310.pyc
ADDED
|
Binary file (1.6 kB). View file
|
|
|
dir/__pycache__/MatDB.cpython-36.pyc
ADDED
|
Binary file (1.59 kB). View file
|
|
|
dir/__pycache__/TestPool_Unit_clique.cpython-310.pyc
ADDED
|
Binary file (1.48 kB). View file
|
|
|
dir/__pycache__/Train_clique.cpython-310.pyc
ADDED
|
Binary file (15.9 kB). View file
|
|
|
dir/__pycache__/Train_clique.cpython-36.pyc
ADDED
|
Binary file (15.5 kB). View file
|
|
|
dir/__pycache__/heap_n_clique.cpython-310.pyc
ADDED
|
Binary file (6.4 kB). View file
|
|
|
dir/__pycache__/nnet.cpython-310.pyc
ADDED
|
Binary file (6.71 kB). View file
|
|
|
dir/__pycache__/nnet.cpython-36.pyc
ADDED
|
Binary file (7.05 kB). View file
|
|
|
dir/__pycache__/romtoslp.cpython-310.pyc
ADDED
|
Binary file (715 Bytes). View file
|
|
|
dir/__pycache__/romtoslp.cpython-36.pyc
ADDED
|
Binary file (727 Bytes). View file
|
|
|
dir/__pycache__/sentences.cpython-310.pyc
ADDED
|
Binary file (6.11 kB). View file
|
|
|
dir/__pycache__/sentences.cpython-36.pyc
ADDED
|
Binary file (6.26 kB). View file
|
|
|
dir/__pycache__/utilities.cpython-310.pyc
ADDED
|
Binary file (7.3 kB). View file
|
|
|
dir/__pycache__/utilities.cpython-36.pyc
ADDED
|
Binary file (7.37 kB). View file
|
|
|
dir/__pycache__/wordTypeCheckFunction.cpython-310.pyc
ADDED
|
Binary file (7.44 kB). View file
|
|
|
dir/__pycache__/wordTypeCheckFunction.cpython-36.pyc
ADDED
|
Binary file (6.7 kB). View file
|
|
|
dir/__pycache__/word_definite.cpython-310.pyc
ADDED
|
Binary file (78.3 kB). View file
|
|
|
dir/__pycache__/word_definite.cpython-36.pyc
ADDED
|
Binary file (78.3 kB). View file
|
|
|
dir/bucket_by_conflicting_nodes_5.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pickle
|
| 2 |
+
import os
|
| 3 |
+
import bz2
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def harmonic(P, R):
    """Return the F-score, i.e. the harmonic mean of precision P and recall R.

    Returns 0.0 when both P and R are zero (the usual F-score convention)
    instead of raising ZeroDivisionError, which the original did.
    """
    if P + R == 0:
        return 0.0
    return(2 * P * R / float(P + R))
|
| 9 |
+
# Test on a couple of files
|
| 10 |
+
base_path_csv = '/home/rs/15CS91R05/gaurav/myTryouts/init_results/prediction_csvs/'
|
| 11 |
+
base_path_bz2 = '/home/rs/15CS91R05/Bishal/NewData/skt_dcs_DS.bz2_1L_bigram_heldout_dev/'
|
| 12 |
+
|
| 13 |
+
pred_csvs = os.listdir(base_path_csv)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
"""Task 5: See data from number of conflicts
|
| 17 |
+
Approach: Select a node from DCS, take count of conflicting nodes using the conflictsDict_correct
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
# Function to open bz2 files (that contains both DCS & SKT info)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def open_dsbz2(filename):
    """Load a ``.ds.bz2`` pickle (holding both DCS and SKT graph data).

    Returns a 7-tuple in this fixed order:
    (nodelist_correct, conflicts_Dict_correct, featVMat_correct,
     nodelist_to_correct_mapping, nodelist, conflicts_Dict, featVMat)
    """
    with bz2.BZ2File(filename, 'r') as fh:
        payload = pickle.load(fh)

    # Unpack by key in the exact order callers expect.
    field_order = (
        'nodelist_correct',
        'conflicts_Dict_correct',
        'featVMat_correct',
        'nodelist_to_correct_mapping',
        'nodelist',
        'conflicts_Dict',
        'featVMat',
    )
    return tuple(payload[key] for key in field_order)
|
| 37 |
+
|
| 38 |
+
# Bucket per-sentence precision/recall by the number of conflicting nodes of
# each correct node.  Keyed by conflict count; each bucket accumulates the
# lemmas seen and running precision/recall sums ([word, lemma] pairs).
bucket_by_conflicting_nodes = {}
num_conflicting_nodes = set()  # conflict counts already seen (bucket keys)
# NOTE(review): pred_csvs[5] picks one arbitrary prediction CSV — confirm
# this is the intended file; the CSV appears to use 6 lines per sentence.
csv = open(base_path_csv + pred_csvs[5], 'r').readlines()

for line in range(0, len(csv), 6):
    # First line of each 6-line record: comma-separated, field 0 is the
    # sentence/file id used to locate the matching .ds.bz2 file.
    head_line = csv[line].strip().split(',')
    fname = head_line[0]
    print("Bz2 File number", fname, line / 6)

    (nodelist_correct, conflicts_Dict_correct, featVMat_correct, nodelist_to_correct_mapping,\
        nodelist, conflicts_Dict, featVMat) = open_dsbz2(base_path_bz2 + fname + '.ds.bz2')

    # Sanity check: every correct node must have a conflicts entry.
    assert len(nodelist_correct) == len(conflicts_Dict_correct)
    for node in conflicts_Dict_correct:
        lemma = nodelist_correct[node].lemma
        conflicting_nodes_count = len(conflicts_Dict_correct[node])
        # Lazily create the bucket the first time this count is seen.
        if conflicting_nodes_count not in num_conflicting_nodes:
            num_conflicting_nodes.add(conflicting_nodes_count)
            bucket_by_conflicting_nodes[conflicting_nodes_count] = {'lemmas': set(), 'precision': [0, 0], 'recall': [0, 0]}

        bucket_by_conflicting_nodes[conflicting_nodes_count]['lemmas'].add(lemma)

        # Sixth line of the record carries the counts; presumably
        # data[1]=word matches, data[2]=lemma matches, data[3]=gold words,
        # data[4]=predicted words — TODO confirm against the CSV writer.
        data = csv[line + 5].strip().split(',')
        word_recall = float(data[1]) / float(data[3])
        lemma_recall = float(data[2]) / float(data[3])
        word_precision = float(data[1]) / float(data[4])
        lemma_precision = float(data[2]) / float(data[4])

        # NOTE(review): these sentence-level metrics are added once per
        # correct node, so buckets are weighted by node count — verify
        # this weighting is intended.
        bucket_by_conflicting_nodes[conflicting_nodes_count]['recall'][0] += word_recall
        bucket_by_conflicting_nodes[conflicting_nodes_count]['recall'][1] += lemma_recall
        bucket_by_conflicting_nodes[conflicting_nodes_count]['precision'][0] += word_precision
        bucket_by_conflicting_nodes[conflicting_nodes_count]['precision'][1] += lemma_precision


# Persist the buckets for later analysis.
with open('final_task_gaurav/bucket_by_conflicting_nodes_5.p', 'wb') as f:
    pickle.dump(bucket_by_conflicting_nodes, f)
|
dir/outputs/BM2_NLoss_proc0.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
dir/outputs/BM2_NLoss_proc1.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
dir/outputs/BM2_NLoss_proc2.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
dir/outputs/BM2_NLoss_proc3.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
dir/outputs/nnet_LOGS.csv
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
odir,p_name,hidden_layer_size,_edge_vector_dim
|
| 2 |
+
outputs/train_t8910478283364,outputs/train_t8910478283364/nnet.p,1200,1500,wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/
|
| 3 |
+
outputs/train_t8910591553006,outputs/train_t8910591553006/nnet.p,1200,1500,wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/
|
| 4 |
+
outputs/train_t8911603194394,outputs/train_t8911603194394/nnet.p,1200,1500,wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/
|
| 5 |
+
outputs/train_t8936290095166,outputs/train_t8936290095166/nnet.p,1200,1500,wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/
|
| 6 |
+
outputs/train_t9360928477371,outputs/train_t9360928477371/nnet.p,1200,1500,../wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/
|
| 7 |
+
outputs/train_t9557657979896,outputs/train_t9557657979896/nnet.p,1200,1500,../wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/
|
| 8 |
+
outputs/train_t50078795761,outputs/train_t50078795761/nnet.p,1200,1500,../wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/
|
| 9 |
+
outputs/train_t3896665073989,outputs/train_t3896665073989/nnet.p,1200,1500,../wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/
|
| 10 |
+
outputs/train_t3896696274425,outputs/train_t3896696274425/nnet.p,1200,1500,../wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/
|
| 11 |
+
outputs/train_t3896890268250,outputs/train_t3896890268250/nnet.p,1200,1500,../wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/
|
| 12 |
+
outputs/train_t3896948866803,outputs/train_t3896948866803/nnet.p,1200,1500,../wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/
|
| 13 |
+
outputs/train_t3896991835776,outputs/train_t3896991835776/nnet.p,1200,1500,../wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/
|
dir/outputs/train_t7978754709018/nnet_e0_i250.p
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b224a77e8418b829f233fe6080bd5f1d596f22e4a735f696d0e6df4032a9e7a5
|
| 3 |
+
size 14419601
|
dir/sh_TestPool_MP.py
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import multiprocessing as mp
|
| 2 |
+
import TestPool_Unit
|
| 3 |
+
from shutil import copyfile
|
| 4 |
+
import numpy as np
|
| 5 |
+
import time
|
| 6 |
+
import sys
|
| 7 |
+
from optparse import OptionParser
|
| 8 |
+
|
| 9 |
+
from collections import defaultdict
|
| 10 |
+
|
| 11 |
+
def Evaluate(result_arr):
    """Print running micro precision/recall/F-score over the partial results.

    Each entry of result_arr is a 4-tuple:
    (word_match, lemma_match, n_dcsWords, n_output_nodes).
    """
    print('Files Processed: ', len(result_arr))

    # Per-file micro metrics, built in one pass each.
    recalls = [lm / nd for (wm, lm, nd, no) in result_arr]
    recalls_of_word = [wm / nd for (wm, lm, nd, no) in result_arr]
    precisions = [lm / no for (wm, lm, nd, no) in result_arr]
    precisions_of_words = [wm / no for (wm, lm, nd, no) in result_arr]
    fully_Correct_l = sum(1 for (wm, lm, nd, no) in result_arr if lm == nd)
    fully_Correct_w = sum(1 for (wm, lm, nd, no) in result_arr if wm == nd)

    # Hoist the means — the original recomputed np.mean for every print.
    rl = np.mean(np.array(recalls))
    rw = np.mean(np.array(recalls_of_word))
    pl = np.mean(np.array(precisions))
    pw = np.mean(np.array(precisions_of_words))
    n_files = len(recalls_of_word)

    print('Avg. Micro Recall of Lemmas: {}'.format(rl))
    print('Avg. Micro Recall of Words: {}'.format(rw))
    print('Avg. Micro Precision of Lemmas: {}'.format(pl))
    print('Avg. Micro Precision of Words: {}'.format(pw))
    print('F-Score of Lemmas: ', (2*pl*rl)/(pl+rl))
    print('Fully Correct Lemmawise: {}'.format(fully_Correct_l/n_files))
    print('Fully Correct Wordwise: {}'.format(fully_Correct_w/n_files))
    print('[{:0.2f}, {:0.2f}, {:0.2f}, {:0.2f}, {:0.2f}, {:0.2f}, {:0.2f}]'.format(
        100*rl, 100*rw, 100*pl, 100*pw, 100*(2*pl*rl)/(pl+rl),
        100*fully_Correct_l/n_files, 100*fully_Correct_w/n_files))
    sys.stdout.flush()
|
| 43 |
+
|
| 44 |
+
tag = None
|
| 45 |
+
proc_count = 4
|
| 46 |
+
|
| 47 |
+
def main():
    """Fan out held-out evaluation over `proc_count` worker processes.

    Uses the module globals `tag` (feature-set key) and `proc_count`
    (number of workers), set beforehand via setArgs().  Workers run
    TestPool_Unit.pooled_Test and push per-file result tuples onto a
    shared queue; this process polls, periodically drains the queue and
    prints running metrics via Evaluate().
    """
    global proc_count, tag
    # Held-out data folder per feature-set tag (relative to ../NewData/).
    ho_folders = {
        'PR2': 'skt_dcs_DS.bz2_4K_pmi_rfe_heldout',
        'BR2': 'skt_dcs_DS.bz2_4K_bigram_rfe_heldout',
        'PM2': 'skt_dcs_DS.bz2_4K_pmi_mir_heldout',
        'BM2': 'skt_dcs_DS.bz2_4K_bigram_mir_heldout',
        'PR3': 'skt_dcs_DS.bz2_1L_pmi_rfe_heldout',
        'BR3': 'skt_dcs_DS.bz2_1L_bigram_rfe_heldout',
        'PM3': 'skt_dcs_DS.bz2_1L_pmi_mir_heldout_again',
        'BM3': 'skt_dcs_DS.bz2_1L_bigram_heldout'
    }
    # Trained model checkpoint per tag (hard-coded training run ids).
    modelList = {
        'PR2': 'outputs/train_{}/nnet_e1_i400.p'.format('t2788294192566'),
        'BR2': 'outputs/train_{}/nnet_e1_i400.p'.format('t2789415023871'),
        'PM2': 'outputs/train_{}/nnet_e1_i400.p'.format('t2753954441900'),
        'BM2': 'outputs/train_{}/nnet_e1_i400.p'.format('t3401216067518'),
        'PR3': 'outputs/train_{}/nnet_e1_i400.p'.format('t2761370242287'),
        'BR3': 'outputs/train_{}/nnet_e1_i400.p'.format('t2779114903467'),
        'PM3': 'outputs/train_{}/nnet_e1_i400.p'.format('t2756013734745'),
        'BM3': 'outputs/train_{}/nnet_e1_i400.p'.format('t3471903174862')
    }
    modelFile = modelList[tag]
    print('Tag: {}, ModelFile: {}'.format(tag, modelFile))
    print('ProcCount: {}'.format(proc_count))
    # When _dump is True, workers also write per-file predictions to _outFile.
    _dump = True
    if _dump:
        _outFile = 'outputs/dump_predictions/{}_NLoss'.format(tag)
    else:
        _outFile = None
    print('OutFile: ', _outFile)

    # Backup the model file
    copyfile(modelFile, modelFile + '.bk')

    # Create Queue, Result array
    queue = mp.Queue()
    result_arr = []

    print('Source: ', '../NewData/{}/'.format(ho_folders[tag]))
    # Start 6 workers - 8 slows down the pc
    # proc_count = 4
    # Each worker gets an equal share of the (presumably ~9600-file)
    # held-out set — TODO confirm 9600 matches the folder size.
    procs = [None]*proc_count
    for i in range(proc_count):
        vpid = i
        procs[i] = mp.Process(target = TestPool_Unit.pooled_Test, args = \
            (modelFile, vpid, queue, '../NewData/{}/'.format(ho_folders[tag]), int(9600/proc_count), _dump, _outFile))
    # Start Processes
    for i in range(proc_count):
        procs[i].start()

    # Fetch partial results
    # Poll once per second while any worker is alive; every ~100 polls
    # drain the queue and print interim metrics.
    stillRunning = True
    printer_timer = 100
    while stillRunning:
        stillRunning = False
        for i in range(proc_count):
            p = procs[i]
            # print('Process with\t vpid: {}\t ->\t pid: {}\t ->\t running status: {}'.format(i, p.pid, p.is_alive()))
            if p.is_alive():
                stillRunning = True


        if printer_timer == 0:
            printer_timer = 100
            while not queue.empty():
                result_arr.append(queue.get())
            # Evaluate results till now
            if len(result_arr) > 0:
                Evaluate(result_arr)

        printer_timer -= 1

        time.sleep(1)
    # All workers exited: drain any remaining results and print the final
    # metrics, then reap the child processes.
    while not queue.empty():
        result_arr.append(queue.get())
    Evaluate(result_arr)
    for i in range(proc_count):
        procs[i].join()
|
| 126 |
+
def setArgs(_tag, _pc):
    """Store the feature-set tag and worker count in the module globals."""
    global proc_count, tag
    tag, proc_count = _tag, _pc
    print('Tag, ProcCount: {}, {}'.format(tag, proc_count))
|
| 131 |
+
|
| 132 |
+
if __name__ == '__main__':
    # Command line: -t/--tag selects the feature set (required),
    # -p/--procs sets the number of worker processes (default 4).
    parser = OptionParser()
    parser.add_option("-t", "--tag", dest="tag",
                      help="Tag for feature set to use", metavar="TAG")
    parser.add_option("-p", "--procs", dest="proc_count", default = 4,
                      help="Number of child process", metavar="PROCS")

    opts, _ = parser.parse_args()

    if opts.tag is None:
        raise Exception('None is tag')
    setArgs(opts.tag, int(opts.proc_count))

    main()
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
|
wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/11095.ds.bz2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3c2170c1ba5c7b2a4f737ad4596a85b26612f432d814803145cdd22cc3999a2
|
| 3 |
+
size 2500609
|
wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/132845.ds.bz2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b360d0e798ebccb039dad4231de59be4dee87bc06476aced3518b91f602545d3
|
| 3 |
+
size 959708
|
wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/13718.ds.bz2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:46da78eff48361f7e8c6ac3f89a9df7874bf6d7a9374188cad45e5413180e8a6
|
| 3 |
+
size 2041406
|
wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/13978.ds.bz2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:24ad9067a6e3d3616cdfaefb504c9436c48fd719eb7e356c024e350eef7fee10
|
| 3 |
+
size 1570927
|
wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/150705.ds.bz2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9d37c3f9c39e8077373c5c6f1bc5b5ef9f41496e305bcac955eae6d91f6e313
|
| 3 |
+
size 1918875
|
wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/155358.ds.bz2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a163a51491c67ebe9332fe6b57087b12fcfd64b76cc2de7a5d7939dea851cabf
|
| 3 |
+
size 2405589
|
wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/160568.ds.bz2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c83e837985d3f6a0c5a1f239ed859512982beda9388d0dd92f098eec4afef046
|
| 3 |
+
size 976570
|
wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/1618.ds.bz2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f291f2149e787e524f8dc1cc4d19e4836755ca56364c559f4943348b08689aee
|
| 3 |
+
size 246995
|
wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/161938.ds.bz2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08f7bb19546410d8bcef7b19fd59b92a44ca879e30c2fadef6a561f3b8879337
|
| 3 |
+
size 1187507
|
wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/164353.ds.bz2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c85d3719a80acf4f041875682f48a358d888319f3ad9f0e285e2c04b8ba7289f
|
| 3 |
+
size 835483
|
wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/166502.ds.bz2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2fc184db4031295245293381ab99a0618b6f8da3c34a983946611b5768713c7
|
| 3 |
+
size 705303
|
wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/185308.ds.bz2
ADDED
|
File without changes
|
wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/20753.ds.bz2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5a5757449519ac987c87037e9977ac7c70352811faf3eb1ee20a6670e77e183
|
| 3 |
+
size 2319924
|
wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/222129.ds.bz2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a955abc1af4e3ea82652b3100c6ffb682e4a6918d3fc668dfbc17380d59e205c
|
| 3 |
+
size 1172882
|
wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/224349.ds.bz2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7fecac2300d4b5b80dedf8b5cf031b8682f44496da2b3d7543452d2ad1c83599
|
| 3 |
+
size 5219326
|
wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/231785.ds.bz2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3fecc73f0a71f378582e493c72e11a3dc586294b12b195ee7fd05fd42d8293a8
|
| 3 |
+
size 2326312
|
wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/236526.ds.bz2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1db3aba248d9f6c4c9e8aac4d6a604b28b914b8f8733b7ddff91e944e6982d36
|
| 3 |
+
size 5333884
|
wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/24384.ds.bz2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac35fcdd721ce8734b1747ceebf3fa398eea3caa0cc79cd86ced4e19887a378e
|
| 3 |
+
size 21812
|
wordsegmentation/skt_dcs_DS.bz2_4K_bigram_mir_10K/258696.ds.bz2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e02de9b1d92bade8bc49c2b8630c01f68579503757e436f0abea892765ac64ae
|
| 3 |
+
size 1046840
|