niobures commited on
Commit
bb654c7
·
verified ·
1 Parent(s): d36fe1c
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .classpath +31 -0
  2. .gitattributes +25 -0
  3. .gitignore +25 -0
  4. .project +17 -0
  5. .settings/org.eclipse.jdt.core.prefs +12 -0
  6. LICENSE +165 -0
  7. README.md +27 -0
  8. bin/.gitignore +3 -0
  9. build.xml +77 -0
  10. edu/stanford/nlp/models/lexparser/englishPCFG.caseless.ser.gz +3 -0
  11. edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz +3 -0
  12. edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.crf.ser.gz +3 -0
  13. edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.prop +61 -0
  14. edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz +3 -0
  15. edu/stanford/nlp/models/ner/english.all.3class.distsim.prop +58 -0
  16. edu/stanford/nlp/models/ner/english.all.3class.nodistsim.crf.ser.gz +3 -0
  17. edu/stanford/nlp/models/ner/english.all.3class.nodistsim.prop +53 -0
  18. edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.crf.ser.gz +3 -0
  19. edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.prop +61 -0
  20. edu/stanford/nlp/models/ner/english.conll.4class.distsim.crf.ser.gz +3 -0
  21. edu/stanford/nlp/models/ner/english.conll.4class.distsim.prop +63 -0
  22. edu/stanford/nlp/models/ner/english.conll.4class.nodistsim.crf.ser.gz +3 -0
  23. edu/stanford/nlp/models/ner/english.conll.4class.nodistsim.prop +62 -0
  24. edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.crf.ser.gz +3 -0
  25. edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.prop +55 -0
  26. edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf.ser.gz +3 -0
  27. edu/stanford/nlp/models/ner/english.muc.7class.distsim.prop +54 -0
  28. edu/stanford/nlp/models/ner/english.muc.7class.nodistsim.crf.ser.gz +3 -0
  29. edu/stanford/nlp/models/ner/english.muc.7class.nodistsim.prop +52 -0
  30. edu/stanford/nlp/models/ner/english.nowiki.3class.caseless.distsim.crf.ser.gz +3 -0
  31. edu/stanford/nlp/models/ner/english.nowiki.3class.caseless.distsim.prop +55 -0
  32. edu/stanford/nlp/models/ner/english.nowiki.3class.nodistsim.crf.ser.gz +3 -0
  33. edu/stanford/nlp/models/ner/english.nowiki.3class.nodistsim.prop +53 -0
  34. edu/stanford/nlp/models/ner/regexner.patterns +0 -0
  35. edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger +3 -0
  36. edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger.props +35 -0
  37. jar-in-jar-loader.zip +3 -0
  38. libs/NeuralNetwork.jar +0 -0
  39. libs/SMOTE.jar +0 -0
  40. libs/commons-cli-1.4.jar +0 -0
  41. libs/excel/commons-codec-1.10.jar +3 -0
  42. libs/excel/commons-collections4-4.1.jar +3 -0
  43. libs/excel/commons-logging-1.2.jar +0 -0
  44. libs/excel/curvesapi-1.04.jar +0 -0
  45. libs/excel/junit-4.12.jar +3 -0
  46. libs/excel/log4j-1.2.17.jar +3 -0
  47. libs/excel/poi-3.17.jar +3 -0
  48. libs/excel/poi-examples-3.17.jar +3 -0
  49. libs/excel/poi-excelant-3.17.jar +0 -0
  50. libs/excel/poi-ooxml-3.17.jar +3 -0
.classpath ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <classpath>
3
+ <classpathentry kind="src" path="src"/>
4
+ <classpathentry kind="lib" path="libs/excel/commons-codec-1.10.jar"/>
5
+ <classpathentry kind="lib" path="libs/excel/commons-collections4-4.1.jar"/>
6
+ <classpathentry kind="lib" path="libs/excel/commons-logging-1.2.jar"/>
7
+ <classpathentry kind="lib" path="libs/excel/curvesapi-1.04.jar"/>
8
+ <classpathentry kind="lib" path="libs/excel/junit-4.12.jar"/>
9
+ <classpathentry kind="lib" path="libs/excel/log4j-1.2.17.jar"/>
10
+ <classpathentry kind="lib" path="libs/excel/poi-3.17.jar"/>
11
+ <classpathentry kind="lib" path="libs/excel/poi-examples-3.17.jar"/>
12
+ <classpathentry kind="lib" path="libs/excel/poi-excelant-3.17.jar"/>
13
+ <classpathentry kind="lib" path="libs/excel/poi-ooxml-3.17.jar"/>
14
+ <classpathentry kind="lib" path="libs/excel/poi-ooxml-schemas-3.17.jar"/>
15
+ <classpathentry kind="lib" path="libs/excel/poi-scratchpad-3.17.jar"/>
16
+ <classpathentry kind="lib" path="libs/excel/xmlbeans-2.6.0.jar"/>
17
+ <classpathentry kind="lib" path="libs/mysql-connector-java-5.1.41.jar"/>
18
+ <classpathentry kind="lib" path="libs/SMOTE.jar"/>
19
+ <classpathentry kind="lib" path="libs/snowball-stemmer-1.3.0.581.1.jar"/>
20
+ <classpathentry kind="lib" path="libs/stanford-corenlp-3.8.0.jar"/>
21
+ <classpathentry kind="lib" path="libs/stanford-parser.jar"/>
22
+ <classpathentry kind="lib" path="libs/stanford-postagger.jar"/>
23
+ <classpathentry kind="lib" path="libs/weka-src.jar"/>
24
+ <classpathentry kind="lib" path="libs/weka.jar"/>
25
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
26
+ <classpathentry kind="lib" path="libs/commons-cli-1.4.jar"/>
27
+ <classpathentry kind="lib" path="libs/mtj-1.0-snapshot.jar"/>
28
+ <classpathentry kind="lib" path="libs/NeuralNetwork.jar"/>
29
+ <classpathentry kind="lib" path="libs/opencsv-2.3.jar"/>
30
+ <classpathentry kind="output" path="bin"/>
31
+ </classpath>
.gitattributes CHANGED
@@ -33,3 +33,28 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger filter=lfs diff=lfs merge=lfs -text
37
+ libs/excel/commons-codec-1.10.jar filter=lfs diff=lfs merge=lfs -text
38
+ libs/excel/commons-collections4-4.1.jar filter=lfs diff=lfs merge=lfs -text
39
+ libs/excel/junit-4.12.jar filter=lfs diff=lfs merge=lfs -text
40
+ libs/excel/log4j-1.2.17.jar filter=lfs diff=lfs merge=lfs -text
41
+ libs/excel/poi-3.17.jar filter=lfs diff=lfs merge=lfs -text
42
+ libs/excel/poi-examples-3.17.jar filter=lfs diff=lfs merge=lfs -text
43
+ libs/excel/poi-ooxml-3.17.jar filter=lfs diff=lfs merge=lfs -text
44
+ libs/excel/poi-ooxml-schemas-3.17.jar filter=lfs diff=lfs merge=lfs -text
45
+ libs/excel/poi-scratchpad-3.17.jar filter=lfs diff=lfs merge=lfs -text
46
+ libs/excel/xmlbeans-2.6.0.jar filter=lfs diff=lfs merge=lfs -text
47
+ libs/mtj-1.0-snapshot.jar filter=lfs diff=lfs merge=lfs -text
48
+ libs/mysql-connector-java-5.1.41.jar filter=lfs diff=lfs merge=lfs -text
49
+ libs/stanford-corenlp-3.8.0.jar filter=lfs diff=lfs merge=lfs -text
50
+ libs/stanford-parser.jar filter=lfs diff=lfs merge=lfs -text
51
+ libs/stanford-postagger.jar filter=lfs diff=lfs merge=lfs -text
52
+ libs/weka-src.jar filter=lfs diff=lfs merge=lfs -text
53
+ libs/weka.jar filter=lfs diff=lfs merge=lfs -text
54
+ models/sentise-oracle-short.xlsx filter=lfs diff=lfs merge=lfs -text
55
+ models/sentise-oracle1.xlsx filter=lfs diff=lfs merge=lfs -text
56
+ models/sentise-oracle2.xlsx filter=lfs diff=lfs merge=lfs -text
57
+ models/SentiWordNet_3.0.0_20130122.txt filter=lfs diff=lfs merge=lfs -text
58
+ sentise.jar filter=lfs diff=lfs merge=lfs -text
59
+ src/taggers/bidirectional-distsim-wsj-0-18.tagger filter=lfs diff=lfs merge=lfs -text
60
+ src/taggers/english-left3words-distsim.tagger filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Compiled class file
2
+ *.class
3
+
4
+ # Log file
5
+ *.log
6
+
7
+ # BlueJ files
8
+ *.ctxt
9
+
10
+ # Mobile Tools for Java (J2ME)
11
+ .mtj.tmp/
12
+
13
+ # Package Files #
14
+ *.war
15
+ *.nar
16
+ *.ear
17
+ *.csss
18
+ *.css
19
+ *.html
20
+ *.tar.gz
21
+ *.rar
22
+ *.scss
23
+
24
+ # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
25
+ hs_err_pid*
.project ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <projectDescription>
3
+ <name>SentiSE</name>
4
+ <comment></comment>
5
+ <projects>
6
+ </projects>
7
+ <buildSpec>
8
+ <buildCommand>
9
+ <name>org.eclipse.jdt.core.javabuilder</name>
10
+ <arguments>
11
+ </arguments>
12
+ </buildCommand>
13
+ </buildSpec>
14
+ <natures>
15
+ <nature>org.eclipse.jdt.core.javanature</nature>
16
+ </natures>
17
+ </projectDescription>
.settings/org.eclipse.jdt.core.prefs ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ eclipse.preferences.version=1
2
+ org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3
+ org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate
4
+ org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
5
+ org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
6
+ org.eclipse.jdt.core.compiler.compliance=1.8
7
+ org.eclipse.jdt.core.compiler.debug.lineNumber=generate
8
+ org.eclipse.jdt.core.compiler.debug.localVariable=generate
9
+ org.eclipse.jdt.core.compiler.debug.sourceFile=generate
10
+ org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
11
+ org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
12
+ org.eclipse.jdt.core.compiler.source=1.8
LICENSE ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GNU LESSER GENERAL PUBLIC LICENSE
2
+ Version 3, 29 June 2007
3
+
4
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
5
+ Everyone is permitted to copy and distribute verbatim copies
6
+ of this license document, but changing it is not allowed.
7
+
8
+
9
+ This version of the GNU Lesser General Public License incorporates
10
+ the terms and conditions of version 3 of the GNU General Public
11
+ License, supplemented by the additional permissions listed below.
12
+
13
+ 0. Additional Definitions.
14
+
15
+ As used herein, "this License" refers to version 3 of the GNU Lesser
16
+ General Public License, and the "GNU GPL" refers to version 3 of the GNU
17
+ General Public License.
18
+
19
+ "The Library" refers to a covered work governed by this License,
20
+ other than an Application or a Combined Work as defined below.
21
+
22
+ An "Application" is any work that makes use of an interface provided
23
+ by the Library, but which is not otherwise based on the Library.
24
+ Defining a subclass of a class defined by the Library is deemed a mode
25
+ of using an interface provided by the Library.
26
+
27
+ A "Combined Work" is a work produced by combining or linking an
28
+ Application with the Library. The particular version of the Library
29
+ with which the Combined Work was made is also called the "Linked
30
+ Version".
31
+
32
+ The "Minimal Corresponding Source" for a Combined Work means the
33
+ Corresponding Source for the Combined Work, excluding any source code
34
+ for portions of the Combined Work that, considered in isolation, are
35
+ based on the Application, and not on the Linked Version.
36
+
37
+ The "Corresponding Application Code" for a Combined Work means the
38
+ object code and/or source code for the Application, including any data
39
+ and utility programs needed for reproducing the Combined Work from the
40
+ Application, but excluding the System Libraries of the Combined Work.
41
+
42
+ 1. Exception to Section 3 of the GNU GPL.
43
+
44
+ You may convey a covered work under sections 3 and 4 of this License
45
+ without being bound by section 3 of the GNU GPL.
46
+
47
+ 2. Conveying Modified Versions.
48
+
49
+ If you modify a copy of the Library, and, in your modifications, a
50
+ facility refers to a function or data to be supplied by an Application
51
+ that uses the facility (other than as an argument passed when the
52
+ facility is invoked), then you may convey a copy of the modified
53
+ version:
54
+
55
+ a) under this License, provided that you make a good faith effort to
56
+ ensure that, in the event an Application does not supply the
57
+ function or data, the facility still operates, and performs
58
+ whatever part of its purpose remains meaningful, or
59
+
60
+ b) under the GNU GPL, with none of the additional permissions of
61
+ this License applicable to that copy.
62
+
63
+ 3. Object Code Incorporating Material from Library Header Files.
64
+
65
+ The object code form of an Application may incorporate material from
66
+ a header file that is part of the Library. You may convey such object
67
+ code under terms of your choice, provided that, if the incorporated
68
+ material is not limited to numerical parameters, data structure
69
+ layouts and accessors, or small macros, inline functions and templates
70
+ (ten or fewer lines in length), you do both of the following:
71
+
72
+ a) Give prominent notice with each copy of the object code that the
73
+ Library is used in it and that the Library and its use are
74
+ covered by this License.
75
+
76
+ b) Accompany the object code with a copy of the GNU GPL and this license
77
+ document.
78
+
79
+ 4. Combined Works.
80
+
81
+ You may convey a Combined Work under terms of your choice that,
82
+ taken together, effectively do not restrict modification of the
83
+ portions of the Library contained in the Combined Work and reverse
84
+ engineering for debugging such modifications, if you also do each of
85
+ the following:
86
+
87
+ a) Give prominent notice with each copy of the Combined Work that
88
+ the Library is used in it and that the Library and its use are
89
+ covered by this License.
90
+
91
+ b) Accompany the Combined Work with a copy of the GNU GPL and this license
92
+ document.
93
+
94
+ c) For a Combined Work that displays copyright notices during
95
+ execution, include the copyright notice for the Library among
96
+ these notices, as well as a reference directing the user to the
97
+ copies of the GNU GPL and this license document.
98
+
99
+ d) Do one of the following:
100
+
101
+ 0) Convey the Minimal Corresponding Source under the terms of this
102
+ License, and the Corresponding Application Code in a form
103
+ suitable for, and under terms that permit, the user to
104
+ recombine or relink the Application with a modified version of
105
+ the Linked Version to produce a modified Combined Work, in the
106
+ manner specified by section 6 of the GNU GPL for conveying
107
+ Corresponding Source.
108
+
109
+ 1) Use a suitable shared library mechanism for linking with the
110
+ Library. A suitable mechanism is one that (a) uses at run time
111
+ a copy of the Library already present on the user's computer
112
+ system, and (b) will operate properly with a modified version
113
+ of the Library that is interface-compatible with the Linked
114
+ Version.
115
+
116
+ e) Provide Installation Information, but only if you would otherwise
117
+ be required to provide such information under section 6 of the
118
+ GNU GPL, and only to the extent that such information is
119
+ necessary to install and execute a modified version of the
120
+ Combined Work produced by recombining or relinking the
121
+ Application with a modified version of the Linked Version. (If
122
+ you use option 4d0, the Installation Information must accompany
123
+ the Minimal Corresponding Source and Corresponding Application
124
+ Code. If you use option 4d1, you must provide the Installation
125
+ Information in the manner specified by section 6 of the GNU GPL
126
+ for conveying Corresponding Source.)
127
+
128
+ 5. Combined Libraries.
129
+
130
+ You may place library facilities that are a work based on the
131
+ Library side by side in a single library together with other library
132
+ facilities that are not Applications and are not covered by this
133
+ License, and convey such a combined library under terms of your
134
+ choice, if you do both of the following:
135
+
136
+ a) Accompany the combined library with a copy of the same work based
137
+ on the Library, uncombined with any other library facilities,
138
+ conveyed under the terms of this License.
139
+
140
+ b) Give prominent notice with the combined library that part of it
141
+ is a work based on the Library, and explaining where to find the
142
+ accompanying uncombined form of the same work.
143
+
144
+ 6. Revised Versions of the GNU Lesser General Public License.
145
+
146
+ The Free Software Foundation may publish revised and/or new versions
147
+ of the GNU Lesser General Public License from time to time. Such new
148
+ versions will be similar in spirit to the present version, but may
149
+ differ in detail to address new problems or concerns.
150
+
151
+ Each version is given a distinguishing version number. If the
152
+ Library as you received it specifies that a certain numbered version
153
+ of the GNU Lesser General Public License "or any later version"
154
+ applies to it, you have the option of following the terms and
155
+ conditions either of that published version or of any later version
156
+ published by the Free Software Foundation. If the Library as you
157
+ received it does not specify a version number of the GNU Lesser
158
+ General Public License, you may choose any version of the GNU Lesser
159
+ General Public License ever published by the Free Software Foundation.
160
+
161
+ If the Library as you received it specifies that a proxy can decide
162
+ whether future versions of the GNU Lesser General Public License shall
163
+ apply, that proxy's public statement of acceptance of any version is
164
+ permanent authorization for you to choose that version for the
165
+ Library.
README.md ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SentiSE
2
+ SentiSE is a sentiment analysis tool for Software Engineering interactions
3
+
4
+ SentiSE, a supervised learning based sentiment analysis tool that incorporates ten supervised learning algorithms and
5
+ fourteen different optional pre-processing steps that are commonly used to improve the performance of sentiment analysis tools.
6
+ We empirically evaluated each of the algorithms and preprocessing steps to determine the best configuration.
7
+ We evaluated SentiSE using a large-scale labeled dataset of 13K comments from three different types of SE interactions.
8
+ <br><br>
9
+ **Performance Evaluation:**
10
+ We compare SentiSE with other sentiment analysis tools available in the software engineering domain. We use two datasets for this evaluation: _Oracle1_, a 13K labeled dataset with 21% positive, 60% neutral and 19% negative data, and _Oracle2_ with 30% positive, 40% neutral and 30% negative data. The table below shows the performance comparison:
11
+
12
+ | Oracle | Tool | Precision<br> (Positive) | Recall<br> (Positive) | F-measure<br> (Positive) | Precision<br> (Neutral) | Recall<br> (Neutral) | F-measure<br> (Neutral) | Precision<br> (Negative) | Recall<br> (Negative) | F-measure<br> (Negative) | Accuracy | Weighted <br> Kappa |
13
+ |--|--| -- | -- | -- | -- | --| -- | -- | --| -- | -- | --|
14
+ |_Oracle1_|SentiSE | 85.63% | 75.27% | 80.11% | 81.51% | 92.78% | 86.78% | 81.03% | 55.92%|66.16%|82.23%|0.681
15
+ |_Oracle1_| SentiCR | 81.81% | 76.59%| 79.04%| 80.04% | 92.77% | 85.92% | 82.71% | 46.38% | 59.40% | 80.6655% | 0.647
16
+ |_Oracle1_| SentiStrength-SE | 75.81% | 81.45% | 78.53% | 84.68% | 83.64% | 84.16% | 66.50% | 63.42% | 64.92% | 79.32% | 0.6587
17
+ |_Oracle2_| SentiSE | 88.83% | 85.09% | 86.92% | 86.62% | 91.52% | 89.00% | 85.87% | 78.61% | 82.07% | 86.92% | 0.788
18
+ |_Oracle2_|SentiCR | 84.32% | 84.73% | 84.50% | 80.70% | 92.08% | 86.00% | 86.45% | 59.49% | 70.40% | 82.47% | 0.716
19
+ |_Oracle2_| SentiStrength-SE | 79.56% | 83.57% | 81.52% | 80.73% | 84.15% | 82.41% | 80.41% | 69.31% | 74.45% | 80.34% | 0.696
20
+
21
+ <br><br><br>
22
+ **Usage Instructions:**
23
+ Download and import the SentiSE project. Run the build.xml file to generate sentise.jar. SentiSE is a command-line based tool. Use the command <code> java -jar sentise.jar -help</code> to find all the commands available in SentiSE.
24
+
25
+ <br><br>
26
+ **ScreenShot**
27
+ ![SentiSE-cli](https://github.com/amiangshu/SentiSE/blob/tanzeerH-readme/models/cli.png)
bin/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ /edu/
2
+ /taggers/
3
+ /weka/
build.xml ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2
+ <project default="jar" name="SentiSE Builder" >
3
+ <property name="src.dir" value="src"/>
4
+
5
+ <property name="build.dir" value="build"/>
6
+ <property name="classes.dir" value="${build.dir}/bin"/>
7
+ <property name="jar.dir" value="${class.dir}"/>
8
+ <property name="dir.jarfile" value="./"/>
9
+ <property name="main-class" value="edu.siu.sentise.SentiSE"/>
10
+
11
+ <path id="libs-class">
12
+ <fileset dir="./libs">
13
+ <include name="*.jar"/>
14
+ </fileset>
15
+ </path>
16
+
17
+ <path id="excel-class">
18
+ <fileset dir="./libs/excel">
19
+ <include name="*.jar"/>
20
+ </fileset>
21
+ </path>
22
+
23
+
24
+ <target name="clean">
25
+ <delete dir="${build.dir}"/>
26
+ </target>
27
+
28
+ <target name="compile">
29
+ <mkdir dir="${classes.dir}"/>
30
+ <mkdir dir="${build.dir}"/>
31
+
32
+ <javac destdir="${classes.dir}" includeantruntime="false" >
33
+ <classpath refid="libs-class"/>
34
+ <classpath refid="excel-class"/>
35
+ <src path="${src.dir}"/>
36
+
37
+ </javac>
38
+ </target>
39
+
40
+
41
+
42
+ <target name="jar" depends="compile">
43
+ <jar destfile="${jar.dir}/sentise.jar">
44
+ <manifest>
45
+ <attribute name="Main-Class" value="org.eclipse.jdt.internal.jarinjarloader.JarRsrcLoader"/>
46
+ <attribute name="Rsrc-Main-Class" value="edu.sentise.SentiSE"/>
47
+ <attribute name="Class-Path" value="."/>
48
+ <attribute name="Rsrc-Class-Path" value="./ mysql-connector-java-5.1.41.jar SMOTE.jar snowball-stemmer-1.3.0.581.1.jar stanford-corenlp-3.8.0.jar stanford-parser.jar stanford-postagger.jar weka-src.jar weka.jar commons-codec-1.10.jar commons-collections4-4.1.jar junit-4.12.jar log4j-1.2.17.jar poi-3.17.jar poi-examples-3.17.jar poi-excelant-3.17.jar poi-ooxml-3.17.jar poi-ooxml-schemas-3.17.jar poi-scratchpad-3.17.jar xmlbeans-2.6.0.jar commons-cli-1.4.jar mtj-1.0-snapshot.jar opencsv-2.3.jar NeuralNetwork.jar"/>
49
+ </manifest>
50
+ <zipfileset src="jar-in-jar-loader.zip"/>
51
+ <fileset dir="${dir.jarfile}/bin"/>
52
+ <zipfileset dir="${dir.jarfile}/libs" includes="mysql-connector-java-5.1.41.jar"/>
53
+ <zipfileset dir="${dir.jarfile}/libs" includes="SMOTE.jar"/>
54
+ <zipfileset dir="${dir.jarfile}/libs" includes="snowball-stemmer-1.3.0.581.1.jar"/>
55
+ <zipfileset dir="${dir.jarfile}/libs" includes="stanford-corenlp-3.8.0.jar"/>
56
+ <zipfileset dir="${dir.jarfile}/libs" includes="stanford-parser.jar"/>
57
+ <zipfileset dir="${dir.jarfile}/libs" includes="stanford-postagger.jar"/>
58
+ <zipfileset dir="${dir.jarfile}/libs" includes="weka-src.jar"/>
59
+ <zipfileset dir="${dir.jarfile}/libs" includes="weka.jar"/>
60
+ <zipfileset dir="${dir.jarfile}/libs/excel" includes="commons-codec-1.10.jar"/>
61
+ <zipfileset dir="${dir.jarfile}/libs/excel" includes="commons-collections4-4.1.jar"/>
62
+ <zipfileset dir="${dir.jarfile}/libs/excel" includes="junit-4.12.jar"/>
63
+ <zipfileset dir="${dir.jarfile}/libs/excel" includes="log4j-1.2.17.jar"/>
64
+ <zipfileset dir="${dir.jarfile}/libs/excel" includes="poi-3.17.jar"/>
65
+ <zipfileset dir="${dir.jarfile}/libs/excel" includes="poi-examples-3.17.jar"/>
66
+ <zipfileset dir="${dir.jarfile}/libs/excel" includes="poi-excelant-3.17.jar"/>
67
+ <zipfileset dir="${dir.jarfile}/libs/excel" includes="poi-ooxml-3.17.jar"/>
68
+ <zipfileset dir="${dir.jarfile}/libs/excel" includes="poi-ooxml-schemas-3.17.jar"/>
69
+ <zipfileset dir="${dir.jarfile}/libs/excel" includes="poi-scratchpad-3.17.jar"/>
70
+ <zipfileset dir="${dir.jarfile}/libs/excel" includes="xmlbeans-2.6.0.jar"/>
71
+ <zipfileset dir="${dir.jarfile}/libs" includes="commons-cli-1.4.jar"/>
72
+ <zipfileset dir="${dir.jarfile}/libs" includes="mtj-1.0-snapshot.jar"/>
73
+ <zipfileset dir="${dir.jarfile}/libs" includes="opencsv-2.3.jar"/>
74
+ <zipfileset dir="${dir.jarfile}/libs" includes="NeuralNetwork.jar"/>
75
+ </jar>
76
+ </target>
77
+ </project>
edu/stanford/nlp/models/lexparser/englishPCFG.caseless.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:defb4d91b899c4f92ea605f026c57408c4d3df1fb371b50f2d055ace782cbc1e
3
+ size 1769534
edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed674cce7311f76c018806e61cdb045e219258ee30014da4e3e686b9230ebea9
3
+ size 1883795
edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.crf.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c9a504ad8f82f415ad64ce96478c59788355d8096edc96ccb8b43289739f2f1
3
+ size 37319863
edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.prop ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # trainFileList = /u/nlp/data/ner/column_data/all.3class.train.old2,/u/nlp/data/ner/column_data/english.extra.3class.train
2
+ trainFileList = /u/nlp/data/ner/column_data/ace23.3class.train,/u/nlp/data/ner/column_data/muc6.3class.ptb.train,/u/nlp/data/ner/column_data/muc7.3class.ptb.train,/u/nlp/data/ner/column_data/conll.3class.train,/u/nlp/data/ner/column_data/wikiner.3class.train,/u/nlp/data/ner/column_data/ontonotes.3class.train,/u/nlp/data/ner/column_data/english.extra.3class.train
3
+ testFiles = /u/nlp/data/ner/column_data/all.3class.test,/u/nlp/data/ner/column_data/all.3class.upper.test,/u/nlp/data/ner/column_data/all.3class.lower.test
4
+ # testFile = uppercase.tsv
5
+ serializeTo = english.all.3class.caseless.distsim.crf.ser.gz
6
+
7
+ type = crf
8
+
9
+ wordFunction = edu.stanford.nlp.process.LowercaseAndAmericanizeFunction
10
+ useKnownLCWords = false
11
+
12
+ #distSimLexicon = /u/nlp/data/pos_tags_are_useless/englishGigaword.200.pruned
13
+ #distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200
14
+ distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
15
+ # right options for egw4-reut.512 (though effect of having or not is small)
16
+ numberEquivalenceDistSim = true
17
+ unknownWordDistSimClass = 0
18
+ useDistSim = true
19
+
20
+ map = word=0,answer=1
21
+
22
+ # saveFeatureIndexToDisk = true
23
+
24
+ useClassFeature=true
25
+ useWord=true
26
+ #useWordPairs=true
27
+ useNGrams=true
28
+ noMidNGrams=true
29
+ maxNGramLeng=6
30
+ usePrev=true
31
+ useNext=true
32
+ #useTags=true
33
+ #useWordTag=true
34
+ useLongSequences=true
35
+ useSequences=true
36
+ usePrevSequences=true
37
+ useTypeSeqs=true
38
+ useTypeSeqs2=true
39
+ useTypeySequences=true
40
+ useOccurrencePatterns=true
41
+ useLastRealWord=true
42
+ useNextRealWord=true
43
+ #useReverse=false
44
+ normalize=true
45
+ # normalizeTimex=true
46
+ wordShape=chris2useLC
47
+ useDisjunctive=true
48
+ disjunctionWidth=5
49
+ #useDisjunctiveShapeInteraction=true
50
+
51
+ maxLeft=1
52
+ readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
53
+
54
+ useObservedSequencesOnly=true
55
+
56
+ useQN = true
57
+ QNsize = 25
58
+
59
+ # makes it go faster
60
+ featureDiffThresh=0.05
61
+
edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65905fe1bbba47d0ac2b468e63d91cfde72dce0a7261345e015c45fab9cff93f
3
+ size 31985042
edu/stanford/nlp/models/ner/english.all.3class.distsim.prop ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # trainFileList = /u/nlp/data/ner/column_data/all.3class.train.old2,/u/nlp/data/ner/column_data/english.extra.3class.train
2
+ trainFileList = /u/nlp/data/ner/column_data/ace23.3class.train,/u/nlp/data/ner/column_data/muc6.3class.ptb.train,/u/nlp/data/ner/column_data/muc7.3class.ptb.train,/u/nlp/data/ner/column_data/conll.3class.train,/u/nlp/data/ner/column_data/wikiner.3class.train,/u/nlp/data/ner/column_data/ontonotes.3class.train,/u/nlp/data/ner/column_data/english.extra.3class.train
3
+ testFile = /u/nlp/data/ner/column_data/all.3class.test
4
+ serializeTo = english.all.3class.distsim.crf.ser.gz
5
+
6
+ type = crf
7
+
8
+ wordFunction = edu.stanford.nlp.process.AmericanizeFunction
9
+
10
+ #distSimLexicon = /u/nlp/data/pos_tags_are_useless/englishGigaword.200.pruned
11
+ #distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200
12
+ distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
13
+ # right options for egw4-reut.512 (though effect of having or not is small)
14
+ numberEquivalenceDistSim = true
15
+ unknownWordDistSimClass = 0
16
+ useDistSim = true
17
+
18
+ map = word=0,answer=1
19
+
20
+ saveFeatureIndexToDisk = true
21
+
22
+ useClassFeature=true
23
+ useWord=true
24
+ #useWordPairs=true
25
+ useNGrams=true
26
+ noMidNGrams=true
27
+ maxNGramLeng=6
28
+ usePrev=true
29
+ useNext=true
30
+ #useTags=true
31
+ #useWordTag=true
32
+ useLongSequences=true
33
+ useSequences=true
34
+ usePrevSequences=true
35
+ useTypeSeqs=true
36
+ useTypeSeqs2=true
37
+ useTypeySequences=true
38
+ useOccurrencePatterns=true
39
+ useLastRealWord=true
40
+ useNextRealWord=true
41
+ #useReverse=false
42
+ normalize=true
43
+ # normalizeTimex=true
44
+ wordShape=chris2useLC
45
+ useDisjunctive=true
46
+ disjunctionWidth=5
47
+ #useDisjunctiveShapeInteraction=true
48
+
49
+ maxLeft=1
50
+ readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
51
+
52
+ useObservedSequencesOnly=true
53
+
54
+ useQN = true
55
+ QNsize = 25
56
+
57
+ # makes it go faster
58
+ featureDiffThresh=0.05
edu/stanford/nlp/models/ner/english.all.3class.nodistsim.crf.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77339de3c30111b12302d331d97b24304fdf537258e3b6007a1e6e96390ce9b2
3
+ size 23179051
edu/stanford/nlp/models/ner/english.all.3class.nodistsim.prop ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainFileList = /u/nlp/data/ner/column_data/all.3class.train.old2,/u/nlp/data/ner/column_data/english.extra.3class.train
2
+ testFile = /u/nlp/data/ner/column_data/all.3class.test
3
+ serializeTo = english.all.3class.nodistsim.crf.ser.gz
4
+
5
+ type = crf
6
+
7
+ wordFunction = edu.stanford.nlp.process.AmericanizeFunction
8
+
9
+ #distSimLexicon = /u/nlp/data/pos_tags_are_useless/englishGigaword.200.pruned
10
+ distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200.pruned
11
+ useDistSim = false
12
+
13
+ map = word=0,answer=1
14
+
15
+ saveFeatureIndexToDisk = true
16
+
17
+ useClassFeature=true
18
+ useWord=true
19
+ #useWordPairs=true
20
+ useNGrams=true
21
+ noMidNGrams=true
22
+ maxNGramLeng=6
23
+ usePrev=true
24
+ useNext=true
25
+ #useTags=true
26
+ #useWordTag=true
27
+ useLongSequences=true
28
+ useSequences=true
29
+ usePrevSequences=true
30
+ maxLeft=1
31
+ useTypeSeqs=true
32
+ useTypeSeqs2=true
33
+ useTypeySequences=true
34
+ useOccurrencePatterns=true
35
+ useLastRealWord=true
36
+ useNextRealWord=true
37
+ #useReverse=false
38
+ normalize=true
39
+ # normalizeTimex=true
40
+ wordShape=chris2useLC
41
+ useDisjunctive=true
42
+ disjunctionWidth=5
43
+ #useDisjunctiveShapeInteraction=true
44
+
45
+ readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
46
+
47
+ useObservedSequencesOnly=true
48
+
49
+ useQN = true
50
+ QNsize = 25
51
+
52
+ # makes it go faster
53
+ featureDiffThresh=0.05
edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.crf.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2073dcfff4061549fe6aa6b6fd22246ae6bad28c819c6806d690ad3c1e383c8d
3
+ size 21421444
edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.prop ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This is better than Jenny's either with or without distsim turned on
2
+ # And using iob2 is better for optimal CoNLL performance.
3
+ # Features titled "chris2009"
4
+
5
+ trainFile = /u/nlp/data/ner/column_data/conll.4class.train
6
+ # testFile = /u/nlp/data/ner/column_data/conll.4class.testa
7
+ serializeTo = english.conll.4class.caseless.distsim.crf.ser.gz
8
+
9
+ wordFunction = edu.stanford.nlp.process.LowercaseAndAmericanizeFunction
10
+ useKnownLCWords = false
11
+
12
+ useDistSim = true
13
+ distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
14
+
15
+ map = word=0,answer=1
16
+
17
+ saveFeatureIndexToDisk = true
18
+
19
+ useTitle = true
20
+ useClassFeature=true
21
+ useWord=true
22
+ # useWordPairs=true
23
+ useNGrams=true
24
+ noMidNGrams=true
25
+ # maxNGramLeng=6 # Having them all helps, which is the default
26
+ usePrev=true
27
+ useNext=true
28
+ # useTags=true
29
+ # useWordTag=true
30
+ useLongSequences=true
31
+ useSequences=true
32
+ usePrevSequences=true
33
+ maxLeft=1
34
+ useTypeSeqs=true
35
+ useTypeSeqs2=true
36
+ useTypeySequences=true
37
+ useOccurrencePatterns=true
38
+ useLastRealWord=true
39
+ useNextRealWord=true
40
+ #useReverse=false
41
+ normalize=true
42
+ # normalizeTimex=true
43
+ # dan2 better than chris2 on CoNLL data...
44
+ wordShape=dan2useLC
45
+ useDisjunctive=true
46
+ # disjunctionWidth 4 is better than 5 on CoNLL data
47
+ disjunctionWidth=4
48
+ #useDisjunctiveShapeInteraction=true
49
+
50
+ type=crf
51
+
52
+ readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
53
+
54
+ useObservedSequencesOnly=true
55
+
56
+ sigma = 20
57
+ useQN = true
58
+ QNsize = 25
59
+
60
+ # makes it go faster
61
+ featureDiffThresh=0.05
edu/stanford/nlp/models/ner/english.conll.4class.distsim.crf.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f60c3630612f2c93420f191fec792f87a4f64aee38998324b7a1f8b5c90f7363
3
+ size 17803778
edu/stanford/nlp/models/ner/english.conll.4class.distsim.prop ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This is better than Jenny's either with or without distsim turned on
2
+ # And using iob2 is better for optimal CoNLL performance.
3
+ # Features titled "chris2009"
4
+
5
+ trainFile = /u/nlp/data/ner/column_data/conll.4class.train
6
+ # testFile = /u/nlp/data/ner/column_data/conll.4class.testa
7
+ serializeTo = english.conll.4class.distsim.crf.ser.gz
8
+
9
+ wordFunction = edu.stanford.nlp.process.AmericanizeFunction
10
+
11
+ useDistSim = true
12
+ distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
13
+ # right options for egw4-reut.512 (though effect of having or not is small)
14
+ numberEquivalenceDistSim = true
15
+ unknownWordDistSimClass = 0
16
+
17
+ map = word=0,answer=1
18
+
19
+ saveFeatureIndexToDisk = true
20
+
21
+ useTitle = true
22
+ useClassFeature=true
23
+ useWord=true
24
+ # useWordPairs=true
25
+ useNGrams=true
26
+ noMidNGrams=true
27
+ # maxNGramLeng=6 # Having them all helps, which is the default
28
+ usePrev=true
29
+ useNext=true
30
+ # useTags=true
31
+ # useWordTag=true
32
+ useLongSequences=true
33
+ useSequences=true
34
+ usePrevSequences=true
35
+ maxLeft=1
36
+ useTypeSeqs=true
37
+ useTypeSeqs2=true
38
+ useTypeySequences=true
39
+ useOccurrencePatterns=true
40
+ useLastRealWord=true
41
+ useNextRealWord=true
42
+ #useReverse=false
43
+ normalize=true
44
+ # normalizeTimex=true
45
+ # dan2 better than chris2 on CoNLL data...
46
+ wordShape=dan2useLC
47
+ useDisjunctive=true
48
+ # disjunctionWidth 4 is better than 5 on CoNLL data
49
+ disjunctionWidth=4
50
+ #useDisjunctiveShapeInteraction=true
51
+
52
+ type=crf
53
+
54
+ readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
55
+
56
+ useObservedSequencesOnly=true
57
+
58
+ sigma = 20
59
+ useQN = true
60
+ QNsize = 25
61
+
62
+ # makes it go faster
63
+ featureDiffThresh=0.05
edu/stanford/nlp/models/ner/english.conll.4class.nodistsim.crf.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b14f1b2b25935f0c7b1499e3a910ec11d3bbb1e7350ed0f888b8f170fe38a90c
3
+ size 14860645
edu/stanford/nlp/models/ner/english.conll.4class.nodistsim.prop ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This is better than Jenny's either with or without distsim turned on
2
+ # And using iob2 is better for optimal CoNLL performance.
3
+ # Features labeled "chris2009"
4
+
5
+ trainFile = /u/nlp/data/ner/column_data/conll.4class.train
6
+ # testFile = /u/nlp/data/ner/column_data/conll.4class.testa
7
+ serializeTo = english.conll.4class.nodistsim.crf.ser.gz
8
+
9
+ wordFunction = edu.stanford.nlp.process.AmericanizeFunction
10
+
11
+ # This is good, but deliberately not used here
12
+ # useDistSim = true
13
+ useDistSim = false
14
+ distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200
15
+
16
+ map = word=0,answer=1
17
+
18
+ saveFeatureIndexToDisk = true
19
+
20
+ useTitle = true
21
+ useClassFeature=true
22
+ useWord=true
23
+ # useWordPairs=true
24
+ useNGrams=true
25
+ noMidNGrams=true
26
+ # maxNGramLeng=6 # Having them all helps, which is the default
27
+ usePrev=true
28
+ useNext=true
29
+ # useTags=true
30
+ # useWordTag=true
31
+ useLongSequences=true
32
+ useSequences=true
33
+ usePrevSequences=true
34
+ maxLeft=1
35
+ useTypeSeqs=true
36
+ useTypeSeqs2=true
37
+ useTypeySequences=true
38
+ useOccurrencePatterns=true
39
+ useLastRealWord=true
40
+ useNextRealWord=true
41
+ #useReverse=false
42
+ normalize=true
43
+ # normalizeTimex=true
44
+ # dan2 better than chris2 on CoNLL data...
45
+ wordShape=dan2useLC
46
+ useDisjunctive=true
47
+ # disjunctionWidth 4 is better than 5 on CoNLL data
48
+ disjunctionWidth=4
49
+ #useDisjunctiveShapeInteraction=true
50
+
51
+ type=crf
52
+
53
+ readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
54
+
55
+ useObservedSequencesOnly=true
56
+
57
+ sigma = 20
58
+ useQN = true
59
+ QNsize = 25
60
+
61
+ # makes it go faster
62
+ featureDiffThresh=0.05
edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.crf.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:821281c380d277749c2641d6694584beef533c9cf222d24f03a79d45c18e1291
3
+ size 20118311
edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.prop ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainFileList = /u/nlp/data/ner/column_data/muc6.ptb.train,/u/nlp/data/ner/column_data/muc7.ptb.train
2
+ # testFile = /u/nlp/data/ner/column_data/muc7.ptb.devtest
3
+ serializeTo = english.muc.7class.caseless.distsim.crf.ser.gz
4
+
5
+ type=crf
6
+
7
+ wordFunction = edu.stanford.nlp.process.LowercaseAndAmericanizeFunction
8
+ useKnownLCWords = false
9
+
10
+ distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
11
+ numberEquivalenceDistSim = true
12
+ unknownWordDistSimClass = 0
13
+ useDistSim = true
14
+
15
+ map = word=0,answer=1
16
+
17
+ saveFeatureIndexToDisk = true
18
+
19
+ useClassFeature=true
20
+ useWord=true
21
+ #useWordPairs=true
22
+ useNGrams=true
23
+ noMidNGrams=true
24
+ maxNGramLeng=6
25
+ usePrev=true
26
+ useNext=true
27
+ #useTags=true
28
+ #useWordTag=true
29
+ useLongSequences=true
30
+ useSequences=true
31
+ usePrevSequences=true
32
+ useTypeSeqs=true
33
+ useTypeSeqs2=true
34
+ useTypeySequences=true
35
+ useOccurrencePatterns=true
36
+ useLastRealWord=true
37
+ useNextRealWord=true
38
+ #useReverse=false
39
+ normalize=true
40
+ # normalizeTimex=true
41
+ wordShape=chris2useLC
42
+ useDisjunctive=true
43
+ disjunctionWidth=5
44
+ #useDisjunctiveShapeInteraction=true
45
+
46
+ maxLeft=1
47
+ readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
48
+
49
+ useObservedSequencesOnly=true
50
+
51
+ useQN = true
52
+ QNsize = 25
53
+
54
+ # makes it go faster
55
+ featureDiffThresh=0.05
edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2d7308d176718cdd0e7a062aaaf5e94d2c4bb8c9b787f2177196dcce1db5d0c
3
+ size 17859765
edu/stanford/nlp/models/ner/english.muc.7class.distsim.prop ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainFileList = /u/nlp/data/ner/column_data/muc6.ptb.train,/u/nlp/data/ner/column_data/muc7.ptb.train
2
+ # testFile = /u/nlp/data/ner/column_data/muc7.ptb.devtest
3
+ serializeTo = english.muc.7class.distsim.crf.ser.gz
4
+
5
+ type=crf
6
+
7
+ wordFunction = edu.stanford.nlp.process.AmericanizeFunction
8
+
9
+ distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
10
+ numberEquivalenceDistSim = true
11
+ unknownWordDistSimClass = 0
12
+ useDistSim = true
13
+
14
+ map = word=0,answer=1
15
+
16
+ saveFeatureIndexToDisk = true
17
+
18
+ useClassFeature=true
19
+ useWord=true
20
+ #useWordPairs=true
21
+ useNGrams=true
22
+ noMidNGrams=true
23
+ maxNGramLeng=6
24
+ usePrev=true
25
+ useNext=true
26
+ #useTags=true
27
+ #useWordTag=true
28
+ useLongSequences=true
29
+ useSequences=true
30
+ usePrevSequences=true
31
+ useTypeSeqs=true
32
+ useTypeSeqs2=true
33
+ useTypeySequences=true
34
+ useOccurrencePatterns=true
35
+ useLastRealWord=true
36
+ useNextRealWord=true
37
+ #useReverse=false
38
+ normalize=true
39
+ # normalizeTimex=true
40
+ wordShape=chris2useLC
41
+ useDisjunctive=true
42
+ disjunctionWidth=5
43
+ #useDisjunctiveShapeInteraction=true
44
+
45
+ maxLeft=1
46
+ readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
47
+
48
+ useObservedSequencesOnly=true
49
+
50
+ useQN = true
51
+ QNsize = 25
52
+
53
+ # makes it go faster
54
+ featureDiffThresh=0.05
edu/stanford/nlp/models/ner/english.muc.7class.nodistsim.crf.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40f1e3940b1424ffc699c2a3d74274e53b317d31532f4f955ca811d2a6e5b660
3
+ size 14494199
edu/stanford/nlp/models/ner/english.muc.7class.nodistsim.prop ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainFile = /u/nlp/data/ner/goodClassifiers/data/muc67.jenny.train
2
+ testFile = /u/nlp/data/ner/goodClassifiers/data/muc67.jenny.test
3
+ serializeTo = english.muc.7class.nodistsim.crf.ser.gz
4
+
5
+ wordFunction = edu.stanford.nlp.process.AmericanizeFunction
6
+
7
+ distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200
8
+ useDistSim = false
9
+
10
+ map = word=0,answer=1
11
+
12
+ saveFeatureIndexToDisk = true
13
+
14
+ useClassFeature=true
15
+ useWord=true
16
+ #useWordPairs=true
17
+ useNGrams=true
18
+ noMidNGrams=true
19
+ maxNGramLeng=6
20
+ usePrev=true
21
+ useNext=true
22
+ #useTags=true
23
+ #useWordTag=true
24
+ useLongSequences=true
25
+ useSequences=true
26
+ usePrevSequences=true
27
+ maxLeft=1
28
+ useTypeSeqs=true
29
+ useTypeSeqs2=true
30
+ useTypeySequences=true
31
+ useOccurrencePatterns=true
32
+ useLastRealWord=true
33
+ useNextRealWord=true
34
+ #useReverse=false
35
+ normalize=true
36
+ # normalizeTimex=true
37
+ wordShape=chris2useLC
38
+ useDisjunctive=true
39
+ disjunctionWidth=5
40
+ #useDisjunctiveShapeInteraction=true
41
+
42
+ type=crf
43
+
44
+ readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
45
+
46
+ useObservedSequencesOnly=true
47
+
48
+ useQN = true
49
+ QNsize = 25
50
+
51
+ # makes it go faster
52
+ featureDiffThresh=0.05
edu/stanford/nlp/models/ner/english.nowiki.3class.caseless.distsim.crf.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:645677c2646791ac2a07b29be12af60cff84ea02936ac375af8b03939eb30adf
3
+ size 20284974
edu/stanford/nlp/models/ner/english.nowiki.3class.caseless.distsim.prop ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainFile = /u/nlp/data/ner/goodClassifiers/data/all.3class.train
2
+ testFile = /u/nlp/data/ner/column_data/conll.testa
3
+ serializeTo = english.nowiki.3class.caseless.distsim.crf.ser.gz
4
+
5
+ type = crf
6
+
7
+ wordFunction = edu.stanford.nlp.process.LowercaseAndAmericanizeFunction
8
+ useKnownLCWords = false
9
+
10
+ #distSimLexicon = /u/nlp/data/pos_tags_are_useless/englishGigaword.200.pruned
11
+ #distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200
12
+ distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
13
+ useDistSim = true
14
+
15
+ map = word=0,answer=1
16
+
17
+ saveFeatureIndexToDisk = true
18
+
19
+ useClassFeature=true
20
+ useWord=true
21
+ #useWordPairs=true
22
+ useNGrams=true
23
+ noMidNGrams=true
24
+ maxNGramLeng=6
25
+ usePrev=true
26
+ useNext=true
27
+ #useTags=true
28
+ #useWordTag=true
29
+ useLongSequences=true
30
+ useSequences=true
31
+ usePrevSequences=true
32
+ maxLeft=1
33
+ useTypeSeqs=true
34
+ useTypeSeqs2=true
35
+ useTypeySequences=true
36
+ useOccurrencePatterns=true
37
+ useLastRealWord=true
38
+ useNextRealWord=true
39
+ #useReverse=false
40
+ normalize=true
41
+ # normalizeTimex=true
42
+ wordShape=chris2useLC
43
+ useDisjunctive=true
44
+ disjunctionWidth=5
45
+ #useDisjunctiveShapeInteraction=true
46
+
47
+ readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
48
+
49
+ useObservedSequencesOnly=true
50
+
51
+ useQN = true
52
+ QNsize = 25
53
+
54
+ # makes it go faster
55
+ featureDiffThresh=0.05
edu/stanford/nlp/models/ner/english.nowiki.3class.nodistsim.crf.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8025840c0c214e9787c96f4904e1108634d8321d01f85807a7e5a03ccb86c0ac
3
+ size 16787019
edu/stanford/nlp/models/ner/english.nowiki.3class.nodistsim.prop ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainFile = /u/nlp/data/ner/goodClassifiers/data/all.3class.train
2
+ testFile = /u/nlp/data/ner/column_data/conll.testa
3
+ serializeTo = english.nowiki.3class.nodistsim.crf.ser.gz
4
+
5
+ type = crf
6
+
7
+ wordFunction = edu.stanford.nlp.process.AmericanizeFunction
8
+
9
+ #distSimLexicon = /u/nlp/data/pos_tags_are_useless/englishGigaword.200.pruned
10
+ distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200.pruned
11
+ useDistSim = false
12
+
13
+ map = word=0,answer=1
14
+
15
+ saveFeatureIndexToDisk = true
16
+
17
+ useClassFeature=true
18
+ useWord=true
19
+ #useWordPairs=true
20
+ useNGrams=true
21
+ noMidNGrams=true
22
+ maxNGramLeng=6
23
+ usePrev=true
24
+ useNext=true
25
+ #useTags=true
26
+ #useWordTag=true
27
+ useLongSequences=true
28
+ useSequences=true
29
+ usePrevSequences=true
30
+ maxLeft=1
31
+ useTypeSeqs=true
32
+ useTypeSeqs2=true
33
+ useTypeySequences=true
34
+ useOccurrencePatterns=true
35
+ useLastRealWord=true
36
+ useNextRealWord=true
37
+ #useReverse=false
38
+ normalize=true
39
+ # normalizeTimex=true
40
+ wordShape=chris2useLC
41
+ useDisjunctive=true
42
+ disjunctionWidth=5
43
+ #useDisjunctiveShapeInteraction=true
44
+
45
+ readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
46
+
47
+ useObservedSequencesOnly=true
48
+
49
+ useQN = true
50
+ QNsize = 25
51
+
52
+ # makes it go faster
53
+ featureDiffThresh=0.05
edu/stanford/nlp/models/ner/regexner.patterns ADDED
The diff for this file is too large to render. See raw diff
 
edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c351da938e9d86e7cd36f2d29e8405b069e21d1c48ec0e579764e818b892a53
3
+ size 12409329
edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger.props ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## tagger training invoked at Tue Feb 25 04:12:25 PST 2014 with arguments:
2
+ model = english-left3words-distsim.tagger
3
+ arch = left3words,naacl2003unknowns,wordshapes(-1,1),distsim(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1),distsimconjunction(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1)
4
+ wordFunction = edu.stanford.nlp.process.AmericanizeFunction
5
+ trainFile = /u/nlp/data/pos-tagger/english/train-wsj-0-18;/u/nlp/data/pos-tagger/english/train-extra-english;/u/nlp/data/pos-tagger/english/train-tech-english
6
+ closedClassTags =
7
+ closedClassTagThreshold = 40
8
+ curWordMinFeatureThresh = 2
9
+ debug = false
10
+ debugPrefix =
11
+ tagSeparator = _
12
+ encoding = UTF-8
13
+ iterations = 100
14
+ lang = english
15
+ learnClosedClassTags = false
16
+ minFeatureThresh = 2
17
+ openClassTags =
18
+ rareWordMinFeatureThresh = 10
19
+ rareWordThresh = 5
20
+ search = owlqn
21
+ sgml = false
22
+ sigmaSquared = 0.0
23
+ regL1 = 0.75
24
+ tagInside =
25
+ tokenize = true
26
+ tokenizerFactory =
27
+ tokenizerOptions =
28
+ verbose = false
29
+ verboseResults = true
30
+ veryCommonWordThresh = 250
31
+ xmlInput =
32
+ outputFile =
33
+ outputFormat = slashTags
34
+ outputFormatOptions =
35
+ nthreads = 1
jar-in-jar-loader.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03d2bd671c54a211880d614022931ef2188a03f98272d8fc17f9b3217ac606b7
3
+ size 7269
libs/NeuralNetwork.jar ADDED
Binary file (43.7 kB). View file
 
libs/SMOTE.jar ADDED
Binary file (8.96 kB). View file
 
libs/commons-cli-1.4.jar ADDED
Binary file (53.8 kB). View file
 
libs/excel/commons-codec-1.10.jar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4241dfa94e711d435f29a4604a3e2de5c4aa3c165e23bd066be6fc1fc4309569
3
+ size 284184
libs/excel/commons-collections4-4.1.jar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1fe8b5968b57d8465425357ed2d9dc695504518bed2df5b565c4b8e68c1c8a5
3
+ size 751238
libs/excel/commons-logging-1.2.jar ADDED
Binary file (61.8 kB). View file
 
libs/excel/curvesapi-1.04.jar ADDED
Binary file (98.4 kB). View file
 
libs/excel/junit-4.12.jar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59721f0805e223d84b90677887d9ff567dc534d7c502ca903c0c2b17f05c116a
3
+ size 314932
libs/excel/log4j-1.2.17.jar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d31696445697720527091754369082a6651bd49781b6005deb94e56753406f9
3
+ size 489884
libs/excel/poi-3.17.jar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30181821dd2e849727b638b9e329aeff4a64f3445c4142b13cf7a18bb3552edd
3
+ size 2701171
libs/excel/poi-examples-3.17.jar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc36c537277d06ea79bd4a231b00d5a48c9b44a3e1a14b6ddb68a6e1a4a335c6
3
+ size 374509
libs/excel/poi-excelant-3.17.jar ADDED
Binary file (31.2 kB). View file
 
libs/excel/poi-ooxml-3.17.jar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac915547ea3b775a810cc26149711682a23404e3d6d0b239915ac0c9305ee3c3
3
+ size 1479023