SentiSE
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .classpath +31 -0
- .gitattributes +25 -0
- .gitignore +25 -0
- .project +17 -0
- .settings/org.eclipse.jdt.core.prefs +12 -0
- LICENSE +165 -0
- README.md +27 -0
- bin/.gitignore +3 -0
- build.xml +77 -0
- edu/stanford/nlp/models/lexparser/englishPCFG.caseless.ser.gz +3 -0
- edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz +3 -0
- edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.crf.ser.gz +3 -0
- edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.prop +61 -0
- edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz +3 -0
- edu/stanford/nlp/models/ner/english.all.3class.distsim.prop +58 -0
- edu/stanford/nlp/models/ner/english.all.3class.nodistsim.crf.ser.gz +3 -0
- edu/stanford/nlp/models/ner/english.all.3class.nodistsim.prop +53 -0
- edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.crf.ser.gz +3 -0
- edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.prop +61 -0
- edu/stanford/nlp/models/ner/english.conll.4class.distsim.crf.ser.gz +3 -0
- edu/stanford/nlp/models/ner/english.conll.4class.distsim.prop +63 -0
- edu/stanford/nlp/models/ner/english.conll.4class.nodistsim.crf.ser.gz +3 -0
- edu/stanford/nlp/models/ner/english.conll.4class.nodistsim.prop +62 -0
- edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.crf.ser.gz +3 -0
- edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.prop +55 -0
- edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf.ser.gz +3 -0
- edu/stanford/nlp/models/ner/english.muc.7class.distsim.prop +54 -0
- edu/stanford/nlp/models/ner/english.muc.7class.nodistsim.crf.ser.gz +3 -0
- edu/stanford/nlp/models/ner/english.muc.7class.nodistsim.prop +52 -0
- edu/stanford/nlp/models/ner/english.nowiki.3class.caseless.distsim.crf.ser.gz +3 -0
- edu/stanford/nlp/models/ner/english.nowiki.3class.caseless.distsim.prop +55 -0
- edu/stanford/nlp/models/ner/english.nowiki.3class.nodistsim.crf.ser.gz +3 -0
- edu/stanford/nlp/models/ner/english.nowiki.3class.nodistsim.prop +53 -0
- edu/stanford/nlp/models/ner/regexner.patterns +0 -0
- edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger +3 -0
- edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger.props +35 -0
- jar-in-jar-loader.zip +3 -0
- libs/NeuralNetwork.jar +0 -0
- libs/SMOTE.jar +0 -0
- libs/commons-cli-1.4.jar +0 -0
- libs/excel/commons-codec-1.10.jar +3 -0
- libs/excel/commons-collections4-4.1.jar +3 -0
- libs/excel/commons-logging-1.2.jar +0 -0
- libs/excel/curvesapi-1.04.jar +0 -0
- libs/excel/junit-4.12.jar +3 -0
- libs/excel/log4j-1.2.17.jar +3 -0
- libs/excel/poi-3.17.jar +3 -0
- libs/excel/poi-examples-3.17.jar +3 -0
- libs/excel/poi-excelant-3.17.jar +0 -0
- libs/excel/poi-ooxml-3.17.jar +3 -0
.classpath
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<classpath>
|
| 3 |
+
<classpathentry kind="src" path="src"/>
|
| 4 |
+
<classpathentry kind="lib" path="libs/excel/commons-codec-1.10.jar"/>
|
| 5 |
+
<classpathentry kind="lib" path="libs/excel/commons-collections4-4.1.jar"/>
|
| 6 |
+
<classpathentry kind="lib" path="libs/excel/commons-logging-1.2.jar"/>
|
| 7 |
+
<classpathentry kind="lib" path="libs/excel/curvesapi-1.04.jar"/>
|
| 8 |
+
<classpathentry kind="lib" path="libs/excel/junit-4.12.jar"/>
|
| 9 |
+
<classpathentry kind="lib" path="libs/excel/log4j-1.2.17.jar"/>
|
| 10 |
+
<classpathentry kind="lib" path="libs/excel/poi-3.17.jar"/>
|
| 11 |
+
<classpathentry kind="lib" path="libs/excel/poi-examples-3.17.jar"/>
|
| 12 |
+
<classpathentry kind="lib" path="libs/excel/poi-excelant-3.17.jar"/>
|
| 13 |
+
<classpathentry kind="lib" path="libs/excel/poi-ooxml-3.17.jar"/>
|
| 14 |
+
<classpathentry kind="lib" path="libs/excel/poi-ooxml-schemas-3.17.jar"/>
|
| 15 |
+
<classpathentry kind="lib" path="libs/excel/poi-scratchpad-3.17.jar"/>
|
| 16 |
+
<classpathentry kind="lib" path="libs/excel/xmlbeans-2.6.0.jar"/>
|
| 17 |
+
<classpathentry kind="lib" path="libs/mysql-connector-java-5.1.41.jar"/>
|
| 18 |
+
<classpathentry kind="lib" path="libs/SMOTE.jar"/>
|
| 19 |
+
<classpathentry kind="lib" path="libs/snowball-stemmer-1.3.0.581.1.jar"/>
|
| 20 |
+
<classpathentry kind="lib" path="libs/stanford-corenlp-3.8.0.jar"/>
|
| 21 |
+
<classpathentry kind="lib" path="libs/stanford-parser.jar"/>
|
| 22 |
+
<classpathentry kind="lib" path="libs/stanford-postagger.jar"/>
|
| 23 |
+
<classpathentry kind="lib" path="libs/weka-src.jar"/>
|
| 24 |
+
<classpathentry kind="lib" path="libs/weka.jar"/>
|
| 25 |
+
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
|
| 26 |
+
<classpathentry kind="lib" path="libs/commons-cli-1.4.jar"/>
|
| 27 |
+
<classpathentry kind="lib" path="libs/mtj-1.0-snapshot.jar"/>
|
| 28 |
+
<classpathentry kind="lib" path="libs/NeuralNetwork.jar"/>
|
| 29 |
+
<classpathentry kind="lib" path="libs/opencsv-2.3.jar"/>
|
| 30 |
+
<classpathentry kind="output" path="bin"/>
|
| 31 |
+
</classpath>
|
.gitattributes
CHANGED
|
@@ -33,3 +33,28 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
libs/excel/commons-codec-1.10.jar filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
libs/excel/commons-collections4-4.1.jar filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
libs/excel/junit-4.12.jar filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
libs/excel/log4j-1.2.17.jar filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
libs/excel/poi-3.17.jar filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
libs/excel/poi-examples-3.17.jar filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
libs/excel/poi-ooxml-3.17.jar filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
libs/excel/poi-ooxml-schemas-3.17.jar filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
libs/excel/poi-scratchpad-3.17.jar filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
libs/excel/xmlbeans-2.6.0.jar filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
libs/mtj-1.0-snapshot.jar filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
libs/mysql-connector-java-5.1.41.jar filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
libs/stanford-corenlp-3.8.0.jar filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
libs/stanford-parser.jar filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
libs/stanford-postagger.jar filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
libs/weka-src.jar filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
libs/weka.jar filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
models/sentise-oracle-short.xlsx filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
models/sentise-oracle1.xlsx filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
models/sentise-oracle2.xlsx filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
models/SentiWordNet_3.0.0_20130122.txt filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
sentise.jar filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
src/taggers/bidirectional-distsim-wsj-0-18.tagger filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
src/taggers/english-left3words-distsim.tagger filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Compiled class file
|
| 2 |
+
*.class
|
| 3 |
+
|
| 4 |
+
# Log file
|
| 5 |
+
*.log
|
| 6 |
+
|
| 7 |
+
# BlueJ files
|
| 8 |
+
*.ctxt
|
| 9 |
+
|
| 10 |
+
# Mobile Tools for Java (J2ME)
|
| 11 |
+
.mtj.tmp/
|
| 12 |
+
|
| 13 |
+
# Package Files #
|
| 14 |
+
*.war
|
| 15 |
+
*.nar
|
| 16 |
+
*.ear
|
| 17 |
+
*.csss
|
| 18 |
+
*.css
|
| 19 |
+
*.html
|
| 20 |
+
*.tar.gz
|
| 21 |
+
*.rar
|
| 22 |
+
*.scss
|
| 23 |
+
|
| 24 |
+
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
|
| 25 |
+
hs_err_pid*
|
.project
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<projectDescription>
|
| 3 |
+
<name>SentiSE</name>
|
| 4 |
+
<comment></comment>
|
| 5 |
+
<projects>
|
| 6 |
+
</projects>
|
| 7 |
+
<buildSpec>
|
| 8 |
+
<buildCommand>
|
| 9 |
+
<name>org.eclipse.jdt.core.javabuilder</name>
|
| 10 |
+
<arguments>
|
| 11 |
+
</arguments>
|
| 12 |
+
</buildCommand>
|
| 13 |
+
</buildSpec>
|
| 14 |
+
<natures>
|
| 15 |
+
<nature>org.eclipse.jdt.core.javanature</nature>
|
| 16 |
+
</natures>
|
| 17 |
+
</projectDescription>
|
.settings/org.eclipse.jdt.core.prefs
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
eclipse.preferences.version=1
|
| 2 |
+
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
|
| 3 |
+
org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate
|
| 4 |
+
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
|
| 5 |
+
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
|
| 6 |
+
org.eclipse.jdt.core.compiler.compliance=1.8
|
| 7 |
+
org.eclipse.jdt.core.compiler.debug.lineNumber=generate
|
| 8 |
+
org.eclipse.jdt.core.compiler.debug.localVariable=generate
|
| 9 |
+
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
|
| 10 |
+
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
|
| 11 |
+
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
|
| 12 |
+
org.eclipse.jdt.core.compiler.source=1.8
|
LICENSE
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
GNU LESSER GENERAL PUBLIC LICENSE
|
| 2 |
+
Version 3, 29 June 2007
|
| 3 |
+
|
| 4 |
+
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
| 5 |
+
Everyone is permitted to copy and distribute verbatim copies
|
| 6 |
+
of this license document, but changing it is not allowed.
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
This version of the GNU Lesser General Public License incorporates
|
| 10 |
+
the terms and conditions of version 3 of the GNU General Public
|
| 11 |
+
License, supplemented by the additional permissions listed below.
|
| 12 |
+
|
| 13 |
+
0. Additional Definitions.
|
| 14 |
+
|
| 15 |
+
As used herein, "this License" refers to version 3 of the GNU Lesser
|
| 16 |
+
General Public License, and the "GNU GPL" refers to version 3 of the GNU
|
| 17 |
+
General Public License.
|
| 18 |
+
|
| 19 |
+
"The Library" refers to a covered work governed by this License,
|
| 20 |
+
other than an Application or a Combined Work as defined below.
|
| 21 |
+
|
| 22 |
+
An "Application" is any work that makes use of an interface provided
|
| 23 |
+
by the Library, but which is not otherwise based on the Library.
|
| 24 |
+
Defining a subclass of a class defined by the Library is deemed a mode
|
| 25 |
+
of using an interface provided by the Library.
|
| 26 |
+
|
| 27 |
+
A "Combined Work" is a work produced by combining or linking an
|
| 28 |
+
Application with the Library. The particular version of the Library
|
| 29 |
+
with which the Combined Work was made is also called the "Linked
|
| 30 |
+
Version".
|
| 31 |
+
|
| 32 |
+
The "Minimal Corresponding Source" for a Combined Work means the
|
| 33 |
+
Corresponding Source for the Combined Work, excluding any source code
|
| 34 |
+
for portions of the Combined Work that, considered in isolation, are
|
| 35 |
+
based on the Application, and not on the Linked Version.
|
| 36 |
+
|
| 37 |
+
The "Corresponding Application Code" for a Combined Work means the
|
| 38 |
+
object code and/or source code for the Application, including any data
|
| 39 |
+
and utility programs needed for reproducing the Combined Work from the
|
| 40 |
+
Application, but excluding the System Libraries of the Combined Work.
|
| 41 |
+
|
| 42 |
+
1. Exception to Section 3 of the GNU GPL.
|
| 43 |
+
|
| 44 |
+
You may convey a covered work under sections 3 and 4 of this License
|
| 45 |
+
without being bound by section 3 of the GNU GPL.
|
| 46 |
+
|
| 47 |
+
2. Conveying Modified Versions.
|
| 48 |
+
|
| 49 |
+
If you modify a copy of the Library, and, in your modifications, a
|
| 50 |
+
facility refers to a function or data to be supplied by an Application
|
| 51 |
+
that uses the facility (other than as an argument passed when the
|
| 52 |
+
facility is invoked), then you may convey a copy of the modified
|
| 53 |
+
version:
|
| 54 |
+
|
| 55 |
+
a) under this License, provided that you make a good faith effort to
|
| 56 |
+
ensure that, in the event an Application does not supply the
|
| 57 |
+
function or data, the facility still operates, and performs
|
| 58 |
+
whatever part of its purpose remains meaningful, or
|
| 59 |
+
|
| 60 |
+
b) under the GNU GPL, with none of the additional permissions of
|
| 61 |
+
this License applicable to that copy.
|
| 62 |
+
|
| 63 |
+
3. Object Code Incorporating Material from Library Header Files.
|
| 64 |
+
|
| 65 |
+
The object code form of an Application may incorporate material from
|
| 66 |
+
a header file that is part of the Library. You may convey such object
|
| 67 |
+
code under terms of your choice, provided that, if the incorporated
|
| 68 |
+
material is not limited to numerical parameters, data structure
|
| 69 |
+
layouts and accessors, or small macros, inline functions and templates
|
| 70 |
+
(ten or fewer lines in length), you do both of the following:
|
| 71 |
+
|
| 72 |
+
a) Give prominent notice with each copy of the object code that the
|
| 73 |
+
Library is used in it and that the Library and its use are
|
| 74 |
+
covered by this License.
|
| 75 |
+
|
| 76 |
+
b) Accompany the object code with a copy of the GNU GPL and this license
|
| 77 |
+
document.
|
| 78 |
+
|
| 79 |
+
4. Combined Works.
|
| 80 |
+
|
| 81 |
+
You may convey a Combined Work under terms of your choice that,
|
| 82 |
+
taken together, effectively do not restrict modification of the
|
| 83 |
+
portions of the Library contained in the Combined Work and reverse
|
| 84 |
+
engineering for debugging such modifications, if you also do each of
|
| 85 |
+
the following:
|
| 86 |
+
|
| 87 |
+
a) Give prominent notice with each copy of the Combined Work that
|
| 88 |
+
the Library is used in it and that the Library and its use are
|
| 89 |
+
covered by this License.
|
| 90 |
+
|
| 91 |
+
b) Accompany the Combined Work with a copy of the GNU GPL and this license
|
| 92 |
+
document.
|
| 93 |
+
|
| 94 |
+
c) For a Combined Work that displays copyright notices during
|
| 95 |
+
execution, include the copyright notice for the Library among
|
| 96 |
+
these notices, as well as a reference directing the user to the
|
| 97 |
+
copies of the GNU GPL and this license document.
|
| 98 |
+
|
| 99 |
+
d) Do one of the following:
|
| 100 |
+
|
| 101 |
+
0) Convey the Minimal Corresponding Source under the terms of this
|
| 102 |
+
License, and the Corresponding Application Code in a form
|
| 103 |
+
suitable for, and under terms that permit, the user to
|
| 104 |
+
recombine or relink the Application with a modified version of
|
| 105 |
+
the Linked Version to produce a modified Combined Work, in the
|
| 106 |
+
manner specified by section 6 of the GNU GPL for conveying
|
| 107 |
+
Corresponding Source.
|
| 108 |
+
|
| 109 |
+
1) Use a suitable shared library mechanism for linking with the
|
| 110 |
+
Library. A suitable mechanism is one that (a) uses at run time
|
| 111 |
+
a copy of the Library already present on the user's computer
|
| 112 |
+
system, and (b) will operate properly with a modified version
|
| 113 |
+
of the Library that is interface-compatible with the Linked
|
| 114 |
+
Version.
|
| 115 |
+
|
| 116 |
+
e) Provide Installation Information, but only if you would otherwise
|
| 117 |
+
be required to provide such information under section 6 of the
|
| 118 |
+
GNU GPL, and only to the extent that such information is
|
| 119 |
+
necessary to install and execute a modified version of the
|
| 120 |
+
Combined Work produced by recombining or relinking the
|
| 121 |
+
Application with a modified version of the Linked Version. (If
|
| 122 |
+
you use option 4d0, the Installation Information must accompany
|
| 123 |
+
the Minimal Corresponding Source and Corresponding Application
|
| 124 |
+
Code. If you use option 4d1, you must provide the Installation
|
| 125 |
+
Information in the manner specified by section 6 of the GNU GPL
|
| 126 |
+
for conveying Corresponding Source.)
|
| 127 |
+
|
| 128 |
+
5. Combined Libraries.
|
| 129 |
+
|
| 130 |
+
You may place library facilities that are a work based on the
|
| 131 |
+
Library side by side in a single library together with other library
|
| 132 |
+
facilities that are not Applications and are not covered by this
|
| 133 |
+
License, and convey such a combined library under terms of your
|
| 134 |
+
choice, if you do both of the following:
|
| 135 |
+
|
| 136 |
+
a) Accompany the combined library with a copy of the same work based
|
| 137 |
+
on the Library, uncombined with any other library facilities,
|
| 138 |
+
conveyed under the terms of this License.
|
| 139 |
+
|
| 140 |
+
b) Give prominent notice with the combined library that part of it
|
| 141 |
+
is a work based on the Library, and explaining where to find the
|
| 142 |
+
accompanying uncombined form of the same work.
|
| 143 |
+
|
| 144 |
+
6. Revised Versions of the GNU Lesser General Public License.
|
| 145 |
+
|
| 146 |
+
The Free Software Foundation may publish revised and/or new versions
|
| 147 |
+
of the GNU Lesser General Public License from time to time. Such new
|
| 148 |
+
versions will be similar in spirit to the present version, but may
|
| 149 |
+
differ in detail to address new problems or concerns.
|
| 150 |
+
|
| 151 |
+
Each version is given a distinguishing version number. If the
|
| 152 |
+
Library as you received it specifies that a certain numbered version
|
| 153 |
+
of the GNU Lesser General Public License "or any later version"
|
| 154 |
+
applies to it, you have the option of following the terms and
|
| 155 |
+
conditions either of that published version or of any later version
|
| 156 |
+
published by the Free Software Foundation. If the Library as you
|
| 157 |
+
received it does not specify a version number of the GNU Lesser
|
| 158 |
+
General Public License, you may choose any version of the GNU Lesser
|
| 159 |
+
General Public License ever published by the Free Software Foundation.
|
| 160 |
+
|
| 161 |
+
If the Library as you received it specifies that a proxy can decide
|
| 162 |
+
whether future versions of the GNU Lesser General Public License shall
|
| 163 |
+
apply, that proxy's public statement of acceptance of any version is
|
| 164 |
+
permanent authorization for you to choose that version for the
|
| 165 |
+
Library.
|
README.md
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SentiSE
|
| 2 |
+
SentiSE is a sentiment analysis tool for Software Engineering interactions
|
| 3 |
+
|
| 4 |
+
SentiSE is a supervised-learning-based sentiment analysis tool that incorporates ten supervised learning algorithms and
|
| 5 |
+
fourteen different optional pre-processing steps that are commonly used to improve the performance of sentiment analysis tools.
|
| 6 |
+
We empirically evaluated each of the algorithms and preprocessing steps to determine the best configuration.
|
| 7 |
+
We evaluated SentiSE using a large-scale labeled dataset of 13K comments from three different types of SE interactions.
|
| 8 |
+
<br><br>
|
| 9 |
+
**Performance Evaluation:**
|
| 10 |
+
We compare SentiSE with other sentiment analysis tools available in the software engineering domain. We use two datasets for this evaluation: _Oracle1_, a 13K labeled dataset with 21% positive, 60% neutral and 19% negative data, and _Oracle2_, with 30% positive, 40% neutral and 30% negative data. The table below shows the performance comparison:
|
| 11 |
+
|
| 12 |
+
| Oracle | tool |Precision<br> (Positive) | Recall<br> (Positive) | F-measure<br> (Positive) | Precision<br> (Neutral) | Recall<br> (Neutral) | F-measure<br> (Neutral) | Precision<br> (Negative) | Recall<br> (Negative) | F-measure<br> (Negative) | Accuracy | Weighted <br> Kappa |
|
| 13 |
+
|--|--| -- | -- | -- | -- | --| -- | -- | --| -- | -- | --|
|
| 14 |
+
|_Oracle1_|SentiSE | 85.63% | 75.27% | 80.11% | 81.51% | 92.78% | 86.78% | 81.03% | 55.92%|66.16%|82.23%|0.681
|
| 15 |
+
|_Oracle1_| SentiCR | 81.81% | 76.59%| 79.04%| 80.04% | 92.77% | 85.92% | 82.71% | 46.38% | 59.40% | 80.6655% | 0.647
|
| 16 |
+
|_Oracle1_| SentiStrength-SE | 75.81% | 81.45% | 78.53% | 84.68% | 83.64% | 84.16% | 66.50% | 63.42% | 64.92% | 79.32% | 0.6587
|
| 17 |
+
|_Oracle2_| SentiSE | 88.83% | 85.09% | 86.92% | 86.62% | 91.52% | 89.00% | 85.87% | 78.61% | 82.07% | 86.92% | 0.788
|
| 18 |
+
|_Oracle2_|SentiCR | 84.32% | 84.73% | 84.50% | 80.70% | 92.08% | 86.00% | 86.45% | 59.49% | 70.40% | 82.47% | 0.716
|
| 19 |
+
|_Oracle2_| SentiStrength-SE | 79.56% | 83.57% | 81.52% | 80.73% | 84.15% | 82.41% | 80.41% | 69.31% | 74.45% | 80.34% | 0.696
|
| 20 |
+
|
| 21 |
+
<br><br><br>
|
| 22 |
+
**Usage Instructions:**
|
| 23 |
+
Download and import the SentiSE project. Run the build.xml file to generate sentise.jar. SentiSE is a command-line based tool. Use the command <code> java -jar sentise.jar -help</code> to find all the commands available in SentiSE.
|
| 24 |
+
|
| 25 |
+
<br><br>
|
| 26 |
+
**ScreenShot**
|
| 27 |
+

|
bin/.gitignore
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/edu/
|
| 2 |
+
/taggers/
|
| 3 |
+
/weka/
|
build.xml
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
| 2 |
+
<project default="jar" name="SentiSE Builder" >
|
| 3 |
+
<property name="src.dir" value="src"/>
|
| 4 |
+
|
| 5 |
+
<property name="build.dir" value="build"/>
|
| 6 |
+
<property name="classes.dir" value="${build.dir}/bin"/>
|
| 7 |
+
<!-- Directory that receives sentise.jar. The previous value referenced the undefined property "class.dir"; Ant leaves unknown references unexpanded, so the archive was written into a directory literally named "${class.dir}". The build directory is used so the jar does not sit inside the compiled classes folder. -->
<property name="jar.dir" value="${build.dir}"/>
|
| 8 |
+
<property name="dir.jarfile" value="./"/>
|
| 9 |
+
<!-- Fully qualified application entry point; kept identical to the Rsrc-Main-Class manifest attribute written by the jar target. -->
<property name="main-class" value="edu.sentise.SentiSE"/>
|
| 10 |
+
|
| 11 |
+
<path id="libs-class">
|
| 12 |
+
<fileset dir="./libs">
|
| 13 |
+
<include name="*.jar"/>
|
| 14 |
+
</fileset>
|
| 15 |
+
</path>
|
| 16 |
+
|
| 17 |
+
<path id="excel-class">
|
| 18 |
+
<fileset dir="./libs/excel">
|
| 19 |
+
<include name="*.jar"/>
|
| 20 |
+
</fileset>
|
| 21 |
+
</path>
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
<target name="clean">
|
| 25 |
+
<delete dir="${build.dir}"/>
|
| 26 |
+
</target>
|
| 27 |
+
|
| 28 |
+
<target name="compile">
|
| 29 |
+
<mkdir dir="${classes.dir}"/>
|
| 30 |
+
<mkdir dir="${build.dir}"/>
|
| 31 |
+
|
| 32 |
+
<javac destdir="${classes.dir}" includeantruntime="false" >
|
| 33 |
+
<classpath refid="libs-class"/>
|
| 34 |
+
<classpath refid="excel-class"/>
|
| 35 |
+
<src path="${src.dir}"/>
|
| 36 |
+
|
| 37 |
+
</javac>
|
| 38 |
+
</target>
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
<target name="jar" depends="compile">
|
| 43 |
+
<jar destfile="${jar.dir}/sentise.jar">
|
| 44 |
+
<manifest>
|
| 45 |
+
<attribute name="Main-Class" value="org.eclipse.jdt.internal.jarinjarloader.JarRsrcLoader"/>
|
| 46 |
+
<attribute name="Rsrc-Main-Class" value="edu.sentise.SentiSE"/>
|
| 47 |
+
<attribute name="Class-Path" value="."/>
|
| 48 |
+
<attribute name="Rsrc-Class-Path" value="./ mysql-connector-java-5.1.41.jar SMOTE.jar snowball-stemmer-1.3.0.581.1.jar stanford-corenlp-3.8.0.jar stanford-parser.jar stanford-postagger.jar weka-src.jar weka.jar commons-codec-1.10.jar commons-collections4-4.1.jar junit-4.12.jar log4j-1.2.17.jar poi-3.17.jar poi-examples-3.17.jar poi-excelant-3.17.jar poi-ooxml-3.17.jar poi-ooxml-schemas-3.17.jar poi-scratchpad-3.17.jar xmlbeans-2.6.0.jar commons-cli-1.4.jar mtj-1.0-snapshot.jar opencsv-2.3.jar NeuralNetwork.jar"/>
|
| 49 |
+
</manifest>
|
| 50 |
+
<zipfileset src="jar-in-jar-loader.zip"/>
|
| 51 |
+
<!-- NOTE(review): this packages ./bin, not ${classes.dir} (build/bin) where the compile target writes its output; presumably ./bin is the IDE output folder. Confirm which directory is meant to be packaged. -->
<fileset dir="${dir.jarfile}/bin"/>
|
| 52 |
+
<zipfileset dir="${dir.jarfile}/libs" includes="mysql-connector-java-5.1.41.jar"/>
|
| 53 |
+
<zipfileset dir="${dir.jarfile}/libs" includes="SMOTE.jar"/>
|
| 54 |
+
<zipfileset dir="${dir.jarfile}/libs" includes="snowball-stemmer-1.3.0.581.1.jar"/>
|
| 55 |
+
<zipfileset dir="${dir.jarfile}/libs" includes="stanford-corenlp-3.8.0.jar"/>
|
| 56 |
+
<zipfileset dir="${dir.jarfile}/libs" includes="stanford-parser.jar"/>
|
| 57 |
+
<zipfileset dir="${dir.jarfile}/libs" includes="stanford-postagger.jar"/>
|
| 58 |
+
<zipfileset dir="${dir.jarfile}/libs" includes="weka-src.jar"/>
|
| 59 |
+
<zipfileset dir="${dir.jarfile}/libs" includes="weka.jar"/>
|
| 60 |
+
<zipfileset dir="${dir.jarfile}/libs/excel" includes="commons-codec-1.10.jar"/>
|
| 61 |
+
<zipfileset dir="${dir.jarfile}/libs/excel" includes="commons-collections4-4.1.jar"/>
|
| 62 |
+
<zipfileset dir="${dir.jarfile}/libs/excel" includes="junit-4.12.jar"/>
|
| 63 |
+
<zipfileset dir="${dir.jarfile}/libs/excel" includes="log4j-1.2.17.jar"/>
|
| 64 |
+
<zipfileset dir="${dir.jarfile}/libs/excel" includes="poi-3.17.jar"/>
|
| 65 |
+
<zipfileset dir="${dir.jarfile}/libs/excel" includes="poi-examples-3.17.jar"/>
|
| 66 |
+
<zipfileset dir="${dir.jarfile}/libs/excel" includes="poi-excelant-3.17.jar"/>
|
| 67 |
+
<zipfileset dir="${dir.jarfile}/libs/excel" includes="poi-ooxml-3.17.jar"/>
|
| 68 |
+
<zipfileset dir="${dir.jarfile}/libs/excel" includes="poi-ooxml-schemas-3.17.jar"/>
|
| 69 |
+
<zipfileset dir="${dir.jarfile}/libs/excel" includes="poi-scratchpad-3.17.jar"/>
|
| 70 |
+
<zipfileset dir="${dir.jarfile}/libs/excel" includes="xmlbeans-2.6.0.jar"/>
|
| 71 |
+
<zipfileset dir="${dir.jarfile}/libs" includes="commons-cli-1.4.jar"/>
|
| 72 |
+
<zipfileset dir="${dir.jarfile}/libs" includes="mtj-1.0-snapshot.jar"/>
|
| 73 |
+
<zipfileset dir="${dir.jarfile}/libs" includes="opencsv-2.3.jar"/>
|
| 74 |
+
<zipfileset dir="${dir.jarfile}/libs" includes="NeuralNetwork.jar"/>
|
| 75 |
+
</jar>
|
| 76 |
+
</target>
|
| 77 |
+
</project>
|
edu/stanford/nlp/models/lexparser/englishPCFG.caseless.ser.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:defb4d91b899c4f92ea605f026c57408c4d3df1fb371b50f2d055ace782cbc1e
|
| 3 |
+
size 1769534
|
edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed674cce7311f76c018806e61cdb045e219258ee30014da4e3e686b9230ebea9
|
| 3 |
+
size 1883795
|
edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.crf.ser.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c9a504ad8f82f415ad64ce96478c59788355d8096edc96ccb8b43289739f2f1
|
| 3 |
+
size 37319863
|
edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.prop
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# trainFileList = /u/nlp/data/ner/column_data/all.3class.train.old2,/u/nlp/data/ner/column_data/english.extra.3class.train
|
| 2 |
+
trainFileList = /u/nlp/data/ner/column_data/ace23.3class.train,/u/nlp/data/ner/column_data/muc6.3class.ptb.train,/u/nlp/data/ner/column_data/muc7.3class.ptb.train,/u/nlp/data/ner/column_data/conll.3class.train,/u/nlp/data/ner/column_data/wikiner.3class.train,/u/nlp/data/ner/column_data/ontonotes.3class.train,/u/nlp/data/ner/column_data/english.extra.3class.train
|
| 3 |
+
testFiles = /u/nlp/data/ner/column_data/all.3class.test,/u/nlp/data/ner/column_data/all.3class.upper.test,/u/nlp/data/ner/column_data/all.3class.lower.test
|
| 4 |
+
# testFile = uppercase.tsv
|
| 5 |
+
serializeTo = english.all.3class.caseless.distsim.crf.ser.gz
|
| 6 |
+
|
| 7 |
+
type = crf
|
| 8 |
+
|
| 9 |
+
wordFunction = edu.stanford.nlp.process.LowercaseAndAmericanizeFunction
|
| 10 |
+
useKnownLCWords = false
|
| 11 |
+
|
| 12 |
+
#distSimLexicon = /u/nlp/data/pos_tags_are_useless/englishGigaword.200.pruned
|
| 13 |
+
#distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200
|
| 14 |
+
distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
|
| 15 |
+
# right options for egw4-reut.512 (though effect of having or not is small)
|
| 16 |
+
numberEquivalenceDistSim = true
|
| 17 |
+
unknownWordDistSimClass = 0
|
| 18 |
+
useDistSim = true
|
| 19 |
+
|
| 20 |
+
map = word=0,answer=1
|
| 21 |
+
|
| 22 |
+
# saveFeatureIndexToDisk = true
|
| 23 |
+
|
| 24 |
+
useClassFeature=true
|
| 25 |
+
useWord=true
|
| 26 |
+
#useWordPairs=true
|
| 27 |
+
useNGrams=true
|
| 28 |
+
noMidNGrams=true
|
| 29 |
+
maxNGramLeng=6
|
| 30 |
+
usePrev=true
|
| 31 |
+
useNext=true
|
| 32 |
+
#useTags=true
|
| 33 |
+
#useWordTag=true
|
| 34 |
+
useLongSequences=true
|
| 35 |
+
useSequences=true
|
| 36 |
+
usePrevSequences=true
|
| 37 |
+
useTypeSeqs=true
|
| 38 |
+
useTypeSeqs2=true
|
| 39 |
+
useTypeySequences=true
|
| 40 |
+
useOccurrencePatterns=true
|
| 41 |
+
useLastRealWord=true
|
| 42 |
+
useNextRealWord=true
|
| 43 |
+
#useReverse=false
|
| 44 |
+
normalize=true
|
| 45 |
+
# normalizeTimex=true
|
| 46 |
+
wordShape=chris2useLC
|
| 47 |
+
useDisjunctive=true
|
| 48 |
+
disjunctionWidth=5
|
| 49 |
+
#useDisjunctiveShapeInteraction=true
|
| 50 |
+
|
| 51 |
+
maxLeft=1
|
| 52 |
+
readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
|
| 53 |
+
|
| 54 |
+
useObservedSequencesOnly=true
|
| 55 |
+
|
| 56 |
+
useQN = true
|
| 57 |
+
QNsize = 25
|
| 58 |
+
|
| 59 |
+
# makes it go faster
|
| 60 |
+
featureDiffThresh=0.05
|
| 61 |
+
|
edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:65905fe1bbba47d0ac2b468e63d91cfde72dce0a7261345e015c45fab9cff93f
|
| 3 |
+
size 31985042
|
edu/stanford/nlp/models/ner/english.all.3class.distsim.prop
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# trainFileList = /u/nlp/data/ner/column_data/all.3class.train.old2,/u/nlp/data/ner/column_data/english.extra.3class.train
|
| 2 |
+
trainFileList = /u/nlp/data/ner/column_data/ace23.3class.train,/u/nlp/data/ner/column_data/muc6.3class.ptb.train,/u/nlp/data/ner/column_data/muc7.3class.ptb.train,/u/nlp/data/ner/column_data/conll.3class.train,/u/nlp/data/ner/column_data/wikiner.3class.train,/u/nlp/data/ner/column_data/ontonotes.3class.train,/u/nlp/data/ner/column_data/english.extra.3class.train
|
| 3 |
+
testFile = /u/nlp/data/ner/column_data/all.3class.test
|
| 4 |
+
serializeTo = english.all.3class.distsim.crf.ser.gz
|
| 5 |
+
|
| 6 |
+
type = crf
|
| 7 |
+
|
| 8 |
+
wordFunction = edu.stanford.nlp.process.AmericanizeFunction
|
| 9 |
+
|
| 10 |
+
#distSimLexicon = /u/nlp/data/pos_tags_are_useless/englishGigaword.200.pruned
|
| 11 |
+
#distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200
|
| 12 |
+
distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
|
| 13 |
+
# These are the right options for egw4-reut.512 (though the effect of setting them or not is small)
|
| 14 |
+
numberEquivalenceDistSim = true
|
| 15 |
+
unknownWordDistSimClass = 0
|
| 16 |
+
useDistSim = true
|
| 17 |
+
|
| 18 |
+
map = word=0,answer=1
|
| 19 |
+
|
| 20 |
+
saveFeatureIndexToDisk = true
|
| 21 |
+
|
| 22 |
+
useClassFeature=true
|
| 23 |
+
useWord=true
|
| 24 |
+
#useWordPairs=true
|
| 25 |
+
useNGrams=true
|
| 26 |
+
noMidNGrams=true
|
| 27 |
+
maxNGramLeng=6
|
| 28 |
+
usePrev=true
|
| 29 |
+
useNext=true
|
| 30 |
+
#useTags=true
|
| 31 |
+
#useWordTag=true
|
| 32 |
+
useLongSequences=true
|
| 33 |
+
useSequences=true
|
| 34 |
+
usePrevSequences=true
|
| 35 |
+
useTypeSeqs=true
|
| 36 |
+
useTypeSeqs2=true
|
| 37 |
+
useTypeySequences=true
|
| 38 |
+
useOccurrencePatterns=true
|
| 39 |
+
useLastRealWord=true
|
| 40 |
+
useNextRealWord=true
|
| 41 |
+
#useReverse=false
|
| 42 |
+
normalize=true
|
| 43 |
+
# normalizeTimex=true
|
| 44 |
+
wordShape=chris2useLC
|
| 45 |
+
useDisjunctive=true
|
| 46 |
+
disjunctionWidth=5
|
| 47 |
+
#useDisjunctiveShapeInteraction=true
|
| 48 |
+
|
| 49 |
+
maxLeft=1
|
| 50 |
+
readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
|
| 51 |
+
|
| 52 |
+
useObservedSequencesOnly=true
|
| 53 |
+
|
| 54 |
+
useQN = true
|
| 55 |
+
QNsize = 25
|
| 56 |
+
|
| 57 |
+
# makes it go faster
|
| 58 |
+
featureDiffThresh=0.05
|
edu/stanford/nlp/models/ner/english.all.3class.nodistsim.crf.ser.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77339de3c30111b12302d331d97b24304fdf537258e3b6007a1e6e96390ce9b2
|
| 3 |
+
size 23179051
|
edu/stanford/nlp/models/ner/english.all.3class.nodistsim.prop
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
trainFileList = /u/nlp/data/ner/column_data/all.3class.train.old2,/u/nlp/data/ner/column_data/english.extra.3class.train
|
| 2 |
+
testFile = /u/nlp/data/ner/column_data/all.3class.test
|
| 3 |
+
serializeTo = english.all.3class.nodistsim.crf.ser.gz
|
| 4 |
+
|
| 5 |
+
type = crf
|
| 6 |
+
|
| 7 |
+
wordFunction = edu.stanford.nlp.process.AmericanizeFunction
|
| 8 |
+
|
| 9 |
+
#distSimLexicon = /u/nlp/data/pos_tags_are_useless/englishGigaword.200.pruned
|
| 10 |
+
distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200.pruned
|
| 11 |
+
useDistSim = false
|
| 12 |
+
|
| 13 |
+
map = word=0,answer=1
|
| 14 |
+
|
| 15 |
+
saveFeatureIndexToDisk = true
|
| 16 |
+
|
| 17 |
+
useClassFeature=true
|
| 18 |
+
useWord=true
|
| 19 |
+
#useWordPairs=true
|
| 20 |
+
useNGrams=true
|
| 21 |
+
noMidNGrams=true
|
| 22 |
+
maxNGramLeng=6
|
| 23 |
+
usePrev=true
|
| 24 |
+
useNext=true
|
| 25 |
+
#useTags=true
|
| 26 |
+
#useWordTag=true
|
| 27 |
+
useLongSequences=true
|
| 28 |
+
useSequences=true
|
| 29 |
+
usePrevSequences=true
|
| 30 |
+
maxLeft=1
|
| 31 |
+
useTypeSeqs=true
|
| 32 |
+
useTypeSeqs2=true
|
| 33 |
+
useTypeySequences=true
|
| 34 |
+
useOccurrencePatterns=true
|
| 35 |
+
useLastRealWord=true
|
| 36 |
+
useNextRealWord=true
|
| 37 |
+
#useReverse=false
|
| 38 |
+
normalize=true
|
| 39 |
+
# normalizeTimex=true
|
| 40 |
+
wordShape=chris2useLC
|
| 41 |
+
useDisjunctive=true
|
| 42 |
+
disjunctionWidth=5
|
| 43 |
+
#useDisjunctiveShapeInteraction=true
|
| 44 |
+
|
| 45 |
+
readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
|
| 46 |
+
|
| 47 |
+
useObservedSequencesOnly=true
|
| 48 |
+
|
| 49 |
+
useQN = true
|
| 50 |
+
QNsize = 25
|
| 51 |
+
|
| 52 |
+
# makes it go faster
|
| 53 |
+
featureDiffThresh=0.05
|
edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.crf.ser.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2073dcfff4061549fe6aa6b6fd22246ae6bad28c819c6806d690ad3c1e383c8d
|
| 3 |
+
size 21421444
|
edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.prop
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This is better than Jenny's either with or without distsim turned on
|
| 2 |
+
# And using iob2 is better for optimal CoNLL performance.
|
| 3 |
+
# Features titled "chris2009"
|
| 4 |
+
|
| 5 |
+
trainFile = /u/nlp/data/ner/column_data/conll.4class.train
|
| 6 |
+
# testFile = /u/nlp/data/ner/column_data/conll.4class.testa
|
| 7 |
+
serializeTo = english.conll.4class.caseless.distsim.crf.ser.gz
|
| 8 |
+
|
| 9 |
+
wordFunction = edu.stanford.nlp.process.LowercaseAndAmericanizeFunction
|
| 10 |
+
useKnownLCWords = false
|
| 11 |
+
|
| 12 |
+
useDistSim = true
|
| 13 |
+
distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
|
| 14 |
+
|
| 15 |
+
map = word=0,answer=1
|
| 16 |
+
|
| 17 |
+
saveFeatureIndexToDisk = true
|
| 18 |
+
|
| 19 |
+
useTitle = true
|
| 20 |
+
useClassFeature=true
|
| 21 |
+
useWord=true
|
| 22 |
+
# useWordPairs=true
|
| 23 |
+
useNGrams=true
|
| 24 |
+
noMidNGrams=true
|
| 25 |
+
# maxNGramLeng=6 # Having them all helps, which is the default
|
| 26 |
+
usePrev=true
|
| 27 |
+
useNext=true
|
| 28 |
+
# useTags=true
|
| 29 |
+
# useWordTag=true
|
| 30 |
+
useLongSequences=true
|
| 31 |
+
useSequences=true
|
| 32 |
+
usePrevSequences=true
|
| 33 |
+
maxLeft=1
|
| 34 |
+
useTypeSeqs=true
|
| 35 |
+
useTypeSeqs2=true
|
| 36 |
+
useTypeySequences=true
|
| 37 |
+
useOccurrencePatterns=true
|
| 38 |
+
useLastRealWord=true
|
| 39 |
+
useNextRealWord=true
|
| 40 |
+
#useReverse=false
|
| 41 |
+
normalize=true
|
| 42 |
+
# normalizeTimex=true
|
| 43 |
+
# dan2 better than chris2 on CoNLL data...
|
| 44 |
+
wordShape=dan2useLC
|
| 45 |
+
useDisjunctive=true
|
| 46 |
+
# disjunctionWidth 4 is better than 5 on CoNLL data
|
| 47 |
+
disjunctionWidth=4
|
| 48 |
+
#useDisjunctiveShapeInteraction=true
|
| 49 |
+
|
| 50 |
+
type=crf
|
| 51 |
+
|
| 52 |
+
readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
|
| 53 |
+
|
| 54 |
+
useObservedSequencesOnly=true
|
| 55 |
+
|
| 56 |
+
sigma = 20
|
| 57 |
+
useQN = true
|
| 58 |
+
QNsize = 25
|
| 59 |
+
|
| 60 |
+
# makes it go faster
|
| 61 |
+
featureDiffThresh=0.05
|
edu/stanford/nlp/models/ner/english.conll.4class.distsim.crf.ser.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f60c3630612f2c93420f191fec792f87a4f64aee38998324b7a1f8b5c90f7363
|
| 3 |
+
size 17803778
|
edu/stanford/nlp/models/ner/english.conll.4class.distsim.prop
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This is better than Jenny's either with or without distsim turned on
|
| 2 |
+
# And using iob2 is better for optimal CoNLL performance.
|
| 3 |
+
# Features titled "chris2009"
|
| 4 |
+
|
| 5 |
+
trainFile = /u/nlp/data/ner/column_data/conll.4class.train
|
| 6 |
+
# testFile = /u/nlp/data/ner/column_data/conll.4class.testa
|
| 7 |
+
serializeTo = english.conll.4class.distsim.crf.ser.gz
|
| 8 |
+
|
| 9 |
+
wordFunction = edu.stanford.nlp.process.AmericanizeFunction
|
| 10 |
+
|
| 11 |
+
useDistSim = true
|
| 12 |
+
distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
|
| 13 |
+
# These are the right options for egw4-reut.512 (though the effect of setting them or not is small)
|
| 14 |
+
numberEquivalenceDistSim = true
|
| 15 |
+
unknownWordDistSimClass = 0
|
| 16 |
+
|
| 17 |
+
map = word=0,answer=1
|
| 18 |
+
|
| 19 |
+
saveFeatureIndexToDisk = true
|
| 20 |
+
|
| 21 |
+
useTitle = true
|
| 22 |
+
useClassFeature=true
|
| 23 |
+
useWord=true
|
| 24 |
+
# useWordPairs=true
|
| 25 |
+
useNGrams=true
|
| 26 |
+
noMidNGrams=true
|
| 27 |
+
# maxNGramLeng=6 # Having them all helps, which is the default
|
| 28 |
+
usePrev=true
|
| 29 |
+
useNext=true
|
| 30 |
+
# useTags=true
|
| 31 |
+
# useWordTag=true
|
| 32 |
+
useLongSequences=true
|
| 33 |
+
useSequences=true
|
| 34 |
+
usePrevSequences=true
|
| 35 |
+
maxLeft=1
|
| 36 |
+
useTypeSeqs=true
|
| 37 |
+
useTypeSeqs2=true
|
| 38 |
+
useTypeySequences=true
|
| 39 |
+
useOccurrencePatterns=true
|
| 40 |
+
useLastRealWord=true
|
| 41 |
+
useNextRealWord=true
|
| 42 |
+
#useReverse=false
|
| 43 |
+
normalize=true
|
| 44 |
+
# normalizeTimex=true
|
| 45 |
+
# dan2 better than chris2 on CoNLL data...
|
| 46 |
+
wordShape=dan2useLC
|
| 47 |
+
useDisjunctive=true
|
| 48 |
+
# disjunctionWidth 4 is better than 5 on CoNLL data
|
| 49 |
+
disjunctionWidth=4
|
| 50 |
+
#useDisjunctiveShapeInteraction=true
|
| 51 |
+
|
| 52 |
+
type=crf
|
| 53 |
+
|
| 54 |
+
readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
|
| 55 |
+
|
| 56 |
+
useObservedSequencesOnly=true
|
| 57 |
+
|
| 58 |
+
sigma = 20
|
| 59 |
+
useQN = true
|
| 60 |
+
QNsize = 25
|
| 61 |
+
|
| 62 |
+
# makes it go faster
|
| 63 |
+
featureDiffThresh=0.05
|
edu/stanford/nlp/models/ner/english.conll.4class.nodistsim.crf.ser.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b14f1b2b25935f0c7b1499e3a910ec11d3bbb1e7350ed0f888b8f170fe38a90c
|
| 3 |
+
size 14860645
|
edu/stanford/nlp/models/ner/english.conll.4class.nodistsim.prop
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This is better than Jenny's either with or without distsim turned on
|
| 2 |
+
# And using iob2 is better for optimal CoNLL performance.
|
| 3 |
+
# Features labeled "chris2009"
|
| 4 |
+
|
| 5 |
+
trainFile = /u/nlp/data/ner/column_data/conll.4class.train
|
| 6 |
+
# testFile = /u/nlp/data/ner/column_data/conll.4class.testa
|
| 7 |
+
serializeTo = english.conll.4class.nodistsim.crf.ser.gz
|
| 8 |
+
|
| 9 |
+
wordFunction = edu.stanford.nlp.process.AmericanizeFunction
|
| 10 |
+
|
| 11 |
+
# This is good, but deliberately not used here
|
| 12 |
+
# useDistSim = true
|
| 13 |
+
useDistSim = false
|
| 14 |
+
distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200
|
| 15 |
+
|
| 16 |
+
map = word=0,answer=1
|
| 17 |
+
|
| 18 |
+
saveFeatureIndexToDisk = true
|
| 19 |
+
|
| 20 |
+
useTitle = true
|
| 21 |
+
useClassFeature=true
|
| 22 |
+
useWord=true
|
| 23 |
+
# useWordPairs=true
|
| 24 |
+
useNGrams=true
|
| 25 |
+
noMidNGrams=true
|
| 26 |
+
# maxNGramLeng=6 # Having them all helps, which is the default
|
| 27 |
+
usePrev=true
|
| 28 |
+
useNext=true
|
| 29 |
+
# useTags=true
|
| 30 |
+
# useWordTag=true
|
| 31 |
+
useLongSequences=true
|
| 32 |
+
useSequences=true
|
| 33 |
+
usePrevSequences=true
|
| 34 |
+
maxLeft=1
|
| 35 |
+
useTypeSeqs=true
|
| 36 |
+
useTypeSeqs2=true
|
| 37 |
+
useTypeySequences=true
|
| 38 |
+
useOccurrencePatterns=true
|
| 39 |
+
useLastRealWord=true
|
| 40 |
+
useNextRealWord=true
|
| 41 |
+
#useReverse=false
|
| 42 |
+
normalize=true
|
| 43 |
+
# normalizeTimex=true
|
| 44 |
+
# dan2 better than chris2 on CoNLL data...
|
| 45 |
+
wordShape=dan2useLC
|
| 46 |
+
useDisjunctive=true
|
| 47 |
+
# disjunctionWidth 4 is better than 5 on CoNLL data
|
| 48 |
+
disjunctionWidth=4
|
| 49 |
+
#useDisjunctiveShapeInteraction=true
|
| 50 |
+
|
| 51 |
+
type=crf
|
| 52 |
+
|
| 53 |
+
readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
|
| 54 |
+
|
| 55 |
+
useObservedSequencesOnly=true
|
| 56 |
+
|
| 57 |
+
sigma = 20
|
| 58 |
+
useQN = true
|
| 59 |
+
QNsize = 25
|
| 60 |
+
|
| 61 |
+
# makes it go faster
|
| 62 |
+
featureDiffThresh=0.05
|
edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.crf.ser.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:821281c380d277749c2641d6694584beef533c9cf222d24f03a79d45c18e1291
|
| 3 |
+
size 20118311
|
edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.prop
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
trainFileList = /u/nlp/data/ner/column_data/muc6.ptb.train,/u/nlp/data/ner/column_data/muc7.ptb.train
|
| 2 |
+
# testFile = /u/nlp/data/ner/column_data/muc7.ptb.devtest
|
| 3 |
+
serializeTo = english.muc.7class.caseless.distsim.crf.ser.gz
|
| 4 |
+
|
| 5 |
+
type=crf
|
| 6 |
+
|
| 7 |
+
wordFunction = edu.stanford.nlp.process.LowercaseAndAmericanizeFunction
|
| 8 |
+
useKnownLCWords = false
|
| 9 |
+
|
| 10 |
+
distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
|
| 11 |
+
numberEquivalenceDistSim = true
|
| 12 |
+
unknownWordDistSimClass = 0
|
| 13 |
+
useDistSim = true
|
| 14 |
+
|
| 15 |
+
map = word=0,answer=1
|
| 16 |
+
|
| 17 |
+
saveFeatureIndexToDisk = true
|
| 18 |
+
|
| 19 |
+
useClassFeature=true
|
| 20 |
+
useWord=true
|
| 21 |
+
#useWordPairs=true
|
| 22 |
+
useNGrams=true
|
| 23 |
+
noMidNGrams=true
|
| 24 |
+
maxNGramLeng=6
|
| 25 |
+
usePrev=true
|
| 26 |
+
useNext=true
|
| 27 |
+
#useTags=true
|
| 28 |
+
#useWordTag=true
|
| 29 |
+
useLongSequences=true
|
| 30 |
+
useSequences=true
|
| 31 |
+
usePrevSequences=true
|
| 32 |
+
useTypeSeqs=true
|
| 33 |
+
useTypeSeqs2=true
|
| 34 |
+
useTypeySequences=true
|
| 35 |
+
useOccurrencePatterns=true
|
| 36 |
+
useLastRealWord=true
|
| 37 |
+
useNextRealWord=true
|
| 38 |
+
#useReverse=false
|
| 39 |
+
normalize=true
|
| 40 |
+
# normalizeTimex=true
|
| 41 |
+
wordShape=chris2useLC
|
| 42 |
+
useDisjunctive=true
|
| 43 |
+
disjunctionWidth=5
|
| 44 |
+
#useDisjunctiveShapeInteraction=true
|
| 45 |
+
|
| 46 |
+
maxLeft=1
|
| 47 |
+
readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
|
| 48 |
+
|
| 49 |
+
useObservedSequencesOnly=true
|
| 50 |
+
|
| 51 |
+
useQN = true
|
| 52 |
+
QNsize = 25
|
| 53 |
+
|
| 54 |
+
# makes it go faster
|
| 55 |
+
featureDiffThresh=0.05
|
edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf.ser.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a2d7308d176718cdd0e7a062aaaf5e94d2c4bb8c9b787f2177196dcce1db5d0c
|
| 3 |
+
size 17859765
|
edu/stanford/nlp/models/ner/english.muc.7class.distsim.prop
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
trainFileList = /u/nlp/data/ner/column_data/muc6.ptb.train,/u/nlp/data/ner/column_data/muc7.ptb.train
|
| 2 |
+
# testFile = /u/nlp/data/ner/column_data/muc7.ptb.devtest
|
| 3 |
+
serializeTo = english.muc.7class.distsim.crf.ser.gz
|
| 4 |
+
|
| 5 |
+
type=crf
|
| 6 |
+
|
| 7 |
+
wordFunction = edu.stanford.nlp.process.AmericanizeFunction
|
| 8 |
+
|
| 9 |
+
distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
|
| 10 |
+
numberEquivalenceDistSim = true
|
| 11 |
+
unknownWordDistSimClass = 0
|
| 12 |
+
useDistSim = true
|
| 13 |
+
|
| 14 |
+
map = word=0,answer=1
|
| 15 |
+
|
| 16 |
+
saveFeatureIndexToDisk = true
|
| 17 |
+
|
| 18 |
+
useClassFeature=true
|
| 19 |
+
useWord=true
|
| 20 |
+
#useWordPairs=true
|
| 21 |
+
useNGrams=true
|
| 22 |
+
noMidNGrams=true
|
| 23 |
+
maxNGramLeng=6
|
| 24 |
+
usePrev=true
|
| 25 |
+
useNext=true
|
| 26 |
+
#useTags=true
|
| 27 |
+
#useWordTag=true
|
| 28 |
+
useLongSequences=true
|
| 29 |
+
useSequences=true
|
| 30 |
+
usePrevSequences=true
|
| 31 |
+
useTypeSeqs=true
|
| 32 |
+
useTypeSeqs2=true
|
| 33 |
+
useTypeySequences=true
|
| 34 |
+
useOccurrencePatterns=true
|
| 35 |
+
useLastRealWord=true
|
| 36 |
+
useNextRealWord=true
|
| 37 |
+
#useReverse=false
|
| 38 |
+
normalize=true
|
| 39 |
+
# normalizeTimex=true
|
| 40 |
+
wordShape=chris2useLC
|
| 41 |
+
useDisjunctive=true
|
| 42 |
+
disjunctionWidth=5
|
| 43 |
+
#useDisjunctiveShapeInteraction=true
|
| 44 |
+
|
| 45 |
+
maxLeft=1
|
| 46 |
+
readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
|
| 47 |
+
|
| 48 |
+
useObservedSequencesOnly=true
|
| 49 |
+
|
| 50 |
+
useQN = true
|
| 51 |
+
QNsize = 25
|
| 52 |
+
|
| 53 |
+
# makes it go faster
|
| 54 |
+
featureDiffThresh=0.05
|
edu/stanford/nlp/models/ner/english.muc.7class.nodistsim.crf.ser.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:40f1e3940b1424ffc699c2a3d74274e53b317d31532f4f955ca811d2a6e5b660
|
| 3 |
+
size 14494199
|
edu/stanford/nlp/models/ner/english.muc.7class.nodistsim.prop
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
trainFile = /u/nlp/data/ner/goodClassifiers/data/muc67.jenny.train
|
| 2 |
+
testFile = /u/nlp/data/ner/goodClassifiers/data/muc67.jenny.test
|
| 3 |
+
serializeTo = english.muc.7class.nodistsim.crf.ser.gz
|
| 4 |
+
|
| 5 |
+
wordFunction = edu.stanford.nlp.process.AmericanizeFunction
|
| 6 |
+
|
| 7 |
+
distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200
|
| 8 |
+
useDistSim = false
|
| 9 |
+
|
| 10 |
+
map = word=0,answer=1
|
| 11 |
+
|
| 12 |
+
saveFeatureIndexToDisk = true
|
| 13 |
+
|
| 14 |
+
useClassFeature=true
|
| 15 |
+
useWord=true
|
| 16 |
+
#useWordPairs=true
|
| 17 |
+
useNGrams=true
|
| 18 |
+
noMidNGrams=true
|
| 19 |
+
maxNGramLeng=6
|
| 20 |
+
usePrev=true
|
| 21 |
+
useNext=true
|
| 22 |
+
#useTags=true
|
| 23 |
+
#useWordTag=true
|
| 24 |
+
useLongSequences=true
|
| 25 |
+
useSequences=true
|
| 26 |
+
usePrevSequences=true
|
| 27 |
+
maxLeft=1
|
| 28 |
+
useTypeSeqs=true
|
| 29 |
+
useTypeSeqs2=true
|
| 30 |
+
useTypeySequences=true
|
| 31 |
+
useOccurrencePatterns=true
|
| 32 |
+
useLastRealWord=true
|
| 33 |
+
useNextRealWord=true
|
| 34 |
+
#useReverse=false
|
| 35 |
+
normalize=true
|
| 36 |
+
# normalizeTimex=true
|
| 37 |
+
wordShape=chris2useLC
|
| 38 |
+
useDisjunctive=true
|
| 39 |
+
disjunctionWidth=5
|
| 40 |
+
#useDisjunctiveShapeInteraction=true
|
| 41 |
+
|
| 42 |
+
type=crf
|
| 43 |
+
|
| 44 |
+
readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
|
| 45 |
+
|
| 46 |
+
useObservedSequencesOnly=true
|
| 47 |
+
|
| 48 |
+
useQN = true
|
| 49 |
+
QNsize = 25
|
| 50 |
+
|
| 51 |
+
# makes it go faster
|
| 52 |
+
featureDiffThresh=0.05
|
edu/stanford/nlp/models/ner/english.nowiki.3class.caseless.distsim.crf.ser.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:645677c2646791ac2a07b29be12af60cff84ea02936ac375af8b03939eb30adf
|
| 3 |
+
size 20284974
|
edu/stanford/nlp/models/ner/english.nowiki.3class.caseless.distsim.prop
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
trainFile = /u/nlp/data/ner/goodClassifiers/data/all.3class.train
|
| 2 |
+
testFile = /u/nlp/data/ner/column_data/conll.testa
|
| 3 |
+
serializeTo = english.nowiki.3class.caseless.distsim.crf.ser.gz
|
| 4 |
+
|
| 5 |
+
type = crf
|
| 6 |
+
|
| 7 |
+
wordFunction = edu.stanford.nlp.process.LowercaseAndAmericanizeFunction
|
| 8 |
+
useKnownLCWords = false
|
| 9 |
+
|
| 10 |
+
#distSimLexicon = /u/nlp/data/pos_tags_are_useless/englishGigaword.200.pruned
|
| 11 |
+
#distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200
|
| 12 |
+
distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
|
| 13 |
+
useDistSim = true
|
| 14 |
+
|
| 15 |
+
map = word=0,answer=1
|
| 16 |
+
|
| 17 |
+
saveFeatureIndexToDisk = true
|
| 18 |
+
|
| 19 |
+
useClassFeature=true
|
| 20 |
+
useWord=true
|
| 21 |
+
#useWordPairs=true
|
| 22 |
+
useNGrams=true
|
| 23 |
+
noMidNGrams=true
|
| 24 |
+
maxNGramLeng=6
|
| 25 |
+
usePrev=true
|
| 26 |
+
useNext=true
|
| 27 |
+
#useTags=true
|
| 28 |
+
#useWordTag=true
|
| 29 |
+
useLongSequences=true
|
| 30 |
+
useSequences=true
|
| 31 |
+
usePrevSequences=true
|
| 32 |
+
maxLeft=1
|
| 33 |
+
useTypeSeqs=true
|
| 34 |
+
useTypeSeqs2=true
|
| 35 |
+
useTypeySequences=true
|
| 36 |
+
useOccurrencePatterns=true
|
| 37 |
+
useLastRealWord=true
|
| 38 |
+
useNextRealWord=true
|
| 39 |
+
#useReverse=false
|
| 40 |
+
normalize=true
|
| 41 |
+
# normalizeTimex=true
|
| 42 |
+
wordShape=chris2useLC
|
| 43 |
+
useDisjunctive=true
|
| 44 |
+
disjunctionWidth=5
|
| 45 |
+
#useDisjunctiveShapeInteraction=true
|
| 46 |
+
|
| 47 |
+
readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
|
| 48 |
+
|
| 49 |
+
useObservedSequencesOnly=true
|
| 50 |
+
|
| 51 |
+
useQN = true
|
| 52 |
+
QNsize = 25
|
| 53 |
+
|
| 54 |
+
# makes it go faster
|
| 55 |
+
featureDiffThresh=0.05
|
edu/stanford/nlp/models/ner/english.nowiki.3class.nodistsim.crf.ser.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8025840c0c214e9787c96f4904e1108634d8321d01f85807a7e5a03ccb86c0ac
|
| 3 |
+
size 16787019
|
edu/stanford/nlp/models/ner/english.nowiki.3class.nodistsim.prop
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
trainFile = /u/nlp/data/ner/goodClassifiers/data/all.3class.train
|
| 2 |
+
testFile = /u/nlp/data/ner/column_data/conll.testa
|
| 3 |
+
serializeTo = english.nowiki.3class.nodistsim.crf.ser.gz
|
| 4 |
+
|
| 5 |
+
type = crf
|
| 6 |
+
|
| 7 |
+
wordFunction = edu.stanford.nlp.process.AmericanizeFunction
|
| 8 |
+
|
| 9 |
+
#distSimLexicon = /u/nlp/data/pos_tags_are_useless/englishGigaword.200.pruned
|
| 10 |
+
distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200.pruned
|
| 11 |
+
useDistSim = false
|
| 12 |
+
|
| 13 |
+
map = word=0,answer=1
|
| 14 |
+
|
| 15 |
+
saveFeatureIndexToDisk = true
|
| 16 |
+
|
| 17 |
+
useClassFeature=true
|
| 18 |
+
useWord=true
|
| 19 |
+
#useWordPairs=true
|
| 20 |
+
useNGrams=true
|
| 21 |
+
noMidNGrams=true
|
| 22 |
+
maxNGramLeng=6
|
| 23 |
+
usePrev=true
|
| 24 |
+
useNext=true
|
| 25 |
+
#useTags=true
|
| 26 |
+
#useWordTag=true
|
| 27 |
+
useLongSequences=true
|
| 28 |
+
useSequences=true
|
| 29 |
+
usePrevSequences=true
|
| 30 |
+
maxLeft=1
|
| 31 |
+
useTypeSeqs=true
|
| 32 |
+
useTypeSeqs2=true
|
| 33 |
+
useTypeySequences=true
|
| 34 |
+
useOccurrencePatterns=true
|
| 35 |
+
useLastRealWord=true
|
| 36 |
+
useNextRealWord=true
|
| 37 |
+
#useReverse=false
|
| 38 |
+
normalize=true
|
| 39 |
+
# normalizeTimex=true
|
| 40 |
+
wordShape=chris2useLC
|
| 41 |
+
useDisjunctive=true
|
| 42 |
+
disjunctionWidth=5
|
| 43 |
+
#useDisjunctiveShapeInteraction=true
|
| 44 |
+
|
| 45 |
+
readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
|
| 46 |
+
|
| 47 |
+
useObservedSequencesOnly=true
|
| 48 |
+
|
| 49 |
+
useQN = true
|
| 50 |
+
QNsize = 25
|
| 51 |
+
|
| 52 |
+
# makes it go faster
|
| 53 |
+
featureDiffThresh=0.05
|
edu/stanford/nlp/models/ner/regexner.patterns
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c351da938e9d86e7cd36f2d29e8405b069e21d1c48ec0e579764e818b892a53
|
| 3 |
+
size 12409329
|
edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger.props
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## tagger training invoked at Tue Feb 25 04:12:25 PST 2014 with arguments:
|
| 2 |
+
model = english-left3words-distsim.tagger
|
| 3 |
+
arch = left3words,naacl2003unknowns,wordshapes(-1,1),distsim(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1),distsimconjunction(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1)
|
| 4 |
+
wordFunction = edu.stanford.nlp.process.AmericanizeFunction
|
| 5 |
+
trainFile = /u/nlp/data/pos-tagger/english/train-wsj-0-18;/u/nlp/data/pos-tagger/english/train-extra-english;/u/nlp/data/pos-tagger/english/train-tech-english
|
| 6 |
+
closedClassTags =
|
| 7 |
+
closedClassTagThreshold = 40
|
| 8 |
+
curWordMinFeatureThresh = 2
|
| 9 |
+
debug = false
|
| 10 |
+
debugPrefix =
|
| 11 |
+
tagSeparator = _
|
| 12 |
+
encoding = UTF-8
|
| 13 |
+
iterations = 100
|
| 14 |
+
lang = english
|
| 15 |
+
learnClosedClassTags = false
|
| 16 |
+
minFeatureThresh = 2
|
| 17 |
+
openClassTags =
|
| 18 |
+
rareWordMinFeatureThresh = 10
|
| 19 |
+
rareWordThresh = 5
|
| 20 |
+
search = owlqn
|
| 21 |
+
sgml = false
|
| 22 |
+
sigmaSquared = 0.0
|
| 23 |
+
regL1 = 0.75
|
| 24 |
+
tagInside =
|
| 25 |
+
tokenize = true
|
| 26 |
+
tokenizerFactory =
|
| 27 |
+
tokenizerOptions =
|
| 28 |
+
verbose = false
|
| 29 |
+
verboseResults = true
|
| 30 |
+
veryCommonWordThresh = 250
|
| 31 |
+
xmlInput =
|
| 32 |
+
outputFile =
|
| 33 |
+
outputFormat = slashTags
|
| 34 |
+
outputFormatOptions =
|
| 35 |
+
nthreads = 1
|
jar-in-jar-loader.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03d2bd671c54a211880d614022931ef2188a03f98272d8fc17f9b3217ac606b7
|
| 3 |
+
size 7269
|
libs/NeuralNetwork.jar
ADDED
|
Binary file (43.7 kB). View file
|
|
|
libs/SMOTE.jar
ADDED
|
Binary file (8.96 kB). View file
|
|
|
libs/commons-cli-1.4.jar
ADDED
|
Binary file (53.8 kB). View file
|
|
|
libs/excel/commons-codec-1.10.jar
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4241dfa94e711d435f29a4604a3e2de5c4aa3c165e23bd066be6fc1fc4309569
|
| 3 |
+
size 284184
|
libs/excel/commons-collections4-4.1.jar
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1fe8b5968b57d8465425357ed2d9dc695504518bed2df5b565c4b8e68c1c8a5
|
| 3 |
+
size 751238
|
libs/excel/commons-logging-1.2.jar
ADDED
|
Binary file (61.8 kB). View file
|
|
|
libs/excel/curvesapi-1.04.jar
ADDED
|
Binary file (98.4 kB). View file
|
|
|
libs/excel/junit-4.12.jar
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59721f0805e223d84b90677887d9ff567dc534d7c502ca903c0c2b17f05c116a
|
| 3 |
+
size 314932
|
libs/excel/log4j-1.2.17.jar
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d31696445697720527091754369082a6651bd49781b6005deb94e56753406f9
|
| 3 |
+
size 489884
|
libs/excel/poi-3.17.jar
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:30181821dd2e849727b638b9e329aeff4a64f3445c4142b13cf7a18bb3552edd
|
| 3 |
+
size 2701171
|
libs/excel/poi-examples-3.17.jar
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc36c537277d06ea79bd4a231b00d5a48c9b44a3e1a14b6ddb68a6e1a4a335c6
|
| 3 |
+
size 374509
|
libs/excel/poi-excelant-3.17.jar
ADDED
|
Binary file (31.2 kB). View file
|
|
|
libs/excel/poi-ooxml-3.17.jar
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac915547ea3b775a810cc26149711682a23404e3d6d0b239915ac0c9305ee3c3
|
| 3 |
+
size 1479023
|