niobures commited on
Commit
bb654c7
·
verified ·
1 Parent(s): d36fe1c
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .classpath +31 -0
  2. .gitattributes +25 -0
  3. .gitignore +25 -0
  4. .project +17 -0
  5. .settings/org.eclipse.jdt.core.prefs +12 -0
  6. LICENSE +165 -0
  7. README.md +27 -0
  8. bin/.gitignore +3 -0
  9. build.xml +77 -0
  10. edu/stanford/nlp/models/lexparser/englishPCFG.caseless.ser.gz +3 -0
  11. edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz +3 -0
  12. edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.crf.ser.gz +3 -0
  13. edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.prop +61 -0
  14. edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz +3 -0
  15. edu/stanford/nlp/models/ner/english.all.3class.distsim.prop +58 -0
  16. edu/stanford/nlp/models/ner/english.all.3class.nodistsim.crf.ser.gz +3 -0
  17. edu/stanford/nlp/models/ner/english.all.3class.nodistsim.prop +53 -0
  18. edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.crf.ser.gz +3 -0
  19. edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.prop +61 -0
  20. edu/stanford/nlp/models/ner/english.conll.4class.distsim.crf.ser.gz +3 -0
  21. edu/stanford/nlp/models/ner/english.conll.4class.distsim.prop +63 -0
  22. edu/stanford/nlp/models/ner/english.conll.4class.nodistsim.crf.ser.gz +3 -0
  23. edu/stanford/nlp/models/ner/english.conll.4class.nodistsim.prop +62 -0
  24. edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.crf.ser.gz +3 -0
  25. edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.prop +55 -0
  26. edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf.ser.gz +3 -0
  27. edu/stanford/nlp/models/ner/english.muc.7class.distsim.prop +54 -0
  28. edu/stanford/nlp/models/ner/english.muc.7class.nodistsim.crf.ser.gz +3 -0
  29. edu/stanford/nlp/models/ner/english.muc.7class.nodistsim.prop +52 -0
  30. edu/stanford/nlp/models/ner/english.nowiki.3class.caseless.distsim.crf.ser.gz +3 -0
  31. edu/stanford/nlp/models/ner/english.nowiki.3class.caseless.distsim.prop +55 -0
  32. edu/stanford/nlp/models/ner/english.nowiki.3class.nodistsim.crf.ser.gz +3 -0
  33. edu/stanford/nlp/models/ner/english.nowiki.3class.nodistsim.prop +53 -0
  34. edu/stanford/nlp/models/ner/regexner.patterns +0 -0
  35. edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger +3 -0
  36. edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger.props +35 -0
  37. jar-in-jar-loader.zip +3 -0
  38. libs/NeuralNetwork.jar +0 -0
  39. libs/SMOTE.jar +0 -0
  40. libs/commons-cli-1.4.jar +0 -0
  41. libs/excel/commons-codec-1.10.jar +3 -0
  42. libs/excel/commons-collections4-4.1.jar +3 -0
  43. libs/excel/commons-logging-1.2.jar +0 -0
  44. libs/excel/curvesapi-1.04.jar +0 -0
  45. libs/excel/junit-4.12.jar +3 -0
  46. libs/excel/log4j-1.2.17.jar +3 -0
  47. libs/excel/poi-3.17.jar +3 -0
  48. libs/excel/poi-examples-3.17.jar +3 -0
  49. libs/excel/poi-excelant-3.17.jar +0 -0
  50. libs/excel/poi-ooxml-3.17.jar +3 -0
.classpath ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <classpath>
3
+ <classpathentry kind="src" path="src"/>
4
+ <classpathentry kind="lib" path="libs/excel/commons-codec-1.10.jar"/>
5
+ <classpathentry kind="lib" path="libs/excel/commons-collections4-4.1.jar"/>
6
+ <classpathentry kind="lib" path="libs/excel/commons-logging-1.2.jar"/>
7
+ <classpathentry kind="lib" path="libs/excel/curvesapi-1.04.jar"/>
8
+ <classpathentry kind="lib" path="libs/excel/junit-4.12.jar"/>
9
+ <classpathentry kind="lib" path="libs/excel/log4j-1.2.17.jar"/>
10
+ <classpathentry kind="lib" path="libs/excel/poi-3.17.jar"/>
11
+ <classpathentry kind="lib" path="libs/excel/poi-examples-3.17.jar"/>
12
+ <classpathentry kind="lib" path="libs/excel/poi-excelant-3.17.jar"/>
13
+ <classpathentry kind="lib" path="libs/excel/poi-ooxml-3.17.jar"/>
14
+ <classpathentry kind="lib" path="libs/excel/poi-ooxml-schemas-3.17.jar"/>
15
+ <classpathentry kind="lib" path="libs/excel/poi-scratchpad-3.17.jar"/>
16
+ <classpathentry kind="lib" path="libs/excel/xmlbeans-2.6.0.jar"/>
17
+ <classpathentry kind="lib" path="libs/mysql-connector-java-5.1.41.jar"/>
18
+ <classpathentry kind="lib" path="libs/SMOTE.jar"/>
19
+ <classpathentry kind="lib" path="libs/snowball-stemmer-1.3.0.581.1.jar"/>
20
+ <classpathentry kind="lib" path="libs/stanford-corenlp-3.8.0.jar"/>
21
+ <classpathentry kind="lib" path="libs/stanford-parser.jar"/>
22
+ <classpathentry kind="lib" path="libs/stanford-postagger.jar"/>
23
+ <classpathentry kind="lib" path="libs/weka-src.jar"/>
24
+ <classpathentry kind="lib" path="libs/weka.jar"/>
25
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
26
+ <classpathentry kind="lib" path="libs/commons-cli-1.4.jar"/>
27
+ <classpathentry kind="lib" path="libs/mtj-1.0-snapshot.jar"/>
28
+ <classpathentry kind="lib" path="libs/NeuralNetwork.jar"/>
29
+ <classpathentry kind="lib" path="libs/opencsv-2.3.jar"/>
30
+ <classpathentry kind="output" path="bin"/>
31
+ </classpath>
.gitattributes CHANGED
@@ -33,3 +33,28 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger filter=lfs diff=lfs merge=lfs -text
37
+ libs/excel/commons-codec-1.10.jar filter=lfs diff=lfs merge=lfs -text
38
+ libs/excel/commons-collections4-4.1.jar filter=lfs diff=lfs merge=lfs -text
39
+ libs/excel/junit-4.12.jar filter=lfs diff=lfs merge=lfs -text
40
+ libs/excel/log4j-1.2.17.jar filter=lfs diff=lfs merge=lfs -text
41
+ libs/excel/poi-3.17.jar filter=lfs diff=lfs merge=lfs -text
42
+ libs/excel/poi-examples-3.17.jar filter=lfs diff=lfs merge=lfs -text
43
+ libs/excel/poi-ooxml-3.17.jar filter=lfs diff=lfs merge=lfs -text
44
+ libs/excel/poi-ooxml-schemas-3.17.jar filter=lfs diff=lfs merge=lfs -text
45
+ libs/excel/poi-scratchpad-3.17.jar filter=lfs diff=lfs merge=lfs -text
46
+ libs/excel/xmlbeans-2.6.0.jar filter=lfs diff=lfs merge=lfs -text
47
+ libs/mtj-1.0-snapshot.jar filter=lfs diff=lfs merge=lfs -text
48
+ libs/mysql-connector-java-5.1.41.jar filter=lfs diff=lfs merge=lfs -text
49
+ libs/stanford-corenlp-3.8.0.jar filter=lfs diff=lfs merge=lfs -text
50
+ libs/stanford-parser.jar filter=lfs diff=lfs merge=lfs -text
51
+ libs/stanford-postagger.jar filter=lfs diff=lfs merge=lfs -text
52
+ libs/weka-src.jar filter=lfs diff=lfs merge=lfs -text
53
+ libs/weka.jar filter=lfs diff=lfs merge=lfs -text
54
+ models/sentise-oracle-short.xlsx filter=lfs diff=lfs merge=lfs -text
55
+ models/sentise-oracle1.xlsx filter=lfs diff=lfs merge=lfs -text
56
+ models/sentise-oracle2.xlsx filter=lfs diff=lfs merge=lfs -text
57
+ models/SentiWordNet_3.0.0_20130122.txt filter=lfs diff=lfs merge=lfs -text
58
+ sentise.jar filter=lfs diff=lfs merge=lfs -text
59
+ src/taggers/bidirectional-distsim-wsj-0-18.tagger filter=lfs diff=lfs merge=lfs -text
60
+ src/taggers/english-left3words-distsim.tagger filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Compiled class file
2
+ *.class
3
+
4
+ # Log file
5
+ *.log
6
+
7
+ # BlueJ files
8
+ *.ctxt
9
+
10
+ # Mobile Tools for Java (J2ME)
11
+ .mtj.tmp/
12
+
13
+ # Package Files #
14
+ *.war
15
+ *.nar
16
+ *.ear
17
+ *.csss
18
+ *.css
19
+ *.html
20
+ *.tar.gz
21
+ *.rar
22
+ *.scss
23
+
24
+ # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
25
+ hs_err_pid*
.project ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <projectDescription>
3
+ <name>SentiSE</name>
4
+ <comment></comment>
5
+ <projects>
6
+ </projects>
7
+ <buildSpec>
8
+ <buildCommand>
9
+ <name>org.eclipse.jdt.core.javabuilder</name>
10
+ <arguments>
11
+ </arguments>
12
+ </buildCommand>
13
+ </buildSpec>
14
+ <natures>
15
+ <nature>org.eclipse.jdt.core.javanature</nature>
16
+ </natures>
17
+ </projectDescription>
.settings/org.eclipse.jdt.core.prefs ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ eclipse.preferences.version=1
2
+ org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3
+ org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate
4
+ org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
5
+ org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
6
+ org.eclipse.jdt.core.compiler.compliance=1.8
7
+ org.eclipse.jdt.core.compiler.debug.lineNumber=generate
8
+ org.eclipse.jdt.core.compiler.debug.localVariable=generate
9
+ org.eclipse.jdt.core.compiler.debug.sourceFile=generate
10
+ org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
11
+ org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
12
+ org.eclipse.jdt.core.compiler.source=1.8
LICENSE ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GNU LESSER GENERAL PUBLIC LICENSE
2
+ Version 3, 29 June 2007
3
+
4
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
5
+ Everyone is permitted to copy and distribute verbatim copies
6
+ of this license document, but changing it is not allowed.
7
+
8
+
9
+ This version of the GNU Lesser General Public License incorporates
10
+ the terms and conditions of version 3 of the GNU General Public
11
+ License, supplemented by the additional permissions listed below.
12
+
13
+ 0. Additional Definitions.
14
+
15
+ As used herein, "this License" refers to version 3 of the GNU Lesser
16
+ General Public License, and the "GNU GPL" refers to version 3 of the GNU
17
+ General Public License.
18
+
19
+ "The Library" refers to a covered work governed by this License,
20
+ other than an Application or a Combined Work as defined below.
21
+
22
+ An "Application" is any work that makes use of an interface provided
23
+ by the Library, but which is not otherwise based on the Library.
24
+ Defining a subclass of a class defined by the Library is deemed a mode
25
+ of using an interface provided by the Library.
26
+
27
+ A "Combined Work" is a work produced by combining or linking an
28
+ Application with the Library. The particular version of the Library
29
+ with which the Combined Work was made is also called the "Linked
30
+ Version".
31
+
32
+ The "Minimal Corresponding Source" for a Combined Work means the
33
+ Corresponding Source for the Combined Work, excluding any source code
34
+ for portions of the Combined Work that, considered in isolation, are
35
+ based on the Application, and not on the Linked Version.
36
+
37
+ The "Corresponding Application Code" for a Combined Work means the
38
+ object code and/or source code for the Application, including any data
39
+ and utility programs needed for reproducing the Combined Work from the
40
+ Application, but excluding the System Libraries of the Combined Work.
41
+
42
+ 1. Exception to Section 3 of the GNU GPL.
43
+
44
+ You may convey a covered work under sections 3 and 4 of this License
45
+ without being bound by section 3 of the GNU GPL.
46
+
47
+ 2. Conveying Modified Versions.
48
+
49
+ If you modify a copy of the Library, and, in your modifications, a
50
+ facility refers to a function or data to be supplied by an Application
51
+ that uses the facility (other than as an argument passed when the
52
+ facility is invoked), then you may convey a copy of the modified
53
+ version:
54
+
55
+ a) under this License, provided that you make a good faith effort to
56
+ ensure that, in the event an Application does not supply the
57
+ function or data, the facility still operates, and performs
58
+ whatever part of its purpose remains meaningful, or
59
+
60
+ b) under the GNU GPL, with none of the additional permissions of
61
+ this License applicable to that copy.
62
+
63
+ 3. Object Code Incorporating Material from Library Header Files.
64
+
65
+ The object code form of an Application may incorporate material from
66
+ a header file that is part of the Library. You may convey such object
67
+ code under terms of your choice, provided that, if the incorporated
68
+ material is not limited to numerical parameters, data structure
69
+ layouts and accessors, or small macros, inline functions and templates
70
+ (ten or fewer lines in length), you do both of the following:
71
+
72
+ a) Give prominent notice with each copy of the object code that the
73
+ Library is used in it and that the Library and its use are
74
+ covered by this License.
75
+
76
+ b) Accompany the object code with a copy of the GNU GPL and this license
77
+ document.
78
+
79
+ 4. Combined Works.
80
+
81
+ You may convey a Combined Work under terms of your choice that,
82
+ taken together, effectively do not restrict modification of the
83
+ portions of the Library contained in the Combined Work and reverse
84
+ engineering for debugging such modifications, if you also do each of
85
+ the following:
86
+
87
+ a) Give prominent notice with each copy of the Combined Work that
88
+ the Library is used in it and that the Library and its use are
89
+ covered by this License.
90
+
91
+ b) Accompany the Combined Work with a copy of the GNU GPL and this license
92
+ document.
93
+
94
+ c) For a Combined Work that displays copyright notices during
95
+ execution, include the copyright notice for the Library among
96
+ these notices, as well as a reference directing the user to the
97
+ copies of the GNU GPL and this license document.
98
+
99
+ d) Do one of the following:
100
+
101
+ 0) Convey the Minimal Corresponding Source under the terms of this
102
+ License, and the Corresponding Application Code in a form
103
+ suitable for, and under terms that permit, the user to
104
+ recombine or relink the Application with a modified version of
105
+ the Linked Version to produce a modified Combined Work, in the
106
+ manner specified by section 6 of the GNU GPL for conveying
107
+ Corresponding Source.
108
+
109
+ 1) Use a suitable shared library mechanism for linking with the
110
+ Library. A suitable mechanism is one that (a) uses at run time
111
+ a copy of the Library already present on the user's computer
112
+ system, and (b) will operate properly with a modified version
113
+ of the Library that is interface-compatible with the Linked
114
+ Version.
115
+
116
+ e) Provide Installation Information, but only if you would otherwise
117
+ be required to provide such information under section 6 of the
118
+ GNU GPL, and only to the extent that such information is
119
+ necessary to install and execute a modified version of the
120
+ Combined Work produced by recombining or relinking the
121
+ Application with a modified version of the Linked Version. (If
122
+ you use option 4d0, the Installation Information must accompany
123
+ the Minimal Corresponding Source and Corresponding Application
124
+ Code. If you use option 4d1, you must provide the Installation
125
+ Information in the manner specified by section 6 of the GNU GPL
126
+ for conveying Corresponding Source.)
127
+
128
+ 5. Combined Libraries.
129
+
130
+ You may place library facilities that are a work based on the
131
+ Library side by side in a single library together with other library
132
+ facilities that are not Applications and are not covered by this
133
+ License, and convey such a combined library under terms of your
134
+ choice, if you do both of the following:
135
+
136
+ a) Accompany the combined library with a copy of the same work based
137
+ on the Library, uncombined with any other library facilities,
138
+ conveyed under the terms of this License.
139
+
140
+ b) Give prominent notice with the combined library that part of it
141
+ is a work based on the Library, and explaining where to find the
142
+ accompanying uncombined form of the same work.
143
+
144
+ 6. Revised Versions of the GNU Lesser General Public License.
145
+
146
+ The Free Software Foundation may publish revised and/or new versions
147
+ of the GNU Lesser General Public License from time to time. Such new
148
+ versions will be similar in spirit to the present version, but may
149
+ differ in detail to address new problems or concerns.
150
+
151
+ Each version is given a distinguishing version number. If the
152
+ Library as you received it specifies that a certain numbered version
153
+ of the GNU Lesser General Public License "or any later version"
154
+ applies to it, you have the option of following the terms and
155
+ conditions either of that published version or of any later version
156
+ published by the Free Software Foundation. If the Library as you
157
+ received it does not specify a version number of the GNU Lesser
158
+ General Public License, you may choose any version of the GNU Lesser
159
+ General Public License ever published by the Free Software Foundation.
160
+
161
+ If the Library as you received it specifies that a proxy can decide
162
+ whether future versions of the GNU Lesser General Public License shall
163
+ apply, that proxy's public statement of acceptance of any version is
164
+ permanent authorization for you to choose that version for the
165
+ Library.
README.md ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SentiSE
2
+ SentiSE is a sentiment analysis tool for Software Engineering interactions
3
+
4
+ SentiSE, a supervised learning based sentiment analysis tool that incorporates ten supervised learning algorithms and
5
+ fourteen different optional pre-processing steps that are commonly used to improve the performance of sentiment analysis tools.
6
+ We empirically evaluated each of the algorithms and preprocessing steps to determine the best configuration.
7
+ We evaluated SentiSE using a large-scale labeled dataset of 13K comments from three different types of SE interactions.
8
+ <br><br>
9
+ **Performance Evaluation:**
10
+ We compare SentiSE with other sentiment analysis tools available in the software engineering domain. We use two datasets for this evaluation: _Oracle1_, a 13K labeled dataset with 21% positive, 60% neutral and 19% negative data, and _Oracle2_ with 30% positive, 40% neutral and 30% negative data. The table below shows the performance comparison:
11
+
12
+ | Oracle | Tool | Precision<br> (Positive) | Recall<br> (Positive) | F-measure<br> (Positive) | Precision<br> (Neutral) | Recall<br> (Neutral) | F-measure<br> (Neutral) | Precision<br> (Negative) | Recall<br> (Negative) | F-measure<br> (Negative) | Accuracy | Weighted <br> Kappa |
13
+ |--|--| -- | -- | -- | -- | --| -- | -- | --| -- | -- | --|
14
+ |_Oracle1_|SentiSE | 85.63% | 75.27% | 80.11% | 81.51% | 92.78% | 86.78% | 81.03% | 55.92%|66.16%|82.23%|0.681
15
+ |_Oracle1_| SentiCR | 81.81% | 76.59%| 79.04%| 80.04% | 92.77% | 85.92% | 82.71% | 46.38% | 59.40% | 80.6655% | 0.647
16
+ |_Oracle1_| SentiStrength-SE | 75.81% | 81.45% | 78.53% | 84.68% | 83.64% | 84.16% | 66.50% | 63.42% | 64.92% | 79.32% | 0.6587
17
+ |_Oracle2_| SentiSE | 88.83% | 85.09% | 86.92% | 86.62% | 91.52% | 89.00% | 85.87% | 78.61% | 82.07% | 86.92% | 0.788
18
+ |_Oracle2_|SentiCR | 84.32% | 84.73% | 84.50% | 80.70% | 92.08% | 86.00% | 86.45% | 59.49% | 70.40% | 82.47% | 0.716
19
+ |_Oracle2_| SentiStrength-SE | 79.56% | 83.57% | 81.52% | 80.73% | 84.15% | 82.41% | 80.41% | 69.31% | 74.45% | 80.34% | 0.696
20
+
21
+ <br><br><br>
22
+ **Usage Instructions:**
23
+ Download and import the SentiSE project. Run the build.xml file to generate sentise.jar. SentiSE is a command-line based tool. Use the command <code> java -jar sentise.jar -help</code> to find all the commands available in SentiSE.
24
+
25
+ <br><br>
26
+ **ScreenShot**
27
+ ![SentiSE-cli](https://github.com/amiangshu/SentiSE/blob/tanzeerH-readme/models/cli.png)
bin/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ /edu/
2
+ /taggers/
3
+ /weka/
build.xml ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2
+ <project default="jar" name="SentiSE Builder" >
3
+ <property name="src.dir" value="src"/>
4
+
5
+ <property name="build.dir" value="build"/>
6
+ <property name="classes.dir" value="${build.dir}/bin"/>
7
+ <property name="jar.dir" value="${class.dir}"/>
8
+ <property name="dir.jarfile" value="./"/>
9
+ <property name="main-class" value="edu.siu.sentise.SentiSE"/>
10
+
11
+ <path id="libs-class">
12
+ <fileset dir="./libs">
13
+ <include name="*.jar"/>
14
+ </fileset>
15
+ </path>
16
+
17
+ <path id="excel-class">
18
+ <fileset dir="./libs/excel">
19
+ <include name="*.jar"/>
20
+ </fileset>
21
+ </path>
22
+
23
+
24
+ <target name="clean">
25
+ <delete dir="${build.dir}"/>
26
+ </target>
27
+
28
+ <target name="compile">
29
+ <mkdir dir="${classes.dir}"/>
30
+ <mkdir dir="${build.dir}"/>
31
+
32
+ <javac destdir="${classes.dir}" includeantruntime="false" >
33
+ <classpath refid="libs-class"/>
34
+ <classpath refid="excel-class"/>
35
+ <src path="${src.dir}"/>
36
+
37
+ </javac>
38
+ </target>
39
+
40
+
41
+
42
+ <target name="jar" depends="compile">
43
+ <jar destfile="${jar.dir}/sentise.jar">
44
+ <manifest>
45
+ <attribute name="Main-Class" value="org.eclipse.jdt.internal.jarinjarloader.JarRsrcLoader"/>
46
+ <attribute name="Rsrc-Main-Class" value="edu.sentise.SentiSE"/>
47
+ <attribute name="Class-Path" value="."/>
48
+ <attribute name="Rsrc-Class-Path" value="./ mysql-connector-java-5.1.41.jar SMOTE.jar snowball-stemmer-1.3.0.581.1.jar stanford-corenlp-3.8.0.jar stanford-parser.jar stanford-postagger.jar weka-src.jar weka.jar commons-codec-1.10.jar commons-collections4-4.1.jar junit-4.12.jar log4j-1.2.17.jar poi-3.17.jar poi-examples-3.17.jar poi-excelant-3.17.jar poi-ooxml-3.17.jar poi-ooxml-schemas-3.17.jar poi-scratchpad-3.17.jar xmlbeans-2.6.0.jar commons-cli-1.4.jar mtj-1.0-snapshot.jar opencsv-2.3.jar NeuralNetwork.jar"/>
49
+ </manifest>
50
+ <zipfileset src="jar-in-jar-loader.zip"/>
51
+ <fileset dir="${dir.jarfile}/bin"/>
52
+ <zipfileset dir="${dir.jarfile}/libs" includes="mysql-connector-java-5.1.41.jar"/>
53
+ <zipfileset dir="${dir.jarfile}/libs" includes="SMOTE.jar"/>
54
+ <zipfileset dir="${dir.jarfile}/libs" includes="snowball-stemmer-1.3.0.581.1.jar"/>
55
+ <zipfileset dir="${dir.jarfile}/libs" includes="stanford-corenlp-3.8.0.jar"/>
56
+ <zipfileset dir="${dir.jarfile}/libs" includes="stanford-parser.jar"/>
57
+ <zipfileset dir="${dir.jarfile}/libs" includes="stanford-postagger.jar"/>
58
+ <zipfileset dir="${dir.jarfile}/libs" includes="weka-src.jar"/>
59
+ <zipfileset dir="${dir.jarfile}/libs" includes="weka.jar"/>
60
+ <zipfileset dir="${dir.jarfile}/libs/excel" includes="commons-codec-1.10.jar"/>
61
+ <zipfileset dir="${dir.jarfile}/libs/excel" includes="commons-collections4-4.1.jar"/>
62
+ <zipfileset dir="${dir.jarfile}/libs/excel" includes="junit-4.12.jar"/>
63
+ <zipfileset dir="${dir.jarfile}/libs/excel" includes="log4j-1.2.17.jar"/>
64
+ <zipfileset dir="${dir.jarfile}/libs/excel" includes="poi-3.17.jar"/>
65
+ <zipfileset dir="${dir.jarfile}/libs/excel" includes="poi-examples-3.17.jar"/>
66
+ <zipfileset dir="${dir.jarfile}/libs/excel" includes="poi-excelant-3.17.jar"/>
67
+ <zipfileset dir="${dir.jarfile}/libs/excel" includes="poi-ooxml-3.17.jar"/>
68
+ <zipfileset dir="${dir.jarfile}/libs/excel" includes="poi-ooxml-schemas-3.17.jar"/>
69
+ <zipfileset dir="${dir.jarfile}/libs/excel" includes="poi-scratchpad-3.17.jar"/>
70
+ <zipfileset dir="${dir.jarfile}/libs/excel" includes="xmlbeans-2.6.0.jar"/>
71
+ <zipfileset dir="${dir.jarfile}/libs" includes="commons-cli-1.4.jar"/>
72
+ <zipfileset dir="${dir.jarfile}/libs" includes="mtj-1.0-snapshot.jar"/>
73
+ <zipfileset dir="${dir.jarfile}/libs" includes="opencsv-2.3.jar"/>
74
+ <zipfileset dir="${dir.jarfile}/libs" includes="NeuralNetwork.jar"/>
75
+ </jar>
76
+ </target>
77
+ </project>
edu/stanford/nlp/models/lexparser/englishPCFG.caseless.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:defb4d91b899c4f92ea605f026c57408c4d3df1fb371b50f2d055ace782cbc1e
3
+ size 1769534
edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed674cce7311f76c018806e61cdb045e219258ee30014da4e3e686b9230ebea9
3
+ size 1883795
edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.crf.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c9a504ad8f82f415ad64ce96478c59788355d8096edc96ccb8b43289739f2f1
3
+ size 37319863
edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.prop ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # trainFileList = /u/nlp/data/ner/column_data/all.3class.train.old2,/u/nlp/data/ner/column_data/english.extra.3class.train
2
+ trainFileList = /u/nlp/data/ner/column_data/ace23.3class.train,/u/nlp/data/ner/column_data/muc6.3class.ptb.train,/u/nlp/data/ner/column_data/muc7.3class.ptb.train,/u/nlp/data/ner/column_data/conll.3class.train,/u/nlp/data/ner/column_data/wikiner.3class.train,/u/nlp/data/ner/column_data/ontonotes.3class.train,/u/nlp/data/ner/column_data/english.extra.3class.train
3
+ testFiles = /u/nlp/data/ner/column_data/all.3class.test,/u/nlp/data/ner/column_data/all.3class.upper.test,/u/nlp/data/ner/column_data/all.3class.lower.test
4
+ # testFile = uppercase.tsv
5
+ serializeTo = english.all.3class.caseless.distsim.crf.ser.gz
6
+
7
+ type = crf
8
+
9
+ wordFunction = edu.stanford.nlp.process.LowercaseAndAmericanizeFunction
10
+ useKnownLCWords = false
11
+
12
+ #distSimLexicon = /u/nlp/data/pos_tags_are_useless/englishGigaword.200.pruned
13
+ #distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200
14
+ distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
15
+ # right options for egw4-reut.512 (though effect of having or not is small)
16
+ numberEquivalenceDistSim = true
17
+ unknownWordDistSimClass = 0
18
+ useDistSim = true
19
+
20
+ map = word=0,answer=1
21
+
22
+ # saveFeatureIndexToDisk = true
23
+
24
+ useClassFeature=true
25
+ useWord=true
26
+ #useWordPairs=true
27
+ useNGrams=true
28
+ noMidNGrams=true
29
+ maxNGramLeng=6
30
+ usePrev=true
31
+ useNext=true
32
+ #useTags=true
33
+ #useWordTag=true
34
+ useLongSequences=true
35
+ useSequences=true
36
+ usePrevSequences=true
37
+ useTypeSeqs=true
38
+ useTypeSeqs2=true
39
+ useTypeySequences=true
40
+ useOccurrencePatterns=true
41
+ useLastRealWord=true
42
+ useNextRealWord=true
43
+ #useReverse=false
44
+ normalize=true
45
+ # normalizeTimex=true
46
+ wordShape=chris2useLC
47
+ useDisjunctive=true
48
+ disjunctionWidth=5
49
+ #useDisjunctiveShapeInteraction=true
50
+
51
+ maxLeft=1
52
+ readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
53
+
54
+ useObservedSequencesOnly=true
55
+
56
+ useQN = true
57
+ QNsize = 25
58
+
59
+ # makes it go faster
60
+ featureDiffThresh=0.05
61
+
edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65905fe1bbba47d0ac2b468e63d91cfde72dce0a7261345e015c45fab9cff93f
3
+ size 31985042
edu/stanford/nlp/models/ner/english.all.3class.distsim.prop ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # trainFileList = /u/nlp/data/ner/column_data/all.3class.train.old2,/u/nlp/data/ner/column_data/english.extra.3class.train
2
+ trainFileList = /u/nlp/data/ner/column_data/ace23.3class.train,/u/nlp/data/ner/column_data/muc6.3class.ptb.train,/u/nlp/data/ner/column_data/muc7.3class.ptb.train,/u/nlp/data/ner/column_data/conll.3class.train,/u/nlp/data/ner/column_data/wikiner.3class.train,/u/nlp/data/ner/column_data/ontonotes.3class.train,/u/nlp/data/ner/column_data/english.extra.3class.train
3
+ testFile = /u/nlp/data/ner/column_data/all.3class.test
4
+ serializeTo = english.all.3class.distsim.crf.ser.gz
5
+
6
+ type = crf
7
+
8
+ wordFunction = edu.stanford.nlp.process.AmericanizeFunction
9
+
10
+ #distSimLexicon = /u/nlp/data/pos_tags_are_useless/englishGigaword.200.pruned
11
+ #distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200
12
+ distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
13
+ # right options for egw4-reut.512 (though effect of having or not is small)
14
+ numberEquivalenceDistSim = true
15
+ unknownWordDistSimClass = 0
16
+ useDistSim = true
17
+
18
+ map = word=0,answer=1
19
+
20
+ saveFeatureIndexToDisk = true
21
+
22
+ useClassFeature=true
23
+ useWord=true
24
+ #useWordPairs=true
25
+ useNGrams=true
26
+ noMidNGrams=true
27
+ maxNGramLeng=6
28
+ usePrev=true
29
+ useNext=true
30
+ #useTags=true
31
+ #useWordTag=true
32
+ useLongSequences=true
33
+ useSequences=true
34
+ usePrevSequences=true
35
+ useTypeSeqs=true
36
+ useTypeSeqs2=true
37
+ useTypeySequences=true
38
+ useOccurrencePatterns=true
39
+ useLastRealWord=true
40
+ useNextRealWord=true
41
+ #useReverse=false
42
+ normalize=true
43
+ # normalizeTimex=true
44
+ wordShape=chris2useLC
45
+ useDisjunctive=true
46
+ disjunctionWidth=5
47
+ #useDisjunctiveShapeInteraction=true
48
+
49
+ maxLeft=1
50
+ readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
51
+
52
+ useObservedSequencesOnly=true
53
+
54
+ useQN = true
55
+ QNsize = 25
56
+
57
+ # makes it go faster
58
+ featureDiffThresh=0.05
edu/stanford/nlp/models/ner/english.all.3class.nodistsim.crf.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77339de3c30111b12302d331d97b24304fdf537258e3b6007a1e6e96390ce9b2
3
+ size 23179051
edu/stanford/nlp/models/ner/english.all.3class.nodistsim.prop ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainFileList = /u/nlp/data/ner/column_data/all.3class.train.old2,/u/nlp/data/ner/column_data/english.extra.3class.train
2
+ testFile = /u/nlp/data/ner/column_data/all.3class.test
3
+ serializeTo = english.all.3class.nodistsim.crf.ser.gz
4
+
5
+ type = crf
6
+
7
+ wordFunction = edu.stanford.nlp.process.AmericanizeFunction
8
+
9
+ #distSimLexicon = /u/nlp/data/pos_tags_are_useless/englishGigaword.200.pruned
10
+ distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200.pruned
11
+ useDistSim = false
12
+
13
+ map = word=0,answer=1
14
+
15
+ saveFeatureIndexToDisk = true
16
+
17
+ useClassFeature=true
18
+ useWord=true
19
+ #useWordPairs=true
20
+ useNGrams=true
21
+ noMidNGrams=true
22
+ maxNGramLeng=6
23
+ usePrev=true
24
+ useNext=true
25
+ #useTags=true
26
+ #useWordTag=true
27
+ useLongSequences=true
28
+ useSequences=true
29
+ usePrevSequences=true
30
+ maxLeft=1
31
+ useTypeSeqs=true
32
+ useTypeSeqs2=true
33
+ useTypeySequences=true
34
+ useOccurrencePatterns=true
35
+ useLastRealWord=true
36
+ useNextRealWord=true
37
+ #useReverse=false
38
+ normalize=true
39
+ # normalizeTimex=true
40
+ wordShape=chris2useLC
41
+ useDisjunctive=true
42
+ disjunctionWidth=5
43
+ #useDisjunctiveShapeInteraction=true
44
+
45
+ readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
46
+
47
+ useObservedSequencesOnly=true
48
+
49
+ useQN = true
50
+ QNsize = 25
51
+
52
+ # makes it go faster
53
+ featureDiffThresh=0.05
edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.crf.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2073dcfff4061549fe6aa6b6fd22246ae6bad28c819c6806d690ad3c1e383c8d
3
+ size 21421444
edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.prop ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This is better than Jenny's either with or without distsim turned on
2
+ # And using iob2 is better for optimal CoNLL performance.
3
+ # Features titled "chris2009"
4
+
5
+ trainFile = /u/nlp/data/ner/column_data/conll.4class.train
6
+ # testFile = /u/nlp/data/ner/column_data/conll.4class.testa
7
+ serializeTo = english.conll.4class.caseless.distsim.crf.ser.gz
8
+
9
+ wordFunction = edu.stanford.nlp.process.LowercaseAndAmericanizeFunction
10
+ useKnownLCWords = false
11
+
12
+ useDistSim = true
13
+ distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
14
+
15
+ map = word=0,answer=1
16
+
17
+ saveFeatureIndexToDisk = true
18
+
19
+ useTitle = true
20
+ useClassFeature=true
21
+ useWord=true
22
+ # useWordPairs=true
23
+ useNGrams=true
24
+ noMidNGrams=true
25
+ # maxNGramLeng=6 # Having them all helps, which is the default
26
+ usePrev=true
27
+ useNext=true
28
+ # useTags=true
29
+ # useWordTag=true
30
+ useLongSequences=true
31
+ useSequences=true
32
+ usePrevSequences=true
33
+ maxLeft=1
34
+ useTypeSeqs=true
35
+ useTypeSeqs2=true
36
+ useTypeySequences=true
37
+ useOccurrencePatterns=true
38
+ useLastRealWord=true
39
+ useNextRealWord=true
40
+ #useReverse=false
41
+ normalize=true
42
+ # normalizeTimex=true
43
+ # dan2 better than chris2 on CoNLL data...
44
+ wordShape=dan2useLC
45
+ useDisjunctive=true
46
+ # disjunctionWidth 4 is better than 5 on CoNLL data
47
+ disjunctionWidth=4
48
+ #useDisjunctiveShapeInteraction=true
49
+
50
+ type=crf
51
+
52
+ readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
53
+
54
+ useObservedSequencesOnly=true
55
+
56
+ sigma = 20
57
+ useQN = true
58
+ QNsize = 25
59
+
60
+ # makes it go faster
61
+ featureDiffThresh=0.05
edu/stanford/nlp/models/ner/english.conll.4class.distsim.crf.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f60c3630612f2c93420f191fec792f87a4f64aee38998324b7a1f8b5c90f7363
3
+ size 17803778
edu/stanford/nlp/models/ner/english.conll.4class.distsim.prop ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This is better than Jenny's either with or without distsim turned on
2
+ # And using iob2 is better for optimal CoNLL performance.
3
+ # Features titled "chris2009"
4
+
5
+ trainFile = /u/nlp/data/ner/column_data/conll.4class.train
6
+ # testFile = /u/nlp/data/ner/column_data/conll.4class.testa
7
+ serializeTo = english.conll.4class.distsim.crf.ser.gz
8
+
9
+ wordFunction = edu.stanford.nlp.process.AmericanizeFunction
10
+
11
+ useDistSim = true
12
+ distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
13
+ # right options for egw4-reut.512 (though effect of having or not is small)
14
+ numberEquivalenceDistSim = true
15
+ unknownWordDistSimClass = 0
16
+
17
+ map = word=0,answer=1
18
+
19
+ saveFeatureIndexToDisk = true
20
+
21
+ useTitle = true
22
+ useClassFeature=true
23
+ useWord=true
24
+ # useWordPairs=true
25
+ useNGrams=true
26
+ noMidNGrams=true
27
+ # maxNGramLeng=6 # Having them all helps, which is the default
28
+ usePrev=true
29
+ useNext=true
30
+ # useTags=true
31
+ # useWordTag=true
32
+ useLongSequences=true
33
+ useSequences=true
34
+ usePrevSequences=true
35
+ maxLeft=1
36
+ useTypeSeqs=true
37
+ useTypeSeqs2=true
38
+ useTypeySequences=true
39
+ useOccurrencePatterns=true
40
+ useLastRealWord=true
41
+ useNextRealWord=true
42
+ #useReverse=false
43
+ normalize=true
44
+ # normalizeTimex=true
45
+ # dan2 better than chris2 on CoNLL data...
46
+ wordShape=dan2useLC
47
+ useDisjunctive=true
48
+ # disjunctionWidth 4 is better than 5 on CoNLL data
49
+ disjunctionWidth=4
50
+ #useDisjunctiveShapeInteraction=true
51
+
52
+ type=crf
53
+
54
+ readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
55
+
56
+ useObservedSequencesOnly=true
57
+
58
+ sigma = 20
59
+ useQN = true
60
+ QNsize = 25
61
+
62
+ # makes it go faster
63
+ featureDiffThresh=0.05
edu/stanford/nlp/models/ner/english.conll.4class.nodistsim.crf.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b14f1b2b25935f0c7b1499e3a910ec11d3bbb1e7350ed0f888b8f170fe38a90c
3
+ size 14860645
edu/stanford/nlp/models/ner/english.conll.4class.nodistsim.prop ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This is better than Jenny's either with or without distsim turned on
2
+ # And using iob2 is better for optimal CoNLL performance.
3
+ # Features labeled "chris2009"
4
+
5
+ trainFile = /u/nlp/data/ner/column_data/conll.4class.train
6
+ # testFile = /u/nlp/data/ner/column_data/conll.4class.testa
7
+ serializeTo = english.conll.4class.nodistsim.crf.ser.gz
8
+
9
+ wordFunction = edu.stanford.nlp.process.AmericanizeFunction
10
+
11
+ # This is good, but deliberately not used here
12
+ # useDistSim = true
13
+ useDistSim = false
14
+ distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200
15
+
16
+ map = word=0,answer=1
17
+
18
+ saveFeatureIndexToDisk = true
19
+
20
+ useTitle = true
21
+ useClassFeature=true
22
+ useWord=true
23
+ # useWordPairs=true
24
+ useNGrams=true
25
+ noMidNGrams=true
26
+ # maxNGramLeng=6 # Having them all helps, which is the default
27
+ usePrev=true
28
+ useNext=true
29
+ # useTags=true
30
+ # useWordTag=true
31
+ useLongSequences=true
32
+ useSequences=true
33
+ usePrevSequences=true
34
+ maxLeft=1
35
+ useTypeSeqs=true
36
+ useTypeSeqs2=true
37
+ useTypeySequences=true
38
+ useOccurrencePatterns=true
39
+ useLastRealWord=true
40
+ useNextRealWord=true
41
+ #useReverse=false
42
+ normalize=true
43
+ # normalizeTimex=true
44
+ # dan2 better than chris2 on CoNLL data...
45
+ wordShape=dan2useLC
46
+ useDisjunctive=true
47
+ # disjunctionWidth 4 is better than 5 on CoNLL data
48
+ disjunctionWidth=4
49
+ #useDisjunctiveShapeInteraction=true
50
+
51
+ type=crf
52
+
53
+ readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
54
+
55
+ useObservedSequencesOnly=true
56
+
57
+ sigma = 20
58
+ useQN = true
59
+ QNsize = 25
60
+
61
+ # makes it go faster
62
+ featureDiffThresh=0.05
edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.crf.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:821281c380d277749c2641d6694584beef533c9cf222d24f03a79d45c18e1291
3
+ size 20118311
edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.prop ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainFileList = /u/nlp/data/ner/column_data/muc6.ptb.train,/u/nlp/data/ner/column_data/muc7.ptb.train
2
+ # testFile = /u/nlp/data/ner/column_data/muc7.ptb.devtest
3
+ serializeTo = english.muc.7class.caseless.distsim.crf.ser.gz
4
+
5
+ type=crf
6
+
7
+ wordFunction = edu.stanford.nlp.process.LowercaseAndAmericanizeFunction
8
+ useKnownLCWords = false
9
+
10
+ distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
11
+ numberEquivalenceDistSim = true
12
+ unknownWordDistSimClass = 0
13
+ useDistSim = true
14
+
15
+ map = word=0,answer=1
16
+
17
+ saveFeatureIndexToDisk = true
18
+
19
+ useClassFeature=true
20
+ useWord=true
21
+ #useWordPairs=true
22
+ useNGrams=true
23
+ noMidNGrams=true
24
+ maxNGramLeng=6
25
+ usePrev=true
26
+ useNext=true
27
+ #useTags=true
28
+ #useWordTag=true
29
+ useLongSequences=true
30
+ useSequences=true
31
+ usePrevSequences=true
32
+ useTypeSeqs=true
33
+ useTypeSeqs2=true
34
+ useTypeySequences=true
35
+ useOccurrencePatterns=true
36
+ useLastRealWord=true
37
+ useNextRealWord=true
38
+ #useReverse=false
39
+ normalize=true
40
+ # normalizeTimex=true
41
+ wordShape=chris2useLC
42
+ useDisjunctive=true
43
+ disjunctionWidth=5
44
+ #useDisjunctiveShapeInteraction=true
45
+
46
+ maxLeft=1
47
+ readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
48
+
49
+ useObservedSequencesOnly=true
50
+
51
+ useQN = true
52
+ QNsize = 25
53
+
54
+ # makes it go faster
55
+ featureDiffThresh=0.05
edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2d7308d176718cdd0e7a062aaaf5e94d2c4bb8c9b787f2177196dcce1db5d0c
3
+ size 17859765
edu/stanford/nlp/models/ner/english.muc.7class.distsim.prop ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainFileList = /u/nlp/data/ner/column_data/muc6.ptb.train,/u/nlp/data/ner/column_data/muc7.ptb.train
2
+ # testFile = /u/nlp/data/ner/column_data/muc7.ptb.devtest
3
+ serializeTo = english.muc.7class.distsim.crf.ser.gz
4
+
5
+ type=crf
6
+
7
+ wordFunction = edu.stanford.nlp.process.AmericanizeFunction
8
+
9
+ distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
10
+ numberEquivalenceDistSim = true
11
+ unknownWordDistSimClass = 0
12
+ useDistSim = true
13
+
14
+ map = word=0,answer=1
15
+
16
+ saveFeatureIndexToDisk = true
17
+
18
+ useClassFeature=true
19
+ useWord=true
20
+ #useWordPairs=true
21
+ useNGrams=true
22
+ noMidNGrams=true
23
+ maxNGramLeng=6
24
+ usePrev=true
25
+ useNext=true
26
+ #useTags=true
27
+ #useWordTag=true
28
+ useLongSequences=true
29
+ useSequences=true
30
+ usePrevSequences=true
31
+ useTypeSeqs=true
32
+ useTypeSeqs2=true
33
+ useTypeySequences=true
34
+ useOccurrencePatterns=true
35
+ useLastRealWord=true
36
+ useNextRealWord=true
37
+ #useReverse=false
38
+ normalize=true
39
+ # normalizeTimex=true
40
+ wordShape=chris2useLC
41
+ useDisjunctive=true
42
+ disjunctionWidth=5
43
+ #useDisjunctiveShapeInteraction=true
44
+
45
+ maxLeft=1
46
+ readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
47
+
48
+ useObservedSequencesOnly=true
49
+
50
+ useQN = true
51
+ QNsize = 25
52
+
53
+ # makes it go faster
54
+ featureDiffThresh=0.05
edu/stanford/nlp/models/ner/english.muc.7class.nodistsim.crf.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40f1e3940b1424ffc699c2a3d74274e53b317d31532f4f955ca811d2a6e5b660
3
+ size 14494199
edu/stanford/nlp/models/ner/english.muc.7class.nodistsim.prop ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainFile = /u/nlp/data/ner/goodClassifiers/data/muc67.jenny.train
2
+ testFile = /u/nlp/data/ner/goodClassifiers/data/muc67.jenny.test
3
+ serializeTo = english.muc.7class.nodistsim.crf.ser.gz
4
+
5
+ wordFunction = edu.stanford.nlp.process.AmericanizeFunction
6
+
7
+ distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200
8
+ useDistSim = false
9
+
10
+ map = word=0,answer=1
11
+
12
+ saveFeatureIndexToDisk = true
13
+
14
+ useClassFeature=true
15
+ useWord=true
16
+ #useWordPairs=true
17
+ useNGrams=true
18
+ noMidNGrams=true
19
+ maxNGramLeng=6
20
+ usePrev=true
21
+ useNext=true
22
+ #useTags=true
23
+ #useWordTag=true
24
+ useLongSequences=true
25
+ useSequences=true
26
+ usePrevSequences=true
27
+ maxLeft=1
28
+ useTypeSeqs=true
29
+ useTypeSeqs2=true
30
+ useTypeySequences=true
31
+ useOccurrencePatterns=true
32
+ useLastRealWord=true
33
+ useNextRealWord=true
34
+ #useReverse=false
35
+ normalize=true
36
+ # normalizeTimex=true
37
+ wordShape=chris2useLC
38
+ useDisjunctive=true
39
+ disjunctionWidth=5
40
+ #useDisjunctiveShapeInteraction=true
41
+
42
+ type=crf
43
+
44
+ readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
45
+
46
+ useObservedSequencesOnly=true
47
+
48
+ useQN = true
49
+ QNsize = 25
50
+
51
+ # makes it go faster
52
+ featureDiffThresh=0.05
edu/stanford/nlp/models/ner/english.nowiki.3class.caseless.distsim.crf.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:645677c2646791ac2a07b29be12af60cff84ea02936ac375af8b03939eb30adf
3
+ size 20284974
edu/stanford/nlp/models/ner/english.nowiki.3class.caseless.distsim.prop ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainFile = /u/nlp/data/ner/goodClassifiers/data/all.3class.train
2
+ testFile = /u/nlp/data/ner/column_data/conll.testa
3
+ serializeTo = english.nowiki.3class.caseless.distsim.crf.ser.gz
4
+
5
+ type = crf
6
+
7
+ wordFunction = edu.stanford.nlp.process.LowercaseAndAmericanizeFunction
8
+ useKnownLCWords = false
9
+
10
+ #distSimLexicon = /u/nlp/data/pos_tags_are_useless/englishGigaword.200.pruned
11
+ #distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200
12
+ distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
13
+ useDistSim = true
14
+
15
+ map = word=0,answer=1
16
+
17
+ saveFeatureIndexToDisk = true
18
+
19
+ useClassFeature=true
20
+ useWord=true
21
+ #useWordPairs=true
22
+ useNGrams=true
23
+ noMidNGrams=true
24
+ maxNGramLeng=6
25
+ usePrev=true
26
+ useNext=true
27
+ #useTags=true
28
+ #useWordTag=true
29
+ useLongSequences=true
30
+ useSequences=true
31
+ usePrevSequences=true
32
+ maxLeft=1
33
+ useTypeSeqs=true
34
+ useTypeSeqs2=true
35
+ useTypeySequences=true
36
+ useOccurrencePatterns=true
37
+ useLastRealWord=true
38
+ useNextRealWord=true
39
+ #useReverse=false
40
+ normalize=true
41
+ # normalizeTimex=true
42
+ wordShape=chris2useLC
43
+ useDisjunctive=true
44
+ disjunctionWidth=5
45
+ #useDisjunctiveShapeInteraction=true
46
+
47
+ readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
48
+
49
+ useObservedSequencesOnly=true
50
+
51
+ useQN = true
52
+ QNsize = 25
53
+
54
+ # makes it go faster
55
+ featureDiffThresh=0.05
edu/stanford/nlp/models/ner/english.nowiki.3class.nodistsim.crf.ser.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8025840c0c214e9787c96f4904e1108634d8321d01f85807a7e5a03ccb86c0ac
3
+ size 16787019
edu/stanford/nlp/models/ner/english.nowiki.3class.nodistsim.prop ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainFile = /u/nlp/data/ner/goodClassifiers/data/all.3class.train
2
+ testFile = /u/nlp/data/ner/column_data/conll.testa
3
+ serializeTo = english.nowiki.3class.nodistsim.crf.ser.gz
4
+
5
+ type = crf
6
+
7
+ wordFunction = edu.stanford.nlp.process.AmericanizeFunction
8
+
9
+ #distSimLexicon = /u/nlp/data/pos_tags_are_useless/englishGigaword.200.pruned
10
+ distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200.pruned
11
+ useDistSim = false
12
+
13
+ map = word=0,answer=1
14
+
15
+ saveFeatureIndexToDisk = true
16
+
17
+ useClassFeature=true
18
+ useWord=true
19
+ #useWordPairs=true
20
+ useNGrams=true
21
+ noMidNGrams=true
22
+ maxNGramLeng=6
23
+ usePrev=true
24
+ useNext=true
25
+ #useTags=true
26
+ #useWordTag=true
27
+ useLongSequences=true
28
+ useSequences=true
29
+ usePrevSequences=true
30
+ maxLeft=1
31
+ useTypeSeqs=true
32
+ useTypeSeqs2=true
33
+ useTypeySequences=true
34
+ useOccurrencePatterns=true
35
+ useLastRealWord=true
36
+ useNextRealWord=true
37
+ #useReverse=false
38
+ normalize=true
39
+ # normalizeTimex=true
40
+ wordShape=chris2useLC
41
+ useDisjunctive=true
42
+ disjunctionWidth=5
43
+ #useDisjunctiveShapeInteraction=true
44
+
45
+ readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
46
+
47
+ useObservedSequencesOnly=true
48
+
49
+ useQN = true
50
+ QNsize = 25
51
+
52
+ # makes it go faster
53
+ featureDiffThresh=0.05
edu/stanford/nlp/models/ner/regexner.patterns ADDED
The diff for this file is too large to render. See raw diff
 
edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c351da938e9d86e7cd36f2d29e8405b069e21d1c48ec0e579764e818b892a53
3
+ size 12409329
edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger.props ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## tagger training invoked at Tue Feb 25 04:12:25 PST 2014 with arguments:
2
+ model = english-left3words-distsim.tagger
3
+ arch = left3words,naacl2003unknowns,wordshapes(-1,1),distsim(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1),distsimconjunction(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1)
4
+ wordFunction = edu.stanford.nlp.process.AmericanizeFunction
5
+ trainFile = /u/nlp/data/pos-tagger/english/train-wsj-0-18;/u/nlp/data/pos-tagger/english/train-extra-english;/u/nlp/data/pos-tagger/english/train-tech-english
6
+ closedClassTags =
7
+ closedClassTagThreshold = 40
8
+ curWordMinFeatureThresh = 2
9
+ debug = false
10
+ debugPrefix =
11
+ tagSeparator = _
12
+ encoding = UTF-8
13
+ iterations = 100
14
+ lang = english
15
+ learnClosedClassTags = false
16
+ minFeatureThresh = 2
17
+ openClassTags =
18
+ rareWordMinFeatureThresh = 10
19
+ rareWordThresh = 5
20
+ search = owlqn
21
+ sgml = false
22
+ sigmaSquared = 0.0
23
+ regL1 = 0.75
24
+ tagInside =
25
+ tokenize = true
26
+ tokenizerFactory =
27
+ tokenizerOptions =
28
+ verbose = false
29
+ verboseResults = true
30
+ veryCommonWordThresh = 250
31
+ xmlInput =
32
+ outputFile =
33
+ outputFormat = slashTags
34
+ outputFormatOptions =
35
+ nthreads = 1
jar-in-jar-loader.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03d2bd671c54a211880d614022931ef2188a03f98272d8fc17f9b3217ac606b7
3
+ size 7269
libs/NeuralNetwork.jar ADDED
Binary file (43.7 kB). View file
 
libs/SMOTE.jar ADDED
Binary file (8.96 kB). View file
 
libs/commons-cli-1.4.jar ADDED
Binary file (53.8 kB). View file
 
libs/excel/commons-codec-1.10.jar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4241dfa94e711d435f29a4604a3e2de5c4aa3c165e23bd066be6fc1fc4309569
3
+ size 284184
libs/excel/commons-collections4-4.1.jar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1fe8b5968b57d8465425357ed2d9dc695504518bed2df5b565c4b8e68c1c8a5
3
+ size 751238
libs/excel/commons-logging-1.2.jar ADDED
Binary file (61.8 kB). View file
 
libs/excel/curvesapi-1.04.jar ADDED
Binary file (98.4 kB). View file
 
libs/excel/junit-4.12.jar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59721f0805e223d84b90677887d9ff567dc534d7c502ca903c0c2b17f05c116a
3
+ size 314932
libs/excel/log4j-1.2.17.jar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d31696445697720527091754369082a6651bd49781b6005deb94e56753406f9
3
+ size 489884
libs/excel/poi-3.17.jar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30181821dd2e849727b638b9e329aeff4a64f3445c4142b13cf7a18bb3552edd
3
+ size 2701171
libs/excel/poi-examples-3.17.jar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc36c537277d06ea79bd4a231b00d5a48c9b44a3e1a14b6ddb68a6e1a4a335c6
3
+ size 374509
libs/excel/poi-excelant-3.17.jar ADDED
Binary file (31.2 kB). View file
 
libs/excel/poi-ooxml-3.17.jar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac915547ea3b775a810cc26149711682a23404e3d6d0b239915ac0c9305ee3c3
3
+ size 1479023