Goran Glavaš
committed on
Commit
·
1d5d16d
1
Parent(s):
41fc5cc
Fixing relative resources path access (Maven)
Browse files
README.txt
CHANGED
|
@@ -16,6 +16,7 @@ This repository contains:
|
|
| 16 |
Usage
|
| 17 |
========
|
| 18 |
|
|
|
|
| 19 |
The following command with four arguments runs the GraphSeg tool:
|
| 20 |
|
| 21 |
java -jar graphseg.jar <input-folder-path> <output-folder-path> <relatedness-threshold> <minimal-segment-size>
|
|
|
|
| 16 |
Usage
|
| 17 |
========
|
| 18 |
|
| 19 |
+
To run the GraphSeg tool successfully, you need to have Java 1.8 (Java 8) installed.
|
| 20 |
The following command with four arguments runs the GraphSeg tool:
|
| 21 |
|
| 22 |
java -jar graphseg.jar <input-folder-path> <output-folder-path> <relatedness-threshold> <minimal-segment-size>
|
binary/graphseg.jar
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8933d43d10f8ba885d4df38b3b0a2d1bf7796f7d367b7f9fef6f3925801987c5
|
| 3 |
+
size 616819715
|
source/src/edu/uma/nlp/graphseg/Start.java
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
package edu.uma.nlp.graphseg;
|
| 2 |
-
|
| 3 |
import java.io.File;
|
| 4 |
import java.io.IOException;
|
|
|
|
| 5 |
import java.nio.file.Files;
|
| 6 |
import java.nio.file.Path;
|
| 7 |
import java.nio.file.Paths;
|
|
@@ -73,15 +73,15 @@ public class Start {
|
|
| 73 |
return;
|
| 74 |
}
|
| 75 |
|
| 76 |
-
|
| 77 |
-
List<String> stopwords = IOHelper.
|
| 78 |
|
| 79 |
-
|
| 80 |
MemoryStorage.setWordVectorSpace(new WordVectorSpace());
|
| 81 |
-
MemoryStorage.getWordVectorSpace().load(
|
| 82 |
|
| 83 |
-
|
| 84 |
-
MemoryStorage.setInformationContent(new InformationContent(
|
| 85 |
|
| 86 |
|
| 87 |
SemanticSimilarity.setStopwords(stopwords);
|
|
|
|
| 1 |
package edu.uma.nlp.graphseg;
|
|
|
|
| 2 |
import java.io.File;
|
| 3 |
import java.io.IOException;
|
| 4 |
+
import java.io.InputStream;
|
| 5 |
import java.nio.file.Files;
|
| 6 |
import java.nio.file.Path;
|
| 7 |
import java.nio.file.Paths;
|
|
|
|
| 73 |
return;
|
| 74 |
}
|
| 75 |
|
| 76 |
+
InputStream stopwordsStream = Start.class.getClassLoader().getResourceAsStream("stopwords.txt");
|
| 77 |
+
List<String> stopwords = IOHelper.getAllLinesStream(stopwordsStream);
|
| 78 |
|
| 79 |
+
InputStream embeddingsStream = Start.class.getClassLoader().getResourceAsStream("embeddings.txt");
|
| 80 |
MemoryStorage.setWordVectorSpace(new WordVectorSpace());
|
| 81 |
+
MemoryStorage.getWordVectorSpace().load(embeddingsStream, null);
|
| 82 |
|
| 83 |
+
InputStream freqsStream = Start.class.getClassLoader().getResourceAsStream("freqs.txt");
|
| 84 |
+
MemoryStorage.setInformationContent(new InformationContent(freqsStream, 1));
|
| 85 |
|
| 86 |
|
| 87 |
SemanticSimilarity.setStopwords(stopwords);
|
source/src/edu/uma/nlp/graphseg/semantics/WordVectorSpace.java
CHANGED
|
@@ -3,10 +3,10 @@ package edu.uma.nlp.graphseg.semantics;
|
|
| 3 |
import java.io.BufferedReader;
|
| 4 |
import java.io.BufferedWriter;
|
| 5 |
import java.io.File;
|
| 6 |
-
import java.io.FileInputStream;
|
| 7 |
import java.io.FileNotFoundException;
|
| 8 |
import java.io.FileOutputStream;
|
| 9 |
import java.io.IOException;
|
|
|
|
| 10 |
import java.io.InputStreamReader;
|
| 11 |
import java.io.OutputStreamWriter;
|
| 12 |
import java.util.ArrayList;
|
|
@@ -26,11 +26,11 @@ public class WordVectorSpace {
|
|
| 26 |
return dimension;
|
| 27 |
}
|
| 28 |
|
| 29 |
-
public void load(
|
| 30 |
{
|
| 31 |
embeddings = new HashMap<String, double[]>();
|
| 32 |
|
| 33 |
-
try (BufferedReader br = new BufferedReader(new InputStreamReader(
|
| 34 |
String line;
|
| 35 |
int counter = 0;
|
| 36 |
while ((line = br.readLine()) != null) {
|
|
|
|
| 3 |
import java.io.BufferedReader;
|
| 4 |
import java.io.BufferedWriter;
|
| 5 |
import java.io.File;
|
|
|
|
| 6 |
import java.io.FileNotFoundException;
|
| 7 |
import java.io.FileOutputStream;
|
| 8 |
import java.io.IOException;
|
| 9 |
+
import java.io.InputStream;
|
| 10 |
import java.io.InputStreamReader;
|
| 11 |
import java.io.OutputStreamWriter;
|
| 12 |
import java.util.ArrayList;
|
|
|
|
| 26 |
return dimension;
|
| 27 |
}
|
| 28 |
|
| 29 |
+
public void load(InputStream stream, HashMap<String, Integer> filters) throws FileNotFoundException, IOException
|
| 30 |
{
|
| 31 |
embeddings = new HashMap<String, double[]>();
|
| 32 |
|
| 33 |
+
try (BufferedReader br = new BufferedReader(new InputStreamReader(stream, "UTF8"))) {
|
| 34 |
String line;
|
| 35 |
int counter = 0;
|
| 36 |
while ((line = br.readLine()) != null) {
|