File size: 7,132 Bytes
6f3ebfa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
package org.maltparser.concurrent;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;

/**
 * Utility methods for sentences represented as string arrays, where each element is one token line
 * (columns separated by tab characters): reading a sentence from a reader, writing a sentence to a writer,
 * stripping gold-standard columns from the input, printing a sentence to a stream, checking whether two
 * sentences differ and printing simple attachment scores.
 *
 * @author Johan Hall
 *
 */
public class ConcurrentUtils {
    /**
     * Reads one sentence from a reader and returns its tokens as a string array.
     *
     * The method expects one token per line; an empty (or whitespace-only) line marks the end of the sentence.
     * Leading and trailing whitespace is trimmed from every token line.
     *
     * No particular data format is checked, so if the input is garbage the output will also be garbage.
     * An empty array is returned both when the end of the stream has been reached and when the first line
     * read is blank.
     *
     * @param reader a buffered reader
     * @return a string array with the tokens of the sentence (possibly empty, never null)
     * @throws IOException if reading from the reader fails
     */
    public static String[] readSentence(BufferedReader reader) throws IOException {
        List<String> tokens = new ArrayList<String>();
        String line;
        while ((line = reader.readLine()) != null) {
            // Trim once and reuse the result for both the blank-line test and the stored token.
            String token = line.trim();
            if (token.length() == 0) {
                break;
            }
            tokens.add(token);
        }
        return tokens.toArray(new String[tokens.size()]);
    }

    /**
     * Writes a sentence to a writer.
     *
     * Each token is written on its own line; after all tokens one empty line is written to mark the end of
     * the sentence, and the writer is flushed.
     *
     * @param inTokens a string array with tokens
     * @param writer a buffered writer
     * @throws IOException if writing to the writer fails
     */
    public static void writeSentence(String[] inTokens, BufferedWriter writer) throws IOException {
        for (String token : inTokens) {
            writer.write(token);
            writer.newLine();
        }
        writer.newLine();
        writer.flush();
    }

    /**
     * Strips the last two columns of each token. This is useful when reading a file with gold-standard
     * information in the last two columns and you want to parse without gold-standard information.
     *
     * The method expects the columns to be separated by a tab character.
     *
     * @param inTokens a string array with tokens where the columns are separated by a tab character
     * @return a new string array with the tokens without their last two columns
     */
    public static String[] stripGold(String[] inTokens) {
        return stripGold(inTokens, 2);
    }

    /**
     * Strips the last <i>stripNumberOfEndingColumns</i> columns of each token. This is useful when reading
     * a file with gold-standard information in the last <i>stripNumberOfEndingColumns</i> columns and you want to
     * parse without gold-standard information.
     *
     * A token that contains fewer than <i>stripNumberOfEndingColumns</i> tab characters cannot be stripped and is
     * returned unchanged; the same holds for every token when <i>stripNumberOfEndingColumns</i> is zero or negative.
     *
     * @param inTokens a string array with tokens where the columns are separated by a tab character
     * @param stripNumberOfEndingColumns the number of trailing columns to remove from each token
     * @return a new string array with the tokens without the last <i>stripNumberOfEndingColumns</i> columns
     */
    public static String[] stripGold(String[] inTokens, int stripNumberOfEndingColumns) {
        String[] outTokens = new String[inTokens.length];
        for (int i = 0; i < inTokens.length; i++) {
            outTokens[i] = stripEndingColumns(inTokens[i], stripNumberOfEndingColumns);
        }
        return outTokens;
    }

    /**
     * Cuts a single token just before the <i>numberOfColumns</i>-th tab character counted from the end.
     * Returns the token unchanged when there is nothing to strip or not enough tab characters.
     */
    private static String stripEndingColumns(String token, int numberOfColumns) {
        if (numberOfColumns <= 0) {
            return token;
        }
        int cut = token.length();
        for (int remaining = numberOfColumns; remaining > 0; remaining--) {
            // Search backwards from just before the previously found tab.
            cut = token.lastIndexOf('\t', cut - 1);
            if (cut < 0) {
                // Fewer tab characters than columns to strip: leave the token as it is.
                return token;
            }
        }
        return token.substring(0, cut);
    }

    /**
     * Prints a sentence to the standard output stream.
     *
     * Each token is printed on its own line; after all tokens one empty line is printed to mark the end of
     * the sentence.
     *
     * @param inTokens a string array with tokens
     */
    public static void printTokens(String[] inTokens) {
        printTokens(inTokens, System.out);
    }

    /**
     * Prints a sentence to a stream.
     *
     * Each token is printed on its own line; after all tokens one empty line is printed to mark the end of
     * the sentence.
     *
     * @param inTokens a string array with tokens
     * @param stream a print stream
     */
    public static void printTokens(String[] inTokens, PrintStream stream) {
        for (String token : inTokens) {
            stream.println(token);
        }
        stream.println();
    }

    /**
     * Checks whether two sentences differ.
     *
     * Two sentences differ if they have a different number of tokens or if any pair of tokens at the same
     * position is not equal.
     *
     * @param goldTokens the first sentence as an array of tokens
     * @param outputTokens the second sentence as an array of tokens
     * @return true if the sentences differ, otherwise false
     */
    public static boolean diffSentences(String[] goldTokens, String[] outputTokens) {
        if (goldTokens.length != outputTokens.length) {
            return true;
        }
        for (int i = 0; i < goldTokens.length; i++) {
            if (!goldTokens[i].equals(outputTokens[i])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Compares parsed sentences against gold-standard sentences and prints three scores to a stream:
     * the labeled attachment score (head and dependency label both equal), the unlabeled attachment score
     * (head equal) and the label accuracy score (dependency label equal).
     *
     * Each token is split into columns on the tab character and the columns at <i>headColumn</i> and
     * <i>dependencyLabelColumn</i> are compared as strings. If the number of sentences differs, or the number of
     * tokens differs in some sentence, a message is printed and no scores are reported. When there are no tokens
     * at all the percentages are printed as NaN.
     *
     * @param goldSentences the gold-standard sentences, each an array of tokens
     * @param parsedSentences the parsed sentences, each an array of tokens
     * @param headColumn the zero-based index of the column holding the head
     * @param dependencyLabelColumn the zero-based index of the column holding the dependency label
     * @param stream the print stream the result is written to
     * @throws ArrayIndexOutOfBoundsException if a token has too few columns for one of the column indices
     */
    public static void simpleEvaluation(List<String[]> goldSentences, List<String[]> parsedSentences, int headColumn, int dependencyLabelColumn, PrintStream stream) {
        if (goldSentences.size() != parsedSentences.size()) {
            stream.println("Number of sentences in gold and output differs");
            return;
        }
        int nTokens = 0;
        int nCorrectHead = 0;
        int nCorrectLabel = 0;
        int nCorrectBoth = 0;

        for (int i = 0; i < goldSentences.size(); i++) {
            String[] goldTokens = goldSentences.get(i);
            String[] parsedTokens = parsedSentences.get(i);
            if (goldTokens.length != parsedTokens.length) {
                stream.println("Number of tokens in gold and output differs in sentence " + i);
                return;
            }
            for (int j = 0; j < goldTokens.length; j++) {
                nTokens++;
                // A negative limit keeps trailing empty columns, so column indices stay valid
                // even when the last columns of a token are empty.
                String[] goldColumns = goldTokens[j].split("\t", -1);
                String[] parsedColumns = parsedTokens[j].split("\t", -1);
                boolean sameHead = goldColumns[headColumn].equals(parsedColumns[headColumn]);
                boolean sameLabel = goldColumns[dependencyLabelColumn].equals(parsedColumns[dependencyLabelColumn]);
                if (sameHead) {
                    nCorrectHead++;
                }
                if (sameLabel) {
                    nCorrectLabel++;
                }
                if (sameHead && sameLabel) {
                    nCorrectBoth++;
                }
            }
        }
        stream.format("Labeled   attachment score: %d / %d * 100 = %.2f %%\n", nCorrectBoth, nTokens, percentage(nCorrectBoth, nTokens));
        stream.format("Unlabeled attachment score: %d / %d * 100 = %.2f %%\n", nCorrectHead, nTokens, percentage(nCorrectHead, nTokens));
        stream.format("Label accuracy score:       %d / %d * 100 = %.2f %%\n", nCorrectLabel, nTokens, percentage(nCorrectLabel, nTokens));
    }

    /**
     * Returns <i>correct</i> / <i>total</i> as a percentage, computed in double precision (NaN when both are zero).
     */
    private static double percentage(int correct, int total) {
        return 100.0 * correct / total;
    }
}