|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include <stdio.h> |
|
|
#include <ctype.h> |
|
|
#include <string.h> |
|
|
#include <stdlib.h> |
|
|
#ifdef HAVE_CONFIG_H |
|
|
#include "config.h" |
|
|
#endif |
|
|
#include "wn.h" |
|
|
|
|
|
#ifdef _WINDOWS |
|
|
#include <windows.h> |
|
|
#include <windowsx.h> |
|
|
#define EXCFILE "%s\\%s.exc" |
|
|
#else |
|
|
#define EXCFILE "%s/%s.exc" |
|
|
#endif |
|
|
|
|
|
/* RCS identification string for this source file. */
static char *Id =
    "$Id: morph.c,v 1.67 2006/11/14 21:00:23 wn Exp $";

/*
 * Suffix substitution tables.  The two arrays are parallel: when a word
 * ends in sufx[i], wordbase() strips that ending and appends addr[i] in
 * its place.  offsets[] and cnts[] pick the run of entries to try for a
 * given part of speech.
 */
static char *sufx[] = {
    /* entries 0-7 (these look like plural-noun endings) */
    "s",    "ses",  "xes", "zes",
    "ches", "shes", "men", "ies",
    /* entries 8-15 (these look like verb inflections) */
    "s",  "ies", "es",  "es",
    "ed", "ed",  "ing", "ing",
    /* entries 16-19 (these look like comparative/superlative endings) */
    "er", "est", "er", "est"
};

/* Replacement text for the ending at the same index in sufx[]. */
static char *addr[] = {
    /* entries 0-7 */
    "",   "s",  "x",   "z",
    "ch", "sh", "man", "y",
    /* entries 8-15 */
    "",  "y", "e", "",
    "e", "",  "e", "",
    /* entries 16-19 */
    "", "", "e", "e"
};
|
|
|
|
|
/*
 * Window into sufx[]/addr[] for each part-of-speech code: offsets[pos] is
 * the index of the first entry to try and cnts[pos] is how many entries
 * to try.  Slot 0 has a count of zero, so nothing is tried for it.
 */
static int offsets[NUMPARTS] = {
    0, 0, 8, 16
};
static int cnts[NUMPARTS] = {
    0, 8, 8, 4
};

/* Scratch buffer in which messages for display_message() are formatted. */
static char msgbuf[256];
|
|
|
|
|
/* Number of entries in prepositions[]. */
#define NUMPREPS 15

/*
 * Words that hasprep() recognises as prepositions.  Each entry stores the
 * text together with its length so the matcher does not have to recompute
 * it on every comparison.
 */
static struct {
    char *str;      /* preposition text */
    int strlen;     /* number of characters in str */
} prepositions[NUMPREPS] = {
    { "to",      2 },
    { "at",      2 },
    { "of",      2 },
    { "on",      2 },
    { "off",     3 },
    { "in",      2 },
    { "out",     3 },
    { "up",      2 },
    { "down",    4 },
    { "from",    4 },
    { "with",    4 },
    { "into",    4 },
    { "for",     3 },
    { "about",   5 },
    { "between", 7 },
};
|
|
|
|
|
/*
 * Open exception-list streams, indexed by part-of-speech code.  do_init()
 * fills slots 1..NUMPARTS; slot 0 is never opened.
 */
static FILE *exc_fps[NUMPARTS + 1];

/*
 * Forward declarations of the file-local helpers.  do_init is declared
 * with an explicit (void) so that this is a real prototype matching its
 * definition rather than an old-style declaration with unchecked
 * arguments.
 */
static int do_init(void);
static int strend(char *str1, char *str2);
static char *wordbase(char *word, int ender);
static int hasprep(char *s, int wdcnt);
static char *exc_lookup(char *word, int pos);
static char *morphprep(char *s);
|
|
|
|
|
|
|
|
|
|
|
int morphinit(void) |
|
|
{ |
|
|
static int done = 0; |
|
|
static int openerr = 0; |
|
|
|
|
|
if (!done) { |
|
|
if (OpenDB) { |
|
|
if (!(openerr = do_init())) |
|
|
done = 1; |
|
|
} else |
|
|
openerr = -1; |
|
|
} |
|
|
|
|
|
return(openerr); |
|
|
} |
|
|
|
|
|
|
|
|
int re_morphinit(void) |
|
|
{ |
|
|
int i; |
|
|
|
|
|
for (i = 1; i <= NUMPARTS; i++) { |
|
|
if (exc_fps[i] != NULL) { |
|
|
fclose(exc_fps[i]); exc_fps[i] = NULL; |
|
|
} |
|
|
} |
|
|
|
|
|
return(OpenDB ? do_init() : -1); |
|
|
} |
|
|
|
|
|
static int do_init(void) |
|
|
{ |
|
|
int i, openerr; |
|
|
#ifdef _WINDOWS |
|
|
HKEY hkey; |
|
|
DWORD dwType, dwSize; |
|
|
#else |
|
|
char *env; |
|
|
#endif |
|
|
char searchdir[256], fname[256]; |
|
|
|
|
|
openerr = 0; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef _WINDOWS |
|
|
if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, TEXT("Software\\WordNet\\3.0"), |
|
|
0, KEY_READ, &hkey) == ERROR_SUCCESS) { |
|
|
dwSize = sizeof(searchdir); |
|
|
RegQueryValueEx(hkey, TEXT("WNHome"), |
|
|
NULL, &dwType, searchdir, &dwSize); |
|
|
RegCloseKey(hkey); |
|
|
strcat(searchdir, DICTDIR); |
|
|
} |
|
|
else if (RegOpenKeyEx(HKEY_CURRENT_USER, TEXT("Software\\WordNet\\3.0"), |
|
|
0, KEY_READ, &hkey) == ERROR_SUCCESS) { |
|
|
dwSize = sizeof(searchdir); |
|
|
RegQueryValueEx(hkey, TEXT("WNHome"), |
|
|
NULL, &dwType, searchdir, &dwSize); |
|
|
RegCloseKey(hkey); |
|
|
strcat(searchdir, DICTDIR); |
|
|
} else |
|
|
sprintf(searchdir, DEFAULTPATH); |
|
|
#else |
|
|
if ((env = getenv("WNSEARCHDIR")) != NULL) |
|
|
strcpy(searchdir, env); |
|
|
else if ((env = getenv("WNHOME")) != NULL) |
|
|
sprintf(searchdir, "%s%s", env, DICTDIR); |
|
|
else |
|
|
strcpy(searchdir, DEFAULTPATH); |
|
|
#endif |
|
|
|
|
|
for (i = 1; i <= NUMPARTS; i++) { |
|
|
sprintf(fname, EXCFILE, searchdir, partnames[i]); |
|
|
if ((exc_fps[i] = fopen(fname, "r")) == NULL) { |
|
|
sprintf(msgbuf, |
|
|
"WordNet library error: Can't open exception file(%s)\n\n", |
|
|
fname); |
|
|
display_message(msgbuf); |
|
|
openerr = -1; |
|
|
} |
|
|
} |
|
|
return(openerr); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
char *morphstr(char *origstr, int pos) |
|
|
{ |
|
|
static char searchstr[WORDBUF], str[WORDBUF]; |
|
|
static int svcnt, svprep; |
|
|
char word[WORDBUF], *tmp; |
|
|
int cnt, st_idx = 0, end_idx; |
|
|
int prep; |
|
|
char *end_idx1, *end_idx2; |
|
|
char *append; |
|
|
|
|
|
if (pos == SATELLITE) |
|
|
pos = ADJ; |
|
|
|
|
|
|
|
|
|
|
|
if (origstr != NULL) { |
|
|
|
|
|
strtolower(strsubst(strcpy(str, origstr), ' ', '_')); |
|
|
searchstr[0] = '\0'; |
|
|
cnt = cntwords(str, '_'); |
|
|
svprep = 0; |
|
|
|
|
|
|
|
|
|
|
|
if ((tmp = exc_lookup(str, pos)) && strcmp(tmp, str)) { |
|
|
svcnt = 1; |
|
|
return(tmp); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (pos != VERB && (tmp = morphword(str, pos)) && strcmp(tmp, str)) |
|
|
return(tmp); |
|
|
|
|
|
if (pos == VERB && cnt > 1 && (prep = hasprep(str, cnt))) { |
|
|
|
|
|
svprep = prep; |
|
|
return(morphprep(str)); |
|
|
} else { |
|
|
svcnt = cnt = cntwords(str, '-'); |
|
|
while (origstr && --cnt) { |
|
|
end_idx1 = strchr(str + st_idx, '_'); |
|
|
end_idx2 = strchr(str + st_idx, '-'); |
|
|
if (end_idx1 && end_idx2) { |
|
|
if (end_idx1 < end_idx2) { |
|
|
end_idx = (int)(end_idx1 - str); |
|
|
append = "_"; |
|
|
} else { |
|
|
end_idx = (int)(end_idx2 - str); |
|
|
append = "-"; |
|
|
} |
|
|
} else { |
|
|
if (end_idx1) { |
|
|
end_idx = (int)(end_idx1 - str); |
|
|
append = "_"; |
|
|
} else { |
|
|
end_idx = (int)(end_idx2 - str); |
|
|
append = "-"; |
|
|
} |
|
|
} |
|
|
if (end_idx < 0) return(NULL); |
|
|
strncpy(word, str + st_idx, end_idx - st_idx); |
|
|
word[end_idx - st_idx] = '\0'; |
|
|
if(tmp = morphword(word, pos)) |
|
|
strcat(searchstr,tmp); |
|
|
else |
|
|
strcat(searchstr,word); |
|
|
strcat(searchstr, append); |
|
|
st_idx = end_idx + 1; |
|
|
} |
|
|
|
|
|
if(tmp = morphword(strcpy(word, str + st_idx), pos)) |
|
|
strcat(searchstr,tmp); |
|
|
else |
|
|
strcat(searchstr,word); |
|
|
if(strcmp(searchstr, str) && is_defined(searchstr,pos)) |
|
|
return(searchstr); |
|
|
else |
|
|
return(NULL); |
|
|
} |
|
|
} else { |
|
|
if (svprep) { |
|
|
svprep = 0; |
|
|
return(NULL); |
|
|
} else if (svcnt == 1) |
|
|
return(exc_lookup(NULL, pos)); |
|
|
else { |
|
|
svcnt = 1; |
|
|
if ((tmp = exc_lookup(str, pos)) && strcmp(tmp, str)) |
|
|
return(tmp); |
|
|
else |
|
|
return(NULL); |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
char *morphword(char *word, int pos) |
|
|
{ |
|
|
int offset, cnt; |
|
|
int i; |
|
|
static char retval[WORDBUF]; |
|
|
char *tmp, tmpbuf[WORDBUF], *end; |
|
|
|
|
|
sprintf(retval,""); |
|
|
sprintf(tmpbuf, ""); |
|
|
end = ""; |
|
|
|
|
|
if(word == NULL) |
|
|
return(NULL); |
|
|
|
|
|
|
|
|
|
|
|
if((tmp = exc_lookup(word, pos)) != NULL) |
|
|
return(tmp); |
|
|
|
|
|
if (pos == ADV) { |
|
|
return(NULL); |
|
|
} |
|
|
if (pos == NOUN) { |
|
|
if (strend(word, "ful")) { |
|
|
cnt = strrchr(word, 'f') - word; |
|
|
strncat(tmpbuf, word, cnt); |
|
|
end = "ful"; |
|
|
} else |
|
|
|
|
|
if (strend(word, "ss") || (strlen(word) <= 2)) |
|
|
return(NULL); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (tmpbuf[0] == '\0') |
|
|
strcpy(tmpbuf, word); |
|
|
|
|
|
offset = offsets[pos]; |
|
|
cnt = cnts[pos]; |
|
|
|
|
|
for(i = 0; i < cnt; i++){ |
|
|
strcpy(retval, wordbase(tmpbuf, (i + offset))); |
|
|
if(strcmp(retval, tmpbuf) && is_defined(retval, pos)) { |
|
|
strcat(retval, end); |
|
|
return(retval); |
|
|
} |
|
|
} |
|
|
return(NULL); |
|
|
} |
|
|
|
|
|
/*
 * Report whether str1 ends with str2.
 *
 * Returns 1 only when str2 is strictly shorter than str1 and matches its
 * tail; a string is therefore never considered to end with itself.
 * Returns 0 otherwise.
 */
static int strend(char *str1, char *str2)
{
    size_t len1 = strlen(str1);
    size_t len2 = strlen(str2);

    if (len2 >= len1)
        return 0;

    /* compare the last len2 characters of str1 against str2 */
    return strcmp(str1 + (len1 - len2), str2) == 0;
}
|
|
|
|
|
static char *wordbase(char *word, int ender) |
|
|
{ |
|
|
char *pt1; |
|
|
static char copy[WORDBUF]; |
|
|
|
|
|
strcpy(copy, word); |
|
|
if(strend(copy,sufx[ender])) { |
|
|
pt1=strchr(copy,'\0'); |
|
|
pt1 -= strlen(sufx[ender]); |
|
|
*pt1='\0'; |
|
|
strcat(copy,addr[ender]); |
|
|
} |
|
|
return(copy); |
|
|
} |
|
|
|
|
|
static int hasprep(char *s, int wdcnt) |
|
|
{ |
|
|
|
|
|
|
|
|
|
|
|
int i, wdnum; |
|
|
|
|
|
for (wdnum = 2; wdnum <= wdcnt; wdnum++) { |
|
|
s = strchr(s, '_'); |
|
|
for (s++, i = 0; i < NUMPREPS; i++) |
|
|
if (!strncmp(s, prepositions[i].str, prepositions[i].strlen) && |
|
|
(s[prepositions[i].strlen] == '_' || |
|
|
s[prepositions[i].strlen] == '\0')) |
|
|
return(wdnum); |
|
|
} |
|
|
return(0); |
|
|
} |
|
|
|
|
|
static char *exc_lookup(char *word, int pos) |
|
|
{ |
|
|
static char line[WORDBUF], *beglp, *endlp; |
|
|
char *excline; |
|
|
int found = 0; |
|
|
|
|
|
if (exc_fps[pos] == NULL) |
|
|
return(NULL); |
|
|
|
|
|
|
|
|
if(word != NULL){ |
|
|
if ((excline = bin_search(word, exc_fps[pos])) != NULL) { |
|
|
strcpy(line, excline); |
|
|
endlp = strchr(line,' '); |
|
|
} else |
|
|
endlp = NULL; |
|
|
} |
|
|
if(endlp && *(endlp + 1) != ' '){ |
|
|
beglp = endlp + 1; |
|
|
while(*beglp && *beglp == ' ') beglp++; |
|
|
endlp = beglp; |
|
|
while(*endlp && *endlp != ' ' && *endlp != '\n') endlp++; |
|
|
if(endlp != beglp){ |
|
|
*endlp='\0'; |
|
|
return(beglp); |
|
|
} |
|
|
} |
|
|
beglp = NULL; |
|
|
endlp = NULL; |
|
|
return(NULL); |
|
|
} |
|
|
|
|
|
static char *morphprep(char *s) |
|
|
{ |
|
|
char *rest, *exc_word, *lastwd = NULL, *last; |
|
|
int i, offset, cnt; |
|
|
char word[WORDBUF], end[WORDBUF]; |
|
|
static char retval[WORDBUF]; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
rest = strchr(s, '_'); |
|
|
last = strrchr(s, '_'); |
|
|
if (rest != last) { |
|
|
if (lastwd = morphword(last + 1, NOUN)) { |
|
|
strncpy(end, rest, last - rest + 1); |
|
|
end[last-rest+1] = '\0'; |
|
|
strcat(end, lastwd); |
|
|
} |
|
|
} |
|
|
|
|
|
strncpy(word, s, rest - s); |
|
|
word[rest - s] = '\0'; |
|
|
for (i = 0, cnt = strlen(word); i < cnt; i++) |
|
|
if (!isalnum((unsigned char)(word[i]))) return(NULL); |
|
|
|
|
|
offset = offsets[VERB]; |
|
|
cnt = cnts[VERB]; |
|
|
|
|
|
|
|
|
|
|
|
if ((exc_word = exc_lookup(word, VERB)) && |
|
|
strcmp(exc_word, word)) { |
|
|
|
|
|
sprintf(retval, "%s%s", exc_word, rest); |
|
|
if(is_defined(retval, VERB)) |
|
|
return(retval); |
|
|
else if (lastwd) { |
|
|
sprintf(retval, "%s%s", exc_word, end); |
|
|
if(is_defined(retval, VERB)) |
|
|
return(retval); |
|
|
} |
|
|
} |
|
|
|
|
|
for (i = 0; i < cnt; i++) { |
|
|
if ((exc_word = wordbase(word, (i + offset))) && |
|
|
strcmp(word, exc_word)) { |
|
|
|
|
|
sprintf(retval, "%s%s", exc_word, rest); |
|
|
if(is_defined(retval, VERB)) |
|
|
return(retval); |
|
|
else if (lastwd) { |
|
|
sprintf(retval, "%s%s", exc_word, end); |
|
|
if(is_defined(retval, VERB)) |
|
|
return(retval); |
|
|
} |
|
|
} |
|
|
} |
|
|
sprintf(retval, "%s%s", word, rest); |
|
|
if (strcmp(s, retval)) |
|
|
return(retval); |
|
|
if (lastwd) { |
|
|
sprintf(retval, "%s%s", word, end); |
|
|
if (strcmp(s, retval)) |
|
|
return(retval); |
|
|
} |
|
|
return(NULL); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|