R備忘録 - 記事一覧
- 投稿者: みゅ
- カテゴリ: なし
- 優先度: 普通
- 状態: 完了
- 日時: 2009年06月03日 13時42分39秒
levenshtein <- function(string1, string2, case=TRUE, map=NULL) {
########
#
# levenshtein algorithm in R
#
# Author : Hans-Joerg Bibiko
# Date : 29/06/2006
#
# Contact : bibiko@eva.mpg.de
#
########
#
# string1, string2 := strings to compare
#
# case = TRUE := case sensitivity; case = FALSE := case insensitivity
#
# map := character vector of c(regexp1, replacement1, regexp2, replacement2, ...)
#
# example:
# map <- c("[aeiou]","V","[^aeiou]","C") := replaces all vowels with V and all others with C
#
# levenshtein("Bank","Bond", map=map) => 0
#
########
if(!is.null(map)) {
m <- matrix(map, ncol=2, byrow=TRUE)
s <- c(ifelse(case, string1, tolower(string1)), ifelse(case, string2, tolower(string2)))
for(i in 1:dim(m)[1]) s <- gsub(m[i,1], m[i,2], s)
string1 <- s[1]
string2 <- s[2]
}
if(ifelse(case, string1, tolower(string1)) == ifelse(case, string2, tolower(string2))) return(0)
s1 <- strsplit(paste(" ", ifelse(case, string1, tolower(string1)), sep=""), NULL)[[1]]
s2 <- strsplit(paste(" ", ifelse(case, string2, tolower(string2)), sep=""), NULL)[[1]]
l1 <- length(s1)
l2 <- length(s2)
d <- matrix(nrow = l1, ncol = l2)
for(i in 1:l1) d[i,1] <- i-1
for(i in 1:l2) d[1,i] <- i-1
for(i in 2:l1) {
for(j in 2:l2) {
d[i,j] <-
min(
(d[i-1,j]+1),
(d[i,j-1]+1),
(d[i-1,j-1]+ifelse(s1[i] == s2[j], 0, 1))
)
}
}
d[l1,l2]
}
dyn.load("levenshtein.so")
if( use_c ){
.Call("levenshtein_0", as.integer(l1), as.integer(l2), s1, s2, d)
return(d[l1,l2])
}
#define ntMATLOC(mat,l,x,y) (mat)[(x)+(y)*(l)]
SEXP levenshtein_0(SEXP l1, SEXP l2, SEXP s1, SEXP s2, SEXP d)
{
int l1i = INTEGER(l1)[0];
int l2i = INTEGER(l2)[0];
int ii, jj;
int cp, cp1, cp2, cp3;
for( ii=0; ii<l1i; ii++) ntMATLOC(INTEGER(d),l1i,ii,0) = ii;
for( ii=0; ii<l2i; ii++) ntMATLOC(INTEGER(d),l1i,0,ii) = ii;
for( ii=1; ii<l1i; ii++) {
for( jj=1; jj<l2i; jj++) {
cp1 = ntMATLOC(INTEGER(d),l1i,ii-1,jj) + 1;
cp2 = ntMATLOC(INTEGER(d),l1i,ii,jj-1) + 1;
if( CHAR(VECTOR_ELT(s1,ii))[0] == CHAR(VECTOR_ELT(s2,jj))[0] ) {
cp3 = ntMATLOC(INTEGER(d),l1i,ii-1,jj-1);
} else{
cp3 = ntMATLOC(INTEGER(d),l1i,ii-1,jj-1) + 1;
}
cp = (cp1<cp2) ? cp1 : cp2;
cp = (cp<cp3) ? cp : cp3;
ntMATLOC(INTEGER(d),l1i,ii,jj) = cp;
}
}
return d;
}
PROTECT(s1 = strSplit(str_cs1));
SEXP strSplit(SEXP str)
{
SEXP ret;
char *xi, *pp;
char p[] =" ";
int ii;
pp = p;
xi = CHAR(STRING_ELT(str, 0));
PROTECT(ret = allocVector(STRSXP, 1+strlen(xi)));
SET_STRING_ELT(ret, 0, mkChar(pp));
for( ii=0; ii<strlen(xi); ii++ ){
SET_STRING_ELT(ret, ii+1, mkChar(pp));
CHAR(STRING_ELT(ret, ii+1))[0] = xi[ii];
}
UNPROTECT(1);
return ret;
}
R備忘録 /状態空間モデリング/donlp2/その他のメモ