# read in the data a <- read.table("t.data",sep=",",header=T) classes <- a[,42] # use only the normal data in training normal <- classes=="normal." v <- c(1,5:6,23,32,34,36) b <- a[normal,v] norm.data <- b[1:1000,] # compute means and standard deviations norm.means <- apply(norm.data,2,mean) norm.stds <- apply(norm.data,2,sd) # standardize the normal data sc <- scale(norm.data,center=norm.means,scale=norm.stds) # see how many normal observations have at least one variable # that is three standard deviations from the mean for that variable. # Training data: x <- apply(sc,2,function(x)abs(x)>3) y <- apply(x,1,any) sum(y)/nrow(sc) # Compute the maximum absolute deviation from the mean for each variable. mynorm <- function(x,mu,std){ sc <- scale(x,center=mu,scale=std) apply(sc,1,function(y) max(abs(y))) } norm.test <- b[-(1:1000),] abnorm.test <- a[!normal,v] taus <- seq(0,100,length=300) FA <- rep(0,length(taus)) D <- rep(0,length(taus)) for(i in 1:length(taus)){ FA[i] <- sum(mynorm(norm.test,norm.means,norm.stds)>taus[i]) D[i] <- sum(mynorm(abnorm.test,norm.means,norm.stds)>taus[i]) cat(i," ") } plot(FA/1000,D/1000,xlab="PFA",ylab="PD",type="l",xlim=0:1,ylim=0:1) # Note: this version, provided in the HW, has a bug. # See the next definition hotelling <- function(x,p,n,mu,IS) { x <- matrix(x,ncol=p) n*(n-p)/(p*(n+1)*(n-1)) * (x %*% IS %*% t(x)) } hotelling <- function(x,p,n,mu,IS) { x <- scale(matrix(x,ncol=p),center=mu,scale=F) n*(n-p)/(p*(n+1)*(n-1)) * (x %*% IS %*% t(x)) } #You can compute IS by: v <- var(norm.data) IS <- solve(v) #You can now compute the hotelling statistic for the normal data: z <- apply(norm.data,1,hotelling,mu=norm.means,IS=IS,n=nrow(norm.data), p=ncol(norm.data)) plot(z,pch=20) z <- apply(norm.test,1,hotelling,mu=norm.means,IS=IS,n=nrow(norm.data), p=ncol(norm.data)) az <- apply(abnorm.test,1,hotelling,mu=norm.means,IS=IS,n=nrow(norm.data), p=ncol(norm.data)) sz <- sort(z,decreasing=T) hz <- NULL for(i in 1:length(z)){ hz <- rbind(hz,c((i-1)/nrow(norm.test),sum(az>sz[i])/nrow(abnorm.test))) } plot(FA/1000,D/1000,xlab="PFA",ylab="PD",type="l",xlim=0:1,ylim=0:1,lwd=3) lines(hz,col=2,lwd=3) chisq <- function(x,mu) { x <- matrix(x,ncol=length(x)) z <- scale(scale(x,center=mu,scale=F)^2,center=F,scale=mu) sum(z) } z <- apply(norm.test,1,chisq,mu=norm.means) az <- apply(abnorm.test,1,chisq,mu=norm.means) sz <- sort(z,decreasing=T) cz <- NULL for(i in 1:length(z)){ cz <- rbind(cz,c((i-1)/nrow(norm.test),sum(az>sz[i])/nrow(abnorm.test))) } lines(cz,col=4,lwd=3) legend(.4,.6,legend=c("Normal","Hotelling",expression(chi^2)),col=c(1,2,4), lty=1,lwd=3)