Iris <- as.matrix(read.table("http://www.cimat.mx/~ponciano/STAT90MOES02/FisherIris.txt",header=F)); data.matrix <- Iris; class.col <- 5; my.lda <- function(data.matrix,class.col) { # lectura de datos y definiciones dim.data <- dim(data.matrix); N <- dim.data[1]; p <- dim.data[2] -1; nclass <- max(data.matrix[,class.col]); nis.vec <- rep(0,nclass); # medias means.mat <- matrix(0,nrow=(nclass+1),ncol=p); overall.mean <- as.vector(apply(data.matrix[,-class.col],2,mean)); means.mat[(nclass+1),] <- overall.mean; # Matrices de varianza 'entre' y 'dentro' B <- matrix(0,nrow=p,ncol=p); W <- B; for(i in 1:nclass){ nis.vec[i] <- sum(data.matrix[,class.col]==i); ith.datamatrix <- data.matrix[data.matrix[,class.col]==i,-class.col]; x.bari <- as.vector(apply(ith.datamatrix,2,mean)); means.mat[i,] <- x.bari; B <- B+ nis.vec[i]*(x.bari-overall.mean)%*%t(x.bari-overall.mean); W <- W + (nis.vec[i]-1)*var(ith.datamatrix); } # Vectores y valores propios de (W^(-1))*B WinvtB <- solve(W)%*%B; eigens <- eigen(WinvtB); eigvals <- eigens$values; eigvects <- eigens$vector; # Calculo de los 'p' discriminantes lineales (las coordenadas de los datos en cada vector propio) ldisc.mat <- matrix(0,nrow=p,ncol=N); Spooled <- W/(N-nclass); for(i in 1:p){ raw.coords <- as.numeric(eigvects[,i]%*%t(data.matrix[,-class.col])); ldisc.mat[i,] <- (raw.coords-rep(mean(raw.coords),N))*(1/sqrt(t(eigvects[,i])%*%Spooled%*%eigvects[,i])); } # output return(list(B=B,W=W,means.mat=means.mat,eigvects = eigvects,eigvals=eigvals,ldisc.mat=ldisc.mat)); } Iris.candisc<- my.lda(data.matrix=Iris,class.col=5); # Primer y segundo discriminante first.ldisc <- Iris.candisc$ldisc.mat[1,]; second.ldisc<- Iris.candisc$ldisc.mat[2,]; # Plot en crudo sin distinccion de especies para checar los limites de los ejes: plot(first.ldisc,second.ldisc); # separar coordenadas del primer y segundo discriminante por especie Ind.sp1 <- which(Iris[,5]==1,arr.ind=T); Ind.sp2 <- which(Iris[,5]==2,arr.ind=T); Ind.sp3 <- which(Iris[,5]==3,arr.ind=T); xs.sp1 <- first.ldisc[Ind.sp1]; xs.sp2 <- first.ldisc[Ind.sp2]; xs.sp3 <- first.ldisc[Ind.sp3]; ys.sp1 <- second.ldisc[Ind.sp1]; ys.sp2 <- second.ldisc[Ind.sp2]; ys.sp3 <- second.ldisc[Ind.sp3]; # grafica plot(xs.sp1,ys.sp1, col="red",pch=16,xlim=c(-10,10),ylim=c(-3,3)); points(xs.sp2,ys.sp2,col="blue",pch=23); points(xs.sp3,ys.sp3,col="green",pch=24);