library(e1071)
library(rgl)

# Overlay the predicted class regions of a fitted SVM on the current plot.
#   y : two-column data matrix; its column ranges set the extent of the grid
#   b : fitted svm object
#   n : number of grid points along each axis
# Each grid point is drawn as a small cross whose colour is the predicted
# class label, so the labels must themselves be valid colour names.
boundaries <- function(y, b, n = 100) {
  grid <- expand.grid(
    seq(min(y[, 1]), max(y[, 1]), length.out = n),
    seq(min(y[, 2]), max(y[, 2]), length.out = n)
  )
  points(grid, pch = 4, cex = .15, col = as.character(predict(b, grid)))
}

# Circle the support vectors of a fitted SVM on the current plot.
#   y : the data matrix the model was trained on (b$index refers to its rows)
#   b : fitted svm object
supports <- function(y, b) {
  # drop = FALSE keeps a single selected row as a one-row matrix instead of
  # letting it collapse to a plain vector, which points() would misread.
  points(y[b$index, , drop = FALSE], cex = 2)
}

# Euclidean length of a vector. This masks base::norm on purpose: the name is
# reused by the cluster examples below.
norm <- function(x) {
  sqrt(sum(x^2))
}

# Examples of SVM
# ---------------
# Two well separated groups: rows 51-100 are shifted by 5 in both coordinates.
y <- matrix(rnorm(200), ncol = 2) + rep(c(0, 5), each = 50)
lab <- rep(c("Red", "Blue"), each = 50)
plot(y, col = lab, pch = 20)
b <- svm(y, as.factor(lab), kernel = "linear")
summary(b)
supports(y, b)
boundaries(y, b)

# Example with misclassification
# ------------------------------
# Same construction with a shift of only 2, so the two groups overlap.
y <- matrix(rnorm(200), ncol = 2) + rep(c(0, 2), each = 50)
lab <- rep(c("Red", "Blue"), each = 50)
plot(y, col = lab, pch = 20)
b <- svm(y, as.factor(lab), kernel = "linear")
supports(y, b)
summary(b)
fit <- as.character(b$fitted)
plot(y, col = lab, pch = 20)
# Mark misclassified points with a diamond in the colour of the predicted class.
wrong <- fit != lab
points(y[wrong, , drop = FALSE], col = fit[wrong], pch = 5, cex = 1.5)
boundaries(y, b)

# Example with circles
# --------------------
# Rows 51-100 are pushed outwards by 4 units along their own direction, which
# puts them in a ring around the first 50 points.
y <- matrix(rnorm(200), ncol = 2)
y[51:100, ] <- y[51:100, ] + 4 * y[51:100, ] / apply(y[51:100, ], 1, norm)
lab <- rep(c("Red", "Blue"), each = 50)
plot(y, col = lab, pch = 20)

# Linear kernel
b <- svm(y, as.factor(lab), kernel = "linear")
summary(b)
fit <- as.character(b$fitted)
wrong <- fit != lab
points(y[wrong, , drop = FALSE], col = fit[wrong], pch = 5, cex = 1.5)
boundaries(y, b)

# Radial kernel
b <- svm(y, as.factor(lab), kernel = "radial")
summary(b)
fit <- as.character(b$fitted)
wrong <- fit != lab
plot(y, col = lab, pch = 20)
points(y[wrong, , drop = FALSE], col = fit[wrong], pch = 5, cex = 1.5)
boundaries(y, b)
plot(y, col = lab, pch = 20)
points(y[wrong, , drop = FALSE], col = fit[wrong], pch = 5, cex = 1.5)
supports(y, b)

# Check new data
# A fresh sample built the same way, classified with the radial model above.
y2 <- matrix(rnorm(200), ncol = 2)
y2[51:100, ] <- y2[51:100, ] + 4 * y2[51:100, ] / apply(y2[51:100, ], 1, norm)
lab <- rep(c("Red", "Blue"), each = 50)
fit <- as.character(predict(b, y2))
plot(y2, col = fit, pch = 20)
# Circle the points whose predicted class differs from the true one.
points(y2[fit != lab, , drop = FALSE], cex = 1.2)

# Example with many clusters
# --------------------------
# 200 points; rows 51-100 are pushed outwards by 3 units here. The remaining
# groups are set up by the statements that follow.
y <- matrix(rnorm(400), ncol = 2)
y[51:100, ] <- y[51:100, ] + 3 * y[51:100, ] / apply(y[51:100, ], 1, norm)
# Rows 101-150 are shifted by (4, 4); rows 151-200 are pushed outwards by 6
# units along their own direction.
y[101:150, ] <- y[101:150, ] + c(4, 4)
y[151:200, ] <- y[151:200, ] + 6 * y[151:200, ] / apply(y[151:200, ], 1, norm)
lab <- rep(c("Red", "Blue", "Green", "Black"), each = 50)
plot(y, col = lab, pch = 20)

# Hold out 20 points for validation and train on the rest.
chk <- sample(1:200, 20)
b <- svm(y[-chk, ], as.factor(lab[-chk]), kernel = "radial")
summary(b)
boundaries(y, b)
# b$index refers to rows of the training matrix, so the support vectors have
# to be looked up in y[-chk, ] rather than in the full y.
supports(y[-chk, ], b)

fit <- as.character(predict(b, y[chk, ]))
plot(y[-chk, ], col = lab[-chk], pch = 20)
points(y[chk, ], col = fit, pch = 3)
# Points with wrong category
wrong <- chk[fit != lab[chk]]
points(y[wrong, , drop = FALSE], col = lab[wrong], pch = 7)

# Example with music files
# ------------------------
# Real data. List of songs. Variables are based on the analysis
# of the signal of 30 seconds of music.
# Use that data to predict the type of songs.
a <- read.table("music_data_17_10_05.exa", sep = " ")[, -52]

# Columns 2:50 are used as features. The matrix is shaped from that slice
# itself: shaping it with dim(a), which has more columns than the slice,
# would recycle feature values into extra duplicate columns.
feat <- as.matrix(a[, 2:50])
x <- matrix(as.numeric(feat), nrow = nrow(feat), ncol = ncol(feat))

# Keep only the songs without any missing feature value.
rem <- rowSums(is.na(x))
ok <- rem == 0
tit <- a[ok, 1]
# svm() only performs classification when the response is a factor, and
# read.table() no longer converts strings to factors by default (R >= 4.0).
genre <- as.factor(a[ok, 51])
x <- x[ok, ]
# Drop constant columns.
x <- x[, apply(x, 2, sd) != 0]

# Visualization
# Colour by genre (one integer colour code per factor level).
genre_col <- as.integer(genre)
pairs(x[, sample(seq_len(ncol(x)), 5)], col = genre_col, pch = 20)
keep <- sample(seq_len(ncol(x)), 3)
kp <- paste("var", keep)
plot3d(x[, keep], size = 3, col = genre_col,
       xlab = kp[1], ylab = kp[2], zlab = kp[3], axes = FALSE)

# Remove data for validation
chk <- sample(seq_len(nrow(x)), 200)

# Number of held-out songs whose genre the model predicts correctly.
n_correct <- function(model) {
  sum(predict(model, x[chk, ]) == genre[chk])
}

# Different SVM
b <- svm(x[-chk, ], genre[-chk])
n_correct(b)
b1 <- svm(x[-chk, ], genre[-chk], kernel = "linear")
n_correct(b1)
b2 <- svm(x[-chk, ], genre[-chk], kernel = "polynomial", degree = 2)
n_correct(b2)
b3 <- svm(x[-chk, ], genre[-chk], kernel = "polynomial")
n_correct(b3)

# Summary
# Confusion table for the linear model: true genre in rows, prediction in columns.
table(paste("Truth =", genre[chk]), predict(b1, x[chk, ]))

# Vary the value of C
b1 <- svm(x[-chk, ], genre[-chk], kernel = "linear")
n_correct(b1)
b1 <- svm(x[-chk, ], genre[-chk], kernel = "linear", cost = 10)
n_correct(b1)
b1 <- svm(x[-chk, ], genre[-chk], kernel = "linear", cost = .1)
n_correct(b1)
b1 <- svm(x[-chk, ], genre[-chk], kernel = "linear", cost = 100)
n_correct(b1)

# Data from the book
x <- matrix(scan("x.dat"), ncol = 2)
y <- scan("y.dat")
# Turn the numeric labels into colour names: 1 -> "orange", 0 -> "blue".
y <- c("orange", "blue")[2 - y]
# SVM with the default kernel on the book data (x: two-column matrix,
# y: colour-name labels).
plot(x, col = y, pch = 20)
b <- svm(x, as.factor(y))
boundaries(x, b)
supports(x, b)

# Polynomial kernel of degree 4. The scatter plot is redrawn first so that the
# regions and support-vector circles of this model are not mixed with those
# of the previous fit.
plot(x, col = y, pch = 20)
b <- svm(x, as.factor(y), kernel = "polynomial", degree = 4)
boundaries(x, b)
supports(x, b)