# Side-effect prediction with (sparse) canonical correlation analysis.
#
# Workflow of this script:
#   1. load the drug descriptor / side-effect matrices,
#   2. fit two CCA models (c1 = c2 = 1 and c1 = c2 = 0.1) and plot their
#      weight vectors,
#   3. cross-validate side-effect prediction for several penalty values,
#   4. predict side-effect scores for new drugs from target-protein profiles.
#
# scca(), cv.sideeffect() and newpred.scca() are not defined in this file;
# NOTE(review): presumably they come from ./side-effect.R -- confirm.

library(ROCR)
library(CCA)
library(PMA)
source("./side-effect.R")


##########################################################
##############    import data sets    ####################
##########################################################

chemmat <- as.matrix(read.delim("./chemmat.txt", sep = "\t"))
biomat <- as.matrix(read.delim("./bio2mat.txt", sep = "\t"))
pharmat <- as.matrix(read.delim("./pharmat.txt", sep = "\t"))
newchemmat <- as.matrix(read.delim("./newchemmat.txt", sep = "\t"))
newbiomat <- as.matrix(read.delim("./newbio2mat.txt", sep = "\t"))

dim(chemmat)
dim(biomat)
dim(pharmat)
dim(newchemmat)
dim(newbiomat)

# Dimensions recorded from a previous run (rows x columns):
#   chemmat     658 x  617
#   biomat      658 x 1368
#   pharmat     658 x 1339
#   newchemmat  730 x  617
#   newbiomat   730 x 1368


##########################################################
##############  carry out a sparse CCA ####################
##########################################################

# OCCA
drug.occa <- scca(X = scale(biomat), Y = scale(pharmat),
                  c1 = 1, c2 = 1, ncomp = 80)

# SCCA
drug.scca <- scca(X = scale(biomat), Y = scale(pharmat),
                  c1 = 0.1, c2 = 0.1, ncomp = 80)

# Draw index plots of the weight vectors of a fitted model.
#
# fit    : object returned by scca(); its `u` and `v` elements are indexed
#          as matrices with one column per component.
# n_comp : number of leading components to draw (default 4, which matches
#          the 4 x 2 layout set up by the callers below).
#
# The `u` columns are drawn first and the `v` columns second, so with a
# column-wise layout (par(mfcol = ...)) `u` fills the left column and `v`
# the right one. The layout itself is left to the caller.
plot_weight_panels <- function(fit, n_comp = 4L) {
  panels <- list(
    list(weights = fit$u, xlab = "Target protein index"),
    list(weights = fit$v, xlab = "Side-effect index")
  )
  for (panel in panels) {
    for (k in seq_len(n_comp)) {
      plot(panel$weights[, k], xlab = panel$xlab, ylab = "Weight",
           main = paste("Component", k))
    }
  }
  invisible(NULL)
}

# indexplot for weights in OCCA
op <- par(mfcol = c(4, 2))
plot_weight_panels(drug.occa)
par(op)

# indexplot for weights in SCCA
op <- par(mfcol = c(4, 2))
plot_weight_panels(drug.scca)
par(op)


##########################################################
##############     cross-validation   ####################
##########################################################

# evaluation by 1) global AUC, 2) global AUPR

# Run 5-fold cross-validation of side-effect prediction from descriptor
# matrix X against `pharmat`, using the same value `penalty` for both c1 and
# c2, and return the first two entries of the result's `aucscores`.
# All other settings are fixed: 80 components, centring without scaling for
# both X and Y, localauc = TRUE.
cv_auc_scores <- function(X, penalty) {
  cv.sideeffect(X = X, Y = pharmat, type.method = "scca",
                c1 = penalty, c2 = penalty, ncomp = 80, fold = 5,
                localauc = TRUE, scalex = FALSE, centerx = TRUE,
                scaley = FALSE, centery = TRUE)$aucscores[1:2]
}

# The calls are kept as separate top-level expressions (rather than a loop)
# so that each result is still auto-printed and the calls run in the same
# order as before.

# side-effect prediction from chemical structures
cv_auc_scores(chemmat, 0.1)
cv_auc_scores(chemmat, 0.2)
cv_auc_scores(chemmat, 0.5)
cv_auc_scores(chemmat, 1)

# side-effect prediction from target proteins
cv_auc_scores(biomat, 0.1)
cv_auc_scores(biomat, 0.2)
cv_auc_scores(biomat, 0.5)
cv_auc_scores(biomat, 1)

# Output recorded from a previous run (two scores per call, in the order
# listed above; settings: type.method="scca", ncomp=80, fold=5, localauc=T,
# scalex=F, centerx=T, scaley=F, centery=T):
#
#   side-effect prediction from chemical structures (X=chemmat)
#     c1=0.1, c2=0.1 : 0.82994 0.34403
#     c1=0.2, c2=0.2 : 0.77941 0.32110
#     c1=0.5, c2=0.5 : 0.82686 0.36887
#     c1=1,   c2=1   : 0.81376 0.35686
#
#   side-effect prediction from target proteins (X=biomat)
#     c1=0.1, c2=0.1 : 0.88968 0.41136
#     c1=0.2, c2=0.2 : 0.88746 0.40940
#     c1=0.5, c2=0.5 : 0.88446 0.40805
#     c1=1,   c2=1   : 0.88225 0.40601


##########################################################
##############      new prediction    ####################
##########################################################

bio.newpred <- newpred.scca(X = biomat, Y = pharmat, Xnew = newbiomat,
                            ncomp = 80, c1 = 0.1, c2 = 0.1,
                            centerx = TRUE, scalex = FALSE,
                            centery = TRUE, scaley = FALSE)

#> bio.newpred$Q[1:10,1:4]
#       abdominal.cramps abdominal.distention abdominal.pain malformations
#100252       0.04830690           0.09407626      0.5517272    0.02887538
#1014         0.04830690           0.09389375      0.5517272    0.02887538
#10180        0.04830690           0.09493044      0.5704739    0.02887538
#1021         0.04830690           0.09407626      0.5517272    0.02887538
#1030         0.04830690           0.09407626      0.5517272    0.02887538
#1045         0.04830690           0.09407626      0.5517272    0.02887538
#104799       0.04830690           0.09312963      0.5527467    0.02887538
#104850       0.04830690           0.09407626      0.5517272    0.02887538
#1051         0.04830690           0.09350111      0.5517272    0.02887538
#10517        0.08119673           0.09670395      0.5577110    0.02887538