library(ROCR) library(PMA) library(kernlab) library(CCA) scca <- function (X, Y, c1, c2, ncomp=4) { Xraw <- X; Yraw <- Y colindex.nonzero.x <- (1:ncol(X))[apply(X,2,sd)!=0] colindex.nonzero.y <- (1:ncol(Y))[apply(Y,2,sd)!=0] X <- X[,colindex.nonzero.x]; Y <- Y[,colindex.nonzero.y] #X <- scale(X); Y <- scale(Y) nx <- nrow(X); px <- ncol(X) ny <- nrow(Y); py <- ncol(Y) n <- nx # canonical correlation dvec <- rep(0, ncomp) # weight matrix features of non-zeros #umat <- matrix(0, px, ncomp) #vmat <- matrix(0, py, ncomp) # weight matrix for all input features umatall <- matrix(0, ncol(Xraw), ncomp) vmatall <- matrix(0, ncol(Yraw), ncomp) result <- CCA(x=X, z=Y, typex = "standard", typez = "standard", penaltyx = c1, penaltyz = c2, K = ncomp, niter = 15, v = NULL, trace = FALSE, standardize = FALSE, xnames = NULL, znames = NULL, chromx = NULL, chromz = NULL, upos = FALSE, uneg = FALSE, vpos = FALSE, vneg = FALSE, outcome = NULL, y = NULL, cens = NULL) dvec <- result$d # adjustment of positive and negatieve for (k in 1:ncomp){ wmaxorderx <- order(abs(result$u[,k]), decreasing=T)[1] if (result$u[wmaxorderx,k] < 0){ result$u[,k] <- - result$u[,k] result$v[,k] <- - result$v[,k] } } umatall[colindex.nonzero.x, ] <- result$u vmatall[colindex.nonzero.y, ] <- result$v rownames(umatall) <- colnames(Xraw) rownames(vmatall) <- colnames(Yraw) scorex <- X %*% result$u scorey <- Y %*% result$v rownames(scorex) <- rownames(Xraw) rownames(scorey) <- rownames(Yraw) corvec <- rep(0,ncomp) for (j in 1:ncomp){ corvec[j] <- cor(scorex[,j],scorey[,j]) } #list(u=umatall, v=vmatall, d=dvec) list(u=umatall, v=vmatall, d=dvec, scorex=scorex, scorey=scorey, rho=corvec) } newpred.scca <- function(X, Y, Xnew, c1=NULL, c2=NULL, ncomp=4, centerx=T, scalex=T, centery=T, scaley=T){ nx <- nrow(X); px <- ncol(X) ny <- nrow(Y); py <- ncol(Y) n <- nx nkeyword <- py ### training set ### Xt <- X Yt <- Y ### adjust prediction set such that the columns are the same as those of training X ### nxnew <- nrow(Xnew); pxnew <- ncol(Xnew) Xp <- matrix(0, nxnew, px) rownames(Xp) <- rownames(Xnew) colnames(Xp) <- colnames(X) Xp[,intersect(colnames(Xt),colnames(Xnew))] <- Xnew[,intersect(colnames(Xt),colnames(Xnew))] # normatlization for X and Y based on training mean and sd xmean <- apply(Xt,2,mean); ymean <- apply(Yt,2,mean) xsd <- apply(Xt,2,sd); ysd <- apply(Yt,2,sd) xsd[xsd==0] <- 1; ysd[ysd==0] <- 1 #Xt <- scale(Xt); Yt <- scale(Yt) if (centerx==T){ Xt <- Xt - matrix(xmean, nrow(Xt), ncol(Xt), byrow=T) Xp <- Xp - matrix(xmean, nrow(Xp), ncol(Xp), byrow=T) } if (centery==T){ Yt <- Yt - matrix(ymean, nrow(Yt), ncol(Yt), byrow=T) #Yp <- Yp - matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) } if (scalex==T){ Xt <- Xt * matrix(1/xsd, nrow(Xt), ncol(Xt), byrow=T) Xp <- Xp * matrix(1/xsd, nrow(Xp), ncol(Xp), byrow=T) } if (scaley==T){ Yt <- Yt * matrix(1/ysd, nrow(Yt), ncol(Yt), byrow=T) #Yp <- Yp * matrix(1/ysd, nrow(Yp), ncol(Yp), byrow=T) } # training result.scca <- scca(X=Xt, Y=Yt, c1=c1, c2=c2, ncomp=ncomp) ur <- result.scca$u vr <- result.scca$v dr <- result.scca$d corvecr <- rep(0,ncomp) for (j in 1:ncomp){ corvecr[j] <- cor(result.scca$scorex[,j],result.scca$scorey[,j]) } # test ##### Edouard score ##### # Q <- (Xp%*%ur) %*% diag(dr) %*% t(vr) Q <- (Xp%*%ur) %*% diag(corvecr) %*% t(vr) if (scaley==T){ Q <- Q * matrix(ysd, nxnew, py, byrow=T) } if (centery==T){ Q <- Q + matrix(ymean, nxnew, py, byrow=T) } rownames(Q) <- rownames(Xnew) colnames(Q) <- colnames(Y) list(Q=Q) } newpred.sideeffect <- function(X, Y, Xnew, centerx=T, scalex=F, centery=T, scaley=F, type.method="scca", type.CCApredscore="default", c1=0.2, c2=0.2, ncomp=4, lambda=0.1, type.kernel="lin", sigma=0.5, deg=3, userkernel=F, Kinput=NULL, Kx=NULL, Kx1=NULL, Kx2=NULL, Kxnew=NULL, Kxnew1=NULL, Kxnew2=NULL, lambdax1=0.1, lambdax2=0.1, rcclambda1=0.2, rcclambda2=0.2, svmkernel ="rbfdot", svmkpar = "automatic", svmC = 1) { nx <- nrow(X); px <- ncol(X) ny <- nrow(Y); py <- ncol(Y) n <- nx nkeyword <- py ### training set ### Xt <- X Yt <- Y ### adjust prediction set such that the columns are the same as those of training X ### nxnew <- nrow(Xnew); pxnew <- ncol(Xnew) Xp <- matrix(0, nxnew, px) rownames(Xp) <- rownames(Xnew) colnames(Xp) <- colnames(X) Xp[,intersect(colnames(Xt),colnames(Xnew))] <- Xnew[,intersect(colnames(Xt),colnames(Xnew))] if (type.method == "kreg"){ if (userkernel == T){ #K <- Kinput # training x training K <- Kx # test x training Knew <- Kxnew } else { Xall <- rbind(X, Xnew) Kall <- kernelmat(X=Xall, kernel.type=type.kernel, sigma=sigma, deg=deg) K <- Kall[1:nx,1:nx] Knew <- Kall[(nx+1):(nx+nxnew),1:nx] } } if (type.method == "mkreg"){ if (userkernel == T){ K1 <- Kx1 K2 <- Kx2 K1new <- Kx1new K2new <- Kx2new } else { break } } # normatlization for X and Y based on training mean and sd xmean <- apply(Xt,2,mean); ymean <- apply(Yt,2,mean) xsd <- apply(Xt,2,sd); ysd <- apply(Yt,2,sd) xsd[xsd==0] <- 1; ysd[ysd==0] <- 1 if (centerx==T){ Xt <- Xt - matrix(xmean, nrow(Xt), ncol(Xt), byrow=T) Xp <- Xp - matrix(xmean, nrow(Xp), ncol(Xp), byrow=T) } if (centery==T){ Yt <- Yt - matrix(ymean, nrow(Yt), ncol(Yt), byrow=T) #Yp <- Yp - matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) } if (scalex==T){ Xt <- Xt * matrix(1/xsd, nrow(Xt), ncol(Xt), byrow=T) Xp <- Xp * matrix(1/xsd, nrow(Xp), ncol(Xp), byrow=T) } if (scaley==T){ Yt <- Yt * matrix(1/ysd, nrow(Yt), ncol(Yt), byrow=T) #Yp <- Yp * matrix(1/ysd, nrow(Yp), ncol(Yp), byrow=T) } # training if (type.method == "scca"){ result.ps <- pscore.scca(Xt=Xt, Xp=Xp, Yt=Yt, c1=c1, c2=c2, ncomp=ncomp, type.CCApredscore=type.CCApredscore) Qp <- result.ps$Qp } else if (type.method == "rcc"){ result.ps <- pscore.rcc(Xt=Xt, Xp=Xp, Yt=Yt, rcclambda1=rcclambda1, rcclambda2=rcclambda2, ncomp=ncomp, type.CCApredscore=type.CCApredscore) Qp <- result.ps$Qp } else if (type.method == "bayse"){ } else if (type.method == "nn"){ } else if (type.method == "reg"){ } else if (type.method == "kreg"){ Ktt <- K Kpt <- Knew result.ps.kreg <- pscore.kreg(Ktt=Ktt, Kpt=Kpt, Yt=Yt, lambda=lambda) Qp <- result.ps.kreg$Qp } else if (type.method == "mkreg"){ Ktt1 <- K1 Kpt1 <- K1new Ktt2 <- K2 Kpt2 <- K2new result.ps.mkreg <- pscore.mkreg(Ktt1=Ktt1, Kpt1=Kpt1, Ktt2=Ktt2, Kpt2=Kpt2, Yt=Yt, lambda1=lambdax1, lambda2=lambdax2) #result.ps.mkreg <- pscore.mkreg(Ktt1=Ktt1, Kpt1=Kpt1, Ktt2=Ktt2, Kpt2=Kpt2, Yt=Yt, lambda1=lambdax1, lambda2=lambdax2, zero.differentKmat=T) Qp <- result.ps.mkreg$Qp } else if (type.method == "control"){ Qp <- matrix(0, nrow(Yp), ncol(Yp)) #} else if (type.method == "random"){ # Qp <- matrix(rnorm(nrow(Yp)*ncol(Yp)), nrow(Yp), ncol(Yp)) } else { break } if (scaley==T){ Qp <- Qp * matrix(ysd, nrow(Xp), ncol(Yt), byrow=T) } if (centery==T){ Qp <- Qp + matrix(ymean, nrow(Xp), ncol(Yt), byrow=T) } rownames(Qp) <- rownames(Xnew) colnames(Qp) <- colnames(Y) list(Q=Qp) } pscore.rcc <- function(Xt, Xp, Yt, rcclambda1=0.2, rcclambda2=0.2, ncomp=4, fold=5, randomseed=1, centerx=T, scalex=T, centery=T, scaley=T, localauc=F, type.CCApredscore="default"){ library(CCA) result.rcc <- rcc(X=Xt, Y=Yt, lambda1=rcclambda1, lambda2=rcclambda2) ur <- result.rcc[[3]][,1:ncomp] vr <- result.rcc[[4]][,1:ncomp] #dr <- result.rcc[[1]] #corvecr <- rep(0,ncomp) #for (j in 1:ncomp){ # corvecr[j] <- cor(result.scca$scorex[,j],result.scca$scorey[,j]) #} corvecr <- result.rcc[[1]][1:ncomp] # test if (type.CCApredscore=="default"){ # Edouard score Qp <- (Xp%*%ur) %*% diag(corvecr) %*% t(vr) list(Qp=Qp) } else { # No weight #Qp1 <- (Xp%*%ur) %*% t(vr) # Yoshi score #Qp2 <- (Xp%*%ur) %*% diag(dr) %*% t(vr) # Edouard score #Qp3 <- (Xp%*%ur) %*% diag(corvecr) %*% t(vr) Qp1 <- (Xp%*%ur) %*% t(vr) Qp2 <- (Xp%*%ur) %*% diag(corvecr) %*% t(vr) # Recomb score ##### vrtvr <- t(vr) %*% vr eps <- 0.000001 if (min(eigen(vrtvr,T)$values) < eps){ vrtvrinv <- geninv(vrtvr, eps=eps) } else { vrtvrinv <- solve(vrtvr) } #Qp4 <- Xp %*% ur %*% vrtvrinv %*% t(vr) #Qp5 <- Xp %*% ur %*% diag(dr) %*% vrtvrinv %*% t(vr) #Qp6 <- Xp %*% ur %*% diag(corvecr) %*% vrtvrinv %*% t(vr) Qp3 <- Xp %*% ur %*% vrtvrinv %*% t(vr) Qp4 <- Xp %*% ur %*% diag(corvecr) %*% vrtvrinv %*% t(vr) vrvrt <- vr %*% t(vr) eps <- 0.000001 if (min(eigen(vrvrt,T)$values) < eps){ vrvrtinv <- geninv(vrvrt, eps=eps) } else { vrvrtinv <- solve(vrvrt) } #Qp4 <- Xp %*% ur %*% t(vr) %*% vrvrtinv #Qp5 <- Xp %*% ur %*% diag(dr) %*% t(vr) %*% vrvrtinv #Qp6 <- Xp %*% ur %*% diag(corvecr) %*% t(vr) %*% vrvrtinv Qp5 <- Xp %*% ur %*% t(vr) %*% vrvrtinv Qp6 <- Xp %*% ur %*% diag(corvecr) %*% t(vr) %*% vrvrtinv Qp <- Qp2 list(Qp=Qp,Qp1=Qp1,Qp2=Qp2,Qp3=Qp3,Qp4=Qp4,Qp5=Qp5,Qp6=Qp6) } } pscore.scca <- function(Xt, Xp, Yt, c1=0.2, c2=0.2, ncomp=4, fold=5, randomseed=1, centerx=T, scalex=T, centery=T, scaley=T, localauc=F, type.CCApredscore="default"){ result.scca <- scca(X=Xt, Y=Yt, c1=c1, c2=c2, ncomp=ncomp) ur <- result.scca$u vr <- result.scca$v dr <- result.scca$d corvecr <- result.scca$rho #corvecr <- rep(0,ncomp) #for (j in 1:ncomp){ # corvecr[j] <- cor(result.scca$scorex[,j],result.scca$scorey[,j]) #} # test if (type.CCApredscore=="default"){ # Edouard score Qp <- (Xp%*%ur) %*% diag(corvecr) %*% t(vr) list(Qp=Qp) } else { # No weight #Qp1 <- (Xp%*%ur) %*% t(vr) # Yoshi score #Qp2 <- (Xp%*%ur) %*% diag(dr) %*% t(vr) # Edouard score #Qp3 <- (Xp%*%ur) %*% diag(corvecr) %*% t(vr) Qp1 <- (Xp%*%ur) %*% t(vr) Qp2 <- (Xp%*%ur) %*% diag(corvecr) %*% t(vr) # Recomb score ##### vrtvr <- t(vr) %*% vr eps <- 0.000001 if (min(eigen(vrtvr,T)$values) < eps){ vrtvrinv <- geninv(vrtvr, eps=eps) } else { vrtvrinv <- solve(vrtvr) } #Qp4 <- Xp %*% ur %*% vrtvrinv %*% t(vr) #Qp5 <- Xp %*% ur %*% diag(dr) %*% vrtvrinv %*% t(vr) #Qp6 <- Xp %*% ur %*% diag(corvecr) %*% vrtvrinv %*% t(vr) Qp3 <- Xp %*% ur %*% vrtvrinv %*% t(vr) Qp4 <- Xp %*% ur %*% diag(corvecr) %*% vrtvrinv %*% t(vr) vrvrt <- vr %*% t(vr) eps <- 0.000001 if (min(eigen(vrvrt,T)$values) < eps){ vrvrtinv <- geninv(vrvrt, eps=eps) } else { vrvrtinv <- solve(vrvrt) } #Qp4 <- Xp %*% ur %*% t(vr) %*% vrvrtinv #Qp5 <- Xp %*% ur %*% diag(dr) %*% t(vr) %*% vrvrtinv #Qp6 <- Xp %*% ur %*% diag(corvecr) %*% t(vr) %*% vrvrtinv Qp5 <- Xp %*% ur %*% t(vr) %*% vrvrtinv Qp6 <- Xp %*% ur %*% diag(corvecr) %*% t(vr) %*% vrvrtinv Qp <- Qp2 list(Qp=Qp,Qp1=Qp1,Qp2=Qp2,Qp3=Qp3,Qp4=Qp4,Qp5=Qp5,Qp6=Qp6) } } #pscore.kreg <- function(Xt, Xp, Yt, Yp, lambda=0.1) pscore.kreg <- function(Ktt, Kpt, Yt, lambda=0.1) { #nt <- nrow(Xt); np <- nrow(Xp) #n <- nt + np #Ktt <- cormat.cosine(Xt, Xt) #Kpt <- cormat.cosine(Xp, Xt) nt <- nrow(Ktt); np <- nrow(Kpt) n <- nt + np Itt <- diag(rep(1,nt)) W <- solve(Ktt %*% t(Ktt) + lambda * Itt) %*% Ktt %*% Yt Yhatt <- Ktt %*% W Yhatp <- Kpt %*% W list(Qp=Yhatp) } pscore.mkreg <- function(Ktt1, Ktt2, Kpt1, Kpt2, Yt, lambda1=0.1, lambda2=0.1, zero.differentKmat=F) { #nt <- nrow(Xt); np <- nrow(Xp) #n <- nt + np #Ktt <- cormat.cosine(Xt, Xt) #Kpt <- cormat.cosine(Xp, Xt) nt1 <- nrow(Ktt1); np1 <- nrow(Kpt1) n1 <- nt1 + np1 nt2 <- nrow(Ktt2); np2 <- nrow(Kpt2) n2 <- nt2 + np2 p <- ncol(Yt) if (n1 == n2){ n <- n1; nt <- nt1; np <- np1 } else { break } Itt <- diag(rep(1,nt)) KKmat <- matrix(0, 2*nt, 2*nt) KYmat <- matrix(0, 2*nt, p) KKmat[1:nt,1:nt] <- Ktt1 %*% Ktt1 + lambda1 * Itt if (zero.differentKmat == F){ KKmat[1:nt,(1:nt)+nt] <- Ktt1 %*% Ktt2 KKmat[(1:nt)+nt,1:nt] <- Ktt2 %*% Ktt1 } else { KKmat[1:nt,(1:nt)+nt] <- 0 KKmat[(1:nt)+nt,1:nt] <- 0 } KKmat[(1:nt)+nt,(1:nt)+nt] <- Ktt2 %*% Ktt2 + lambda2 * Itt KYmat[1:nt,] <- Ktt1 %*% Yt KYmat[(1:nt)+nt,] <- Ktt2 %*% Yt Wmat <- solve(KKmat) %*% KYmat W1 <- Wmat[1:nt,] W2 <- Wmat[(1:nt)+nt,] #Yhatt <- Ktt1 %*% W1 + Ktt2 %*% W2 Yhatp <- Kpt1 %*% W1 + Kpt2 %*% W2 list(Qp=Yhatp) } pscore.nn <- function(Xt, Xp, At, fold=5, nneighbor=5){ nt <- nrow(Xt) np <- nrow(Xp) py <- ncol(At) Qp <- matrix(0, np, py) Spt <- Xp %*% t(Xt) for (i in 1:np){ tindex <- 1:nt maxidx <- tindex[order(Spt[i,tindex],decreasing=T)[1:nneighbor]] if (nneighbor==1){ meanprofile <- At[maxidx,] } else { meanprofile <- apply(At[maxidx,],2,mean) } Qp[i,] <- meanprofile } list(Qp=Qp) } pscore.randsample <- function(Xt, Xp, At, fold=5){ nt <- nrow(Xt) np <- nrow(Xp) py <- ncol(At) Qp <- matrix(0, np, py) for (j in 1:py){ predvec <- rep(0, np) npos <- sum(At[,j]) npostest <- trunc(np * npos / nt) predvec[sample(1:np,npostest)] <- 1 Qp[,j] <- predvec } list(Qp=Qp) } pscore.svm <- function(Xt, Xp, At, fold=5, scaled=TRUE, kernel ="rbfdot", kpar = "automatic", C = 1){ nt <- nrow(Xt) np <- nrow(Xp) py <- ncol(At) Qp <- matrix(0, np, py) for (k in 1:py){ if (length(unique(At[,k]))==1){ # no training # test prediction score testpred <- rep(-1, np) #testpred <- rep(0, np) Qp[,k] <- testpred } else { # training # not library(e1071) library(kernlab) model.svm <- ksvm(x=Xt, y=At[,k], type="C-svc", scaled=scaled, kernel=kernel, kpar=kpar, C=C) #model.svm <- ksvm(x=Xt, y=At[,k], type="C-svc", scaled=F, kernel="rbfdot", kpar=list(sigma=0.1), C=1) #model.svm <- ksvm(x=Xt, y=At[,k], type="C-svc", scaled=T, kernel="rbfdot", kpar=list(sigma=0.1), C=1) # test testpred <- predict(model.svm, Xp, type="decision") Qp[,k] <- testpred } } list(Qp=Qp) } cv.sideeffect.svm <- function(X, Y, fold=5, randomseed=1, centerx=T, scalex=F, centery=T, scaley=F, localauc=F, type.method="svm", svmscaled=TRUE, svmkernel ="rbfdot", svmkpar = "automatic", svmC=1, cvindex) { A <- Y nx <- nrow(X); px <- ncol(X) ny <- nrow(Y); py <- ncol(Y) n <- nx; nkeyword <- py rsize <- trunc(n / fold) index <- 1:n { # prediction score matrix Q <- matrix(0, n, py) rownames(Q) <- rownames(A); colnames(Q) <- colnames(A) for (r in 1:fold){ ## random seed #set.seed(randomseed - 1 + r) ## remove a gene #if (length(index) >= rsize){ # rindex <- sample(index, size=rsize) # index <- setdiff(index, rindex) #} else { # rindex <- index # index <- setdiff(index, rindex) #} #pindex.r <- rindex #tindex.r <- (1:n)[-pindex.r] pindex.r <- (1:n)[cvindex[,r]==1] tindex.r <- (1:n)[-pindex.r] Xt <- X[tindex.r, ]; Yt <- Y[tindex.r, ] Xp <- X[pindex.r, ]; Yp <- Y[pindex.r, ] # normatlization for X and Y based on training mean and sd xmean <- apply(Xt,2,mean); ymean <- apply(Yt,2,mean) xsd <- apply(Xt,2,sd); ysd <- apply(Yt,2,sd) xsd[xsd==0] <- 1; ysd[ysd==0] <- 1 if (centerx==T){ Xt <- Xt - matrix(xmean, nrow(Xt), ncol(Xt), byrow=T) Xp <- Xp - matrix(xmean, nrow(Xp), ncol(Xp), byrow=T) } if (scalex==T){ Xt <- Xt * matrix(1/xsd, nrow(Xt), ncol(Xt), byrow=T) Xp <- Xp * matrix(1/xsd, nrow(Xp), ncol(Xp), byrow=T) } # training if (type.method == "svm"){ At <- A[tindex.r,] Ap <- A[pindex.r,] #library(kernlab) result.ps.svm <- pscore.svm(Xt=Xt, Xp=Xp, At=At, fold=fold, scaled=svmscaled, kernel=svmkernel, kpar=svmkpar, C=svmC) Qp <- result.ps.svm$Qp } else { break } Q[pindex.r,] <- Qp } } # prediction accuracy for top 100 predictions for each drug accuracymat <- matrix(0, nx, 10) for (i in 1:nx){ for (j in 1:10){ ntop <- 10*j accuracymat[i,j] <- sum(A[i,order(Q[i,], decreasing=T)[1:ntop]]) / ntop } } # AUC for ROC curve and Precision-Recall curve aucresult <- eval.auc.curves(Q, A, localauc=localauc) list(aucscores=aucresult$aucscores, Q=Q, auc.roc=aucresult$auc.roc, auc.prc=aucresult$auc.prc, localauc.roc=aucresult$localauc.roc, localauc.prc=aucresult$localauc.prc, accuracymat=accuracymat) } cv.sideeffect <- function(X, Y, fold=5, randomseed=1, centerx=T, scalex=F, centery=T, scaley=F, localauc=F, type.method="scca", type.CCApredscore="default", c1=0.2, c2=0.2, ncomp=4, lambda=0.1, type.kernel="lin", sigma=0.5, deg=3, userkernel=F, Kinput=NULL, Kx1=NULL, Kx2=NULL, lambdax1=0.1, lambdax2=0.1, rcclambda1=0.2, rcclambda2=0.2, svmscaled=TRUE, svmkernel ="rbfdot", svmkpar = "automatic", svmC=1, nneighbor=5) { A <- Y nx <- nrow(X); px <- ncol(X) ny <- nrow(Y); py <- ncol(Y) n <- nx; nkeyword <- py rsize <- trunc(n / fold) index <- 1:n if (type.method == "random"){ # prediction score matrix Q <- matrix(rnorm(ny*py), ny, py) rownames(Q) <- rownames(A); colnames(Q) <- colnames(A) } else { # prediction score matrix Q <- matrix(0, n, py) rownames(Q) <- rownames(A); colnames(Q) <- colnames(A) if (type.CCApredscore == "various") { # only for CCA-based approaach Q1 <- matrix(0,n,py); Q2 <- matrix(0,n,py); Q3 <- matrix(0,n,py); Q4 <- matrix(0,n,py); Q5 <- matrix(0,n,py); Q6 <- matrix(0,n,py) } if (type.method == "kreg"){ if (userkernel == T){ K <- Kinput } else { K <- kernelmat(X=X, kernel.type=type.kernel, sigma=sigma, deg=deg) } } if (type.method == "mkreg"){ if (userkernel == T){ K1 <- Kx1 K2 <- Kx2 } else { break } } for (r in 1:fold){ # random seed set.seed(randomseed - 1 + r) # remove a gene if (length(index) >= rsize){ rindex <- sample(index, size=rsize) index <- setdiff(index, rindex) } else { rindex <- index index <- setdiff(index, rindex) } pindex.r <- rindex tindex.r <- (1:n)[-pindex.r] Xt <- X[tindex.r, ]; Yt <- Y[tindex.r, ] Xp <- X[pindex.r, ]; Yp <- Y[pindex.r, ] # normatlization for X and Y based on training mean and sd xmean <- apply(Xt,2,mean); ymean <- apply(Yt,2,mean) xsd <- apply(Xt,2,sd); ysd <- apply(Yt,2,sd) xsd[xsd==0] <- 1; ysd[ysd==0] <- 1 if (centerx==T){ Xt <- Xt - matrix(xmean, nrow(Xt), ncol(Xt), byrow=T) Xp <- Xp - matrix(xmean, nrow(Xp), ncol(Xp), byrow=T) } if (centery==T){ Yt <- Yt - matrix(ymean, nrow(Yt), ncol(Yt), byrow=T) Yp <- Yp - matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) } if (scalex==T){ Xt <- Xt * matrix(1/xsd, nrow(Xt), ncol(Xt), byrow=T) Xp <- Xp * matrix(1/xsd, nrow(Xp), ncol(Xp), byrow=T) } if (scaley==T){ Yt <- Yt * matrix(1/ysd, nrow(Yt), ncol(Yt), byrow=T) Yp <- Yp * matrix(1/ysd, nrow(Yp), ncol(Yp), byrow=T) } # training if (type.method == "scca"){ #result.ps <- pscore.scca(Xt=Xt, Xp=Xp, Yt=Yt, Yp=Yp, c1=c1, c2=c2, ncomp=ncomp, type.CCApredscore=type.CCApredscore) result.ps <- pscore.scca(Xt=Xt, Xp=Xp, Yt=Yt, c1=c1, c2=c2, ncomp=ncomp, type.CCApredscore=type.CCApredscore) Qp <- result.ps$Qp if (type.CCApredscore=="various"){ Qp1 <- result.ps$Qp1; Qp2 <- result.ps$Qp2; Qp3 <- result.ps$Qp3; Qp4 <- result.ps$Qp4; Qp5 <- result.ps$Qp5; Qp6 <- result.ps$Qp6 } } else if (type.method == "rcc"){ #result.ps <- pscore.rcc(Xt=Xt, Xp=Xp, Yt=Yt, Yp=Yp, rcclambda1=rcclambda1, rcclambda2=rcclambda2, ncomp=ncomp, type.CCApredscore=type.CCApredscore) result.ps <- pscore.rcc(Xt=Xt, Xp=Xp, Yt=Yt, rcclambda1=rcclambda1, rcclambda2=rcclambda2, ncomp=ncomp, type.CCApredscore=type.CCApredscore) Qp <- result.ps$Qp if (type.CCApredscore=="various"){ Qp1 <- result.ps$Qp1; Qp2 <- result.ps$Qp2; Qp3 <- result.ps$Qp3; Qp4 <- result.ps$Qp4; Qp5 <- result.ps$Qp5; Qp6 <- result.ps$Qp6 } } else if (type.method == "svm"){ At <- A[tindex.r,] Ap <- A[pindex.r,] #library(kernlab) result.ps.svm <- pscore.svm(Xt=Xt, Xp=Xp, At=At, fold=fold, scaled=svmscaled, kernel=svmkernel, kpar=svmkpar, C=svmC) #result.ps.svm <- pscore.svm(Xt=Xt, Xp=Xp, At=At, fold=fold, scaled=F, kernel=svmkernel, kpar=svmkpar, C=svmC) Qp <- result.ps.svm$Qp } else if (type.method == "bayse"){ } else if (type.method == "nn"){ At <- A[tindex.r,] Ap <- A[pindex.r,] #library(kernlab) result.ps.nn <- pscore.nn(Xt=Xt, Xp=Xp, At=At, fold=fold, nneighbor=nneighbor) Qp <- result.ps.nn$Qp } else if (type.method == "reg"){ } else if (type.method == "kreg"){ Ktt <- K[tindex.r,tindex.r] Kpt <- K[pindex.r,tindex.r] result.ps.kreg <- pscore.kreg(Ktt=Ktt, Kpt=Kpt, Yt=Yt, lambda=lambda) Qp <- result.ps.kreg$Qp } else if (type.method == "mkreg"){ Ktt1 <- K1[tindex.r,tindex.r] Kpt1 <- K1[pindex.r,tindex.r] Ktt2 <- K2[tindex.r,tindex.r] Kpt2 <- K2[pindex.r,tindex.r] result.ps.mkreg <- pscore.mkreg(Ktt1=Ktt1, Kpt1=Kpt1, Ktt2=Ktt2, Kpt2=Kpt2, Yt=Yt, lambda1=lambdax1, lambda2=lambdax2) #result.ps.mkreg <- pscore.mkreg(Ktt1=Ktt1, Kpt1=Kpt1, Ktt2=Ktt2, Kpt2=Kpt2, Yt=Yt, lambda1=lambdax1, lambda2=lambdax2, zero.differentKmat=T) Qp <- result.ps.mkreg$Qp } else if (type.method == "control"){ Qp <- matrix(0, nrow(Yp), ncol(Yp)) } else if (type.method == "randsample"){ At <- A[tindex.r,] Ap <- A[pindex.r,] Qp <- pscore.randsample(Xt=Xt, Xp=Xp, At=At, fold=5)$Qp #} else if (type.method == "random"){ # Qp <- matrix(rnorm(nrow(Yp)*ncol(Yp)), nrow(Yp), ncol(Yp)) } else { break } if (scaley==T){ if (type.method != "svm"){ Qp <- Qp * matrix(ysd, nrow(Yp), ncol(Yp), byrow=T) } } if (centery==T){ if (type.method != "svm"){ Qp <- Qp + matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) } } Q[pindex.r,] <- Qp if (type.CCApredscore == "various"){ meanmat <- matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) sdmat <- matrix(ysd, nrow(Yp), ncol(Yp), byrow=T) if (scaley==T){ Qp1 <- Qp1 * sdmat; Qp2 <- Qp2 * sdmat; Qp3 <- Qp3 * sdmat; Qp4 <- Qp4 * sdmat; Qp5 <- Qp5 * sdmat; Qp6 <- Qp6 * sdmat } if (centery==T){ Qp1 <- Qp1 + meanmat; Qp2 <- Qp2 + meanmat; Qp3 <- Qp3 + meanmat; Qp4 <- Qp4 + meanmat; Qp5 <- Qp5 + meanmat; Qp6 <- Qp6 + meanmat } Q1[pindex.r,] <- Qp1; Q2[pindex.r,] <- Qp2; Q3[pindex.r,] <- Qp3; Q4[pindex.r,] <- Qp4; Q5[pindex.r,] <- Qp5; Q6[pindex.r,] <- Qp6 } } } # prediction accuracy for top 100 predictions for each drug accuracymat <- matrix(0, nx, 10) for (i in 1:nx){ for (j in 1:10){ ntop <- 10*j accuracymat[i,j] <- sum(A[i,order(Q[i,], decreasing=T)[1:ntop]]) / ntop } } # AUC for ROC curve and Precision-Recall curve if (type.CCApredscore == "default"){ aucresult <- eval.auc.curves(Q, A, localauc=localauc) list(aucscores=aucresult$aucscores, Q=Q, auc.roc=aucresult$auc.roc, auc.prc=aucresult$auc.prc, localauc.roc=aucresult$localauc.roc, localauc.prc=aucresult$localauc.prc, accuracymat=accuracymat) } else if (type.CCApredscore == "various"){ aucresult <- eval.auc.curves.variousscore(Q1,Q2,Q3,Q4,Q5,Q6,A, localauc=localauc) #list(aucvec.roc=aucresult$aucvec.roc, aucvec.prc=aucresult$aucvec.prc, aucscores=aucresult$aucscores) list(aucscores=aucresult$aucscores) } } cv.sideeffect.cvindex <- function(X, Y, fold=5, randomseed=1, centerx=T, scalex=F, centery=T, scaley=F, localauc=F, type.method="scca", type.CCApredscore="default", c1=0.2, c2=0.2, ncomp=4, lambda=0.1, type.kernel="lin", sigma=0.5, deg=3, userkernel=F, Kinput=NULL, Kx1=NULL, Kx2=NULL, lambdax1=0.1, lambdax2=0.1, rcclambda1=0.2, rcclambda2=0.2, svmscaled=TRUE, svmkernel ="rbfdot", svmkpar = "automatic", svmC=1, nneighbor=5, cvindex) { A <- Y nx <- nrow(X); px <- ncol(X) ny <- nrow(Y); py <- ncol(Y) n <- nx; nkeyword <- py rsize <- trunc(n / fold) index <- 1:n if (type.method == "random"){ # prediction score matrix Q <- matrix(rnorm(ny*py), ny, py) rownames(Q) <- rownames(A); colnames(Q) <- colnames(A) } else { # prediction score matrix Q <- matrix(0, n, py) rownames(Q) <- rownames(A); colnames(Q) <- colnames(A) if (type.CCApredscore == "various") { # only for CCA-based approaach Q1 <- matrix(0,n,py); Q2 <- matrix(0,n,py); Q3 <- matrix(0,n,py); Q4 <- matrix(0,n,py); Q5 <- matrix(0,n,py); Q6 <- matrix(0,n,py) } if (type.method == "kreg"){ if (userkernel == T){ K <- Kinput } else { K <- kernelmat(X=X, kernel.type=type.kernel, sigma=sigma, deg=deg) } } if (type.method == "mkreg"){ if (userkernel == T){ K1 <- Kx1 K2 <- Kx2 } else { break } } for (r in 1:fold){ ## random seed #set.seed(randomseed - 1 + r) ## remove a gene #if (length(index) >= rsize){ # rindex <- sample(index, size=rsize) # index <- setdiff(index, rindex) #} else { # rindex <- index # index <- setdiff(index, rindex) #} #pindex.r <- rindex #tindex.r <- (1:n)[-pindex.r] pindex.r <- (1:n)[cvindex[,r]==1] tindex.r <- (1:n)[-pindex.r] Xt <- X[tindex.r, ]; Yt <- Y[tindex.r, ] Xp <- X[pindex.r, ]; Yp <- Y[pindex.r, ] # normatlization for X and Y based on training mean and sd xmean <- apply(Xt,2,mean); ymean <- apply(Yt,2,mean) xsd <- apply(Xt,2,sd); ysd <- apply(Yt,2,sd) xsd[xsd==0] <- 1; ysd[ysd==0] <- 1 if (centerx==T){ Xt <- Xt - matrix(xmean, nrow(Xt), ncol(Xt), byrow=T) Xp <- Xp - matrix(xmean, nrow(Xp), ncol(Xp), byrow=T) } if (centery==T){ Yt <- Yt - matrix(ymean, nrow(Yt), ncol(Yt), byrow=T) Yp <- Yp - matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) } if (scalex==T){ Xt <- Xt * matrix(1/xsd, nrow(Xt), ncol(Xt), byrow=T) Xp <- Xp * matrix(1/xsd, nrow(Xp), ncol(Xp), byrow=T) } if (scaley==T){ Yt <- Yt * matrix(1/ysd, nrow(Yt), ncol(Yt), byrow=T) Yp <- Yp * matrix(1/ysd, nrow(Yp), ncol(Yp), byrow=T) } # training if (type.method == "scca"){ #result.ps <- pscore.scca(Xt=Xt, Xp=Xp, Yt=Yt, Yp=Yp, c1=c1, c2=c2, ncomp=ncomp, type.CCApredscore=type.CCApredscore) result.ps <- pscore.scca(Xt=Xt, Xp=Xp, Yt=Yt, c1=c1, c2=c2, ncomp=ncomp, type.CCApredscore=type.CCApredscore) Qp <- result.ps$Qp if (type.CCApredscore=="various"){ Qp1 <- result.ps$Qp1; Qp2 <- result.ps$Qp2; Qp3 <- result.ps$Qp3; Qp4 <- result.ps$Qp4; Qp5 <- result.ps$Qp5; Qp6 <- result.ps$Qp6 } } else if (type.method == "rcc"){ #result.ps <- pscore.rcc(Xt=Xt, Xp=Xp, Yt=Yt, Yp=Yp, rcclambda1=rcclambda1, rcclambda2=rcclambda2, ncomp=ncomp, type.CCApredscore=type.CCApredscore) result.ps <- pscore.rcc(Xt=Xt, Xp=Xp, Yt=Yt, rcclambda1=rcclambda1, rcclambda2=rcclambda2, ncomp=ncomp, type.CCApredscore=type.CCApredscore) Qp <- result.ps$Qp if (type.CCApredscore=="various"){ Qp1 <- result.ps$Qp1; Qp2 <- result.ps$Qp2; Qp3 <- result.ps$Qp3; Qp4 <- result.ps$Qp4; Qp5 <- result.ps$Qp5; Qp6 <- result.ps$Qp6 } } else if (type.method == "svm"){ At <- A[tindex.r,] Ap <- A[pindex.r,] #library(kernlab) result.ps.svm <- pscore.svm(Xt=Xt, Xp=Xp, At=At, fold=fold, scaled=svmscaled, kernel=svmkernel, kpar=svmkpar, C=svmC) #result.ps.svm <- pscore.svm(Xt=Xt, Xp=Xp, At=At, fold=fold, scaled=F, kernel=svmkernel, kpar=svmkpar, C=svmC) Qp <- result.ps.svm$Qp } else if (type.method == "bayse"){ } else if (type.method == "nn"){ At <- A[tindex.r,] Ap <- A[pindex.r,] #library(kernlab) result.ps.nn <- pscore.nn(Xt=Xt, Xp=Xp, At=At, fold=fold, nneighbor=nneighbor) Qp <- result.ps.nn$Qp } else if (type.method == "reg"){ } else if (type.method == "kreg"){ Ktt <- K[tindex.r,tindex.r] Kpt <- K[pindex.r,tindex.r] result.ps.kreg <- pscore.kreg(Ktt=Ktt, Kpt=Kpt, Yt=Yt, lambda=lambda) Qp <- result.ps.kreg$Qp } else if (type.method == "mkreg"){ Ktt1 <- K1[tindex.r,tindex.r] Kpt1 <- K1[pindex.r,tindex.r] Ktt2 <- K2[tindex.r,tindex.r] Kpt2 <- K2[pindex.r,tindex.r] result.ps.mkreg <- pscore.mkreg(Ktt1=Ktt1, Kpt1=Kpt1, Ktt2=Ktt2, Kpt2=Kpt2, Yt=Yt, lambda1=lambdax1, lambda2=lambdax2) #result.ps.mkreg <- pscore.mkreg(Ktt1=Ktt1, Kpt1=Kpt1, Ktt2=Ktt2, Kpt2=Kpt2, Yt=Yt, lambda1=lambdax1, lambda2=lambdax2, zero.differentKmat=T) Qp <- result.ps.mkreg$Qp } else if (type.method == "control"){ Qp <- matrix(0, nrow(Yp), ncol(Yp)) } else if (type.method == "randsample"){ At <- A[tindex.r,] Ap <- A[pindex.r,] Qp <- pscore.randsample(Xt=Xt, Xp=Xp, At=At, fold=5)$Qp #} else if (type.method == "random"){ # Qp <- matrix(rnorm(nrow(Yp)*ncol(Yp)), nrow(Yp), ncol(Yp)) } else { break } if (scaley==T){ if (type.method != "svm"){ Qp <- Qp * matrix(ysd, nrow(Yp), ncol(Yp), byrow=T) } } if (centery==T){ if (type.method != "svm"){ Qp <- Qp + matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) } } Q[pindex.r,] <- Qp if (type.CCApredscore == "various"){ meanmat <- matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) sdmat <- matrix(ysd, nrow(Yp), ncol(Yp), byrow=T) if (scaley==T){ Qp1 <- Qp1 * sdmat; Qp2 <- Qp2 * sdmat; Qp3 <- Qp3 * sdmat; Qp4 <- Qp4 * sdmat; Qp5 <- Qp5 * sdmat; Qp6 <- Qp6 * sdmat } if (centery==T){ Qp1 <- Qp1 + meanmat; Qp2 <- Qp2 + meanmat; Qp3 <- Qp3 + meanmat; Qp4 <- Qp4 + meanmat; Qp5 <- Qp5 + meanmat; Qp6 <- Qp6 + meanmat } Q1[pindex.r,] <- Qp1; Q2[pindex.r,] <- Qp2; Q3[pindex.r,] <- Qp3; Q4[pindex.r,] <- Qp4; Q5[pindex.r,] <- Qp5; Q6[pindex.r,] <- Qp6 } } } # prediction accuracy for top 100 predictions for each drug accuracymat <- matrix(0, nx, 10) for (i in 1:nx){ for (j in 1:10){ ntop <- 10*j accuracymat[i,j] <- sum(A[i,order(Q[i,], decreasing=T)[1:ntop]]) / ntop } } # AUC for ROC curve and Precision-Recall curve if (type.CCApredscore == "default"){ aucresult <- eval.auc.curves(Q, A, localauc=localauc) list(aucscores=aucresult$aucscores, Q=Q, auc.roc=aucresult$auc.roc, auc.prc=aucresult$auc.prc, localauc.roc=aucresult$localauc.roc, localauc.prc=aucresult$localauc.prc, accuracymat=accuracymat) } else if (type.CCApredscore == "various"){ aucresult <- eval.auc.curves.variousscore(Q1,Q2,Q3,Q4,Q5,Q6,A, localauc=localauc) #list(aucvec.roc=aucresult$aucvec.roc, aucvec.prc=aucresult$aucvec.prc, aucscores=aucresult$aucscores) list(aucscores=aucresult$aucscores) } } eval.auc.curves <- function(Q, A, localauc=F){ auc.roc <- eval.auc.roc(as.vector(Q), as.vector(A)) auc.prc <- eval.auc.prc(as.vector(Q), as.vector(A)) if (localauc==F){ aucscores <- c(auc.roc, auc.prc) aucscores <- round(aucscores, 5) list(auc.roc=auc.roc, auc.prc=auc.prc, aucscores=aucscores) } else if (localauc==T){ py <- ncol(A) # local AUC localauc.roc <- rep(0, py) localauc.prc <- rep(0, py) for (k in 1:py){ localauc.roc[k] <- eval.auc.roc(Q[,k], A[,k]) localauc.prc[k] <- eval.auc.prc(Q[,k], A[,k]) } aucscores <- c(auc.roc, auc.prc, mean(localauc.roc), mean(localauc.prc)) aucscores <- round(aucscores, 5) list(auc.roc=auc.roc, auc.prc=auc.prc, localauc.roc=localauc.roc, localauc.prc=localauc.prc, aucscores=aucscores) } } eval.auc.curves.variousscore <- function(Q1, Q2, Q3, Q4, Q5, Q6, A, localauc=F){ auc.roc1 <- eval.auc.roc(as.vector(Q1), as.vector(A)) auc.roc2 <- eval.auc.roc(as.vector(Q2), as.vector(A)) auc.roc3 <- eval.auc.roc(as.vector(Q3), as.vector(A)) auc.roc4 <- eval.auc.roc(as.vector(Q4), as.vector(A)) auc.roc5 <- eval.auc.roc(as.vector(Q5), as.vector(A)) auc.roc6 <- eval.auc.roc(as.vector(Q6), as.vector(A)) aucvec.roc <- c(auc.roc1, auc.roc2, auc.roc3, auc.roc4, auc.roc5, auc.roc6) auc.prc1 <- eval.auc.prc(as.vector(Q1), as.vector(A)) auc.prc2 <- eval.auc.prc(as.vector(Q2), as.vector(A)) auc.prc3 <- eval.auc.prc(as.vector(Q3), as.vector(A)) auc.prc4 <- eval.auc.prc(as.vector(Q4), as.vector(A)) auc.prc5 <- eval.auc.prc(as.vector(Q5), as.vector(A)) auc.prc6 <- eval.auc.prc(as.vector(Q6), as.vector(A)) aucvec.prc <- c(auc.prc1, auc.prc2, auc.prc3, auc.prc4, auc.prc5, auc.prc6) if (localauc==F){ aucscores <- c(aucvec.roc, aucvec.prc) aucscores <- round(aucscores, 5) #list(aucvec.roc=aucvec.roc, aucvec.prc=aucvec.prc, aucscores=aucscores) list(aucscores=aucscores) } else if (localauc==T){ py <- ncol(A) # local AUC localauc.roc1 <- rep(0, py); localauc.prc1 <- rep(0, py) localauc.roc2 <- rep(0, py); localauc.prc2 <- rep(0, py) localauc.roc3 <- rep(0, py); localauc.prc3 <- rep(0, py) localauc.roc4 <- rep(0, py); localauc.prc4 <- rep(0, py) localauc.roc5 <- rep(0, py); localauc.prc5 <- rep(0, py) localauc.roc6 <- rep(0, py); localauc.prc6 <- rep(0, py) for (k in 1:py){ localauc.roc1[k] <- eval.auc.roc(Q1[,k], A[,k]); localauc.prc1[k] <- eval.auc.prc(Q1[,k], A[,k]) localauc.roc2[k] <- eval.auc.roc(Q2[,k], A[,k]); localauc.prc2[k] <- eval.auc.prc(Q2[,k], A[,k]) localauc.roc3[k] <- eval.auc.roc(Q3[,k], A[,k]); localauc.prc3[k] <- eval.auc.prc(Q3[,k], A[,k]) localauc.roc4[k] <- eval.auc.roc(Q4[,k], A[,k]); localauc.prc4[k] <- eval.auc.prc(Q4[,k], A[,k]) localauc.roc5[k] <- eval.auc.roc(Q5[,k], A[,k]); localauc.prc5[k] <- eval.auc.prc(Q5[,k], A[,k]) localauc.roc6[k] <- eval.auc.roc(Q6[,k], A[,k]); localauc.prc6[k] <- eval.auc.prc(Q6[,k], A[,k]) } aucscores1 <- c(auc.roc1, auc.prc1, mean(localauc.roc1), mean(localauc.prc1)) aucscores2 <- c(auc.roc2, auc.prc2, mean(localauc.roc2), mean(localauc.prc2)) aucscores3 <- c(auc.roc3, auc.prc3, mean(localauc.roc3), mean(localauc.prc3)) aucscores4 <- c(auc.roc4, auc.prc4, mean(localauc.roc4), mean(localauc.prc4)) aucscores5 <- c(auc.roc5, auc.prc5, mean(localauc.roc5), mean(localauc.prc5)) aucscores6 <- c(auc.roc6, auc.prc6, mean(localauc.roc6), mean(localauc.prc6)) #aucscores <- matrix(0, 6, 4) allaucscores <- rbind(aucscores1, aucscores2, aucscores3, aucscores4, aucscores5, aucscores6) allaucscores <- round(allaucscores, 5) colnames(allaucscores) <- c("AUC","AUPR","meanAUC","meanAUPR") #list(auc.roc=auc.roc, auc.prc=auc.prc, localauc.roc=localauc.roc, localauc.prc=localauc.prc, aucscores=aucscores) list(aucscores=allaucscores) } } eval.auc.roc <- function(scorevec, answervec){ scorevec <- as.vector(scorevec) answervec <- as.vector(answervec) result.cvfold <- list(predictions=scorevec, labels=answervec) pred <- prediction(result.cvfold$predictions, result.cvfold$labels) perf <- performance(pred, "auc") auc <- perf@y.values[[1]] auc } eval.auc.prc <- function(scorevec, answervec, nthval=100){ scorevec <- as.vector(scorevec) answervec <- as.vector(answervec) maxval <- max(scorevec) minval <- min(scorevec) eps <- 0.0001 maxval <- max(scorevec) - eps minval <- min(scorevec) + eps thvec <- seq(minval, maxval, len=nthval) pvec <- rep(0, nthval) rvec <- rep(0, nthval) for (j in 1:nthval){ thval <- thvec[j] pvec[j] <- sum(answervec[scorevec>thval]) / length(scorevec[scorevec>thval]) rvec[j] <- sum(answervec[scorevec>thval]) / sum(answervec) } #pvec[nthval] <- 1 #rvec[nthval] <- 0 #cbind(rvec, pvec) aucval <- 0 for (j in 1:(nthval-1)){ aucval <- aucval + 0.5 * (pvec[j] + pvec[j+1]) * (rvec[j]- rvec[j+1]) } aucval } curve.pr <- function(scorevec, answervec, nthval=200){ scorevec <- as.vector(scorevec) answervec <- as.vector(answervec) eps <- 0.0001 maxval <- max(scorevec) - eps minval <- min(scorevec) + eps thvec <- seq(minval, maxval, len=nthval) pvec <- rep(0, nthval) rvec <- rep(0, nthval) for (j in 1:nthval){ thval <- thvec[j] pvec[j] <- sum(answervec[scorevec>thval]) / length(scorevec[scorevec>thval]) rvec[j] <- sum(answervec[scorevec>thval]) / sum(answervec) } #pvec[nthval] <- 1 #rvec[nthval] <- 0 #cbind(rvec, pvec) aucval <- 0 for (j in 1:(nthval-1)){ aucval <- aucval + 0.5 * (pvec[j] + pvec[j+1]) * (rvec[j]- rvec[j+1]) } #aucval list(pvec=pvec, rvec=rvec, prmat=cbind(rvec, pvec), aucval=aucval) } geninv <- function(M, eps=0.000001){ #eps <- 0.0001 M <- eigen(M,T) M$vectors <- M$vectors[,M$values[]>eps] M$values <- M$values[ M$values[]>eps ] inv <- diag(1/sqrt(M$values)) Minv <- M$vectors %*% inv %*% t(M$vectors) Minv } create.sideeffect.newpredpairmat <- function(Q, Xp, Yt) { n <- nrow(Q) p <- ncol(Q) pubchemid <- rownames(Q) effectterm <- colnames(Q) degree <- apply(Xp,1,sum) effectfreq <- apply(Yt,2,sum) pairmat <- data.frame(matrix(0, n*p, 4)) pairmat[,1] <- as.vector(matrix(pubchemid, n, p, byrow=F)) pairmat[,2] <- as.vector(matrix(degree, n, p, byrow=F)) pairmat[,3] <- as.vector(matrix(effectfreq, n, p, byrow=T)) pairmat[,4] <- as.vector(matrix(effectterm, n, p, byrow=T)) pairmat[,5] <- as.vector(round(Q,6)) colnames(pairmat) <- c("pubchemID","degree","effectfreq","sideeffect","score") pairmat } adjust.xcol.xnewcol <- function(X, Xnew){ nx <- nrow(X); px <- ncol(X) nxnew <- nrow(Xnew); pxnew <- ncol(Xnew) Xp <- matrix(0, nxnew, px) rownames(Xp) <- rownames(Xnew) colnames(Xp) <- colnames(X) Xp[,intersect(colnames(X),colnames(Xnew))] <- Xnew[,intersect(colnames(X),colnames(Xnew))] Xp } mergemat <- function(X, Y){ xrow <- rownames(X); xcol <- colnames(X) yrow <- rownames(Y); ycol <- colnames(Y) zrow <- unique(sort(union(xrow, yrow))) zcol <- unique(sort(union(xcol, ycol))) Z <- matrix(0, length(zrow), length(zcol)) Z1 <- matrix(0, length(zrow), length(zcol)) Z2 <- matrix(0, length(zrow), length(zcol)) rownames(Z) <- zrow; colnames(Z) <- zcol rownames(Z1) <- zrow; colnames(Z1) <- zcol rownames(Z2) <- zrow; colnames(Z2) <- zcol Z1[xrow,xcol] <- X Z2[yrow,ycol] <- Y Z3 <- Z1 + Z2 Z[Z3>0] <- 1 Z <- Z[,apply(Z,2,sum)>0] Z } ################## old cross-validation function (not used any more from 2011)############### pred.scca.old <- function(X, Y, Xnew, c1, c2, ncomp=4, norm.mean=T, norm.sd=T){ Xraw <- X; Yraw <- Y colindex.nonzero.x <- (1:ncol(X))[apply(X,2,sd)!=0] colindex.nonzero.y <- (1:ncol(Y))[apply(Y,2,sd)!=0] X <- X[,colindex.nonzero.x]; Y <- Y[,colindex.nonzero.y] #X <- scale(X); Y <- scale(Y) nx <- nrow(X); px <- ncol(X) ny <- nrow(Y); py <- ncol(Y) n <- nx # common substructures between Xt and Xp colname.tx <- colnames(Xt) colname.px <- colnames(Xp) commonindex <- (1:length(colname.px))[is.element(colname.px, colname.tx)] Xp <- Xp[,commonindex] # normatlization for X and Y based on training mean and sd xmean <- apply(Xt,2,mean); ymean <- apply(Yt,2,mean) xsd <- apply(Xt,2,sd); ysd <- apply(Yt,2,sd) xsd[xsd==0] <- 1; ysd[ysd==0] <- 1 #Xt <- scale(Xt); Yt <- scale(Yt) if (norm.mean==T){ Xt <- Xt - matrix(xmean, nrow(Xt), ncol(Xt), byrow=T) Yt <- Yt - matrix(ymean, nrow(Yt), ncol(Yt), byrow=T) Xp <- Xp - matrix(xmean, nrow(Xp), ncol(Xp), byrow=T) #Yp <- Yp - matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) } if (norm.sd==T){ Xt <- Xt * matrix(1/xsd, nrow(Xt), ncol(Xt), byrow=T) Yt <- Yt * matrix(1/ysd, nrow(Yt), ncol(Yt), byrow=T) Xp <- Xp * matrix(1/xsd, nrow(Xp), ncol(Xp), byrow=T) #Yp <- Yp * matrix(1/ysd, nrow(Yp), ncol(Yp), byrow=T) } result <- scca(x=X, z=Y, typex = "standard", typez = "standard", penaltyx = c1, penaltyz = c2, K = ncomp, niter = 15, v = NULL, trace = FALSE, standardize = FALSE, xnames = NULL, znames = NULL, chromx = NULL, chromz = NULL, upos = FALSE, uneg = FALSE, vpos = FALSE, vneg = FALSE, outcome = NULL, y = NULL, cens = NULL) # canonical correlation dvec <- rep(0, ncomp) # weight matrix features of non-zeros #umat <- matrix(0, px, ncomp) #vmat <- matrix(0, py, ncomp) # weight matrix for all input features umatall <- matrix(0, ncol(Xraw), ncomp) vmatall <- matrix(0, ncol(Yraw), ncomp) # training result <- CCA(x=X, z=Y, typex = "standard", typez = "standard", penaltyx = c1, penaltyz = c2, K = ncomp, niter = 15, v = NULL, trace = FALSE, standardize = FALSE, xnames = NULL, znames = NULL, chromx = NULL, chromz = NULL, upos = FALSE, uneg = FALSE, vpos = FALSE, vneg = FALSE, outcome = NULL, y = NULL, cens = NULL) dvec <- result$d # adjustment of positive and negatieve for (k in 1:ncomp){ wmaxorderx <- order(abs(result$u[,k]), decreasing=T)[1] if (result$u[wmaxorderx,k] < 0){ result$u[,k] <- - result$u[,k] result$v[,k] <- - result$v[,k] } } umatall[colindex.nonzero.x, ] <- result$u vmatall[colindex.nonzero.y, ] <- result$v umat <- result.sca$uall vmat <- result.sca$vall dvec <- result.sca$d ## prediction Q.pxty <- (Xp %*% umat) %*% diag(dvec) %*% t(Yt %*% vmat) Q.txty <- (Xt %*% umat) %*% diag(dvec) %*% t(Yt %*% vmat) ## result rownames(Q.pxty) <- rownames(Xp) colnames(Q.pxty) <- rownames(Yt) rownames(Q.txty) <- rownames(Xt) colnames(Q.txty) <- rownames(Yt) # output pairmat.pxty <- create.sortpairmat(round(Q.pxty,4)) pairmat.txty <- create.sortpairmat(round(Q.txty,4)) pairmat.newtxty <- create.sortpairmat.newtxty(round(Q.txty,4), A) list(Q.pxty=Q.pxty, Q.txty=Q.txty, pairmat.pxty=pairmat.pxty, pairmat.txty=pairmat.txty, pairmat.newtxty=pairmat.newtxty) } eval.localauc <- function(scoremat, answermat, type="roc"){ p <- ncol(answermat) aucvec <- rep(0,p) for (j in 1:p){ if (type=="pr"){ aucvec[j] <- eval.auc.pr(scoremat[,j], answermat[,j]) } else if (type=="roc"){ aucvec[j] <- eval.auc(scoremat[,j], answermat[,j]) } } aucvec } eval.auc <- function(scorevec, answervec){ scorevec <- as.vector(scorevec) answervec <- as.vector(answervec) result.cvfold <- list(predictions=scorevec, labels=answervec) pred <- prediction(result.cvfold$predictions, result.cvfold$labels) perf <- performance(pred, "auc") auc <- perf@y.values[[1]] auc } eval.auc.pr <- function(scorevec, answervec, nthval=100){ scorevec <- as.vector(scorevec) answervec <- as.vector(answervec) maxval <- max(scorevec) minval <- min(scorevec) eps <- 0.0001 maxval <- max(scorevec) - eps minval <- min(scorevec) + eps thvec <- seq(minval, maxval, len=nthval) pvec <- rep(0, nthval) rvec <- rep(0, nthval) for (j in 1:nthval){ thval <- thvec[j] pvec[j] <- sum(answervec[scorevec>thval]) / length(scorevec[scorevec>thval]) rvec[j] <- sum(answervec[scorevec>thval]) / sum(answervec) } #pvec[nthval] <- 1 #rvec[nthval] <- 0 #cbind(rvec, pvec) aucval <- 0 for (j in 1:(nthval-1)){ aucval <- aucval + 0.5 * (pvec[j] + pvec[j+1]) * (rvec[j]- rvec[j+1]) } aucval } cv.scca <- function(X, Y, c1=NULL, c2=NULL, ncomp=4, fold=5, randomseed=1, centerx=T, scalex=T, centery=T, scaley=T, localauc=F){ A <- Y nx <- nrow(X); px <- ncol(X) ny <- nrow(Y); py <- ncol(Y) n <- nx nkeyword <- py rsize <- trunc(n / fold) index <- 1:n # Q1 <- matrix(0, n, py) # Q2 <- matrix(0, n, py) Q3 <- matrix(0, n, py) # Q4 <- matrix(0, n, py) # Q5 <- matrix(0, n, py) # rownames(Q1) <- rownames(A) # colnames(Q1) <- colnames(A) # rownames(Q2) <- rownames(A) # colnames(Q2) <- colnames(A) rownames(Q3) <- rownames(A) colnames(Q3) <- colnames(A) dsum.cv <- rep(0,fold) for (r in 1:fold){ # random seed set.seed(randomseed - 1 + r) # remove a gene if (length(index) >= rsize){ rindex <- sample(index, size=rsize) index <- setdiff(index, rindex) } else { rindex <- index index <- setdiff(index, rindex) } pindex.r <- rindex tindex.r <- (1:n)[-pindex.r] Xt <- X[tindex.r, ] Yt <- Y[tindex.r, ] Xp <- X[pindex.r, ] Yp <- Y[pindex.r, ] # normatlization for X and Y based on training mean and sd xmean <- apply(Xt,2,mean); ymean <- apply(Yt,2,mean) xsd <- apply(Xt,2,sd); ysd <- apply(Yt,2,sd) xsd[xsd==0] <- 1; ysd[ysd==0] <- 1 #Xt <- scale(Xt); Yt <- scale(Yt) if (centerx==T){ Xt <- Xt - matrix(xmean, nrow(Xt), ncol(Xt), byrow=T) Xp <- Xp - matrix(xmean, nrow(Xp), ncol(Xp), byrow=T) } if (centery==T){ Yt <- Yt - matrix(ymean, nrow(Yt), ncol(Yt), byrow=T) Yp <- Yp - matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) } if (scalex==T){ Xt <- Xt * matrix(1/xsd, nrow(Xt), ncol(Xt), byrow=T) Xp <- Xp * matrix(1/xsd, nrow(Xp), ncol(Xp), byrow=T) } if (scaley==T){ Yt <- Yt * matrix(1/ysd, nrow(Yt), ncol(Yt), byrow=T) Yp <- Yp * matrix(1/ysd, nrow(Yp), ncol(Yp), byrow=T) } # training result.scca <- scca(X=Xt, Y=Yt, c1=c1, c2=c2, ncomp=ncomp) #ur <- result.sca$u #vr <- result.sca$v #ur <- result.sca$uall #vr <- result.sca$vall ur <- result.scca$u vr <- result.scca$v dr <- result.scca$d corvecr <- rep(0,ncomp) for (j in 1:ncomp){ corvecr[j] <- cor(result.scca$scorex[,j],result.scca$scorey[,j]) } # test ##### Edouard score ##### # Qp1 <- (Xp%*%ur) %*% diag(dr) %*% t(vr) Qp3 <- (Xp%*%ur) %*% diag(corvecr) %*% t(vr) ##### Recomb score ##### # vrtvr <- t(vr) %*% vr # eps <- 0.000001 # if (min(eigen(vrtvr,T)$values) < eps){ # vrtvrinv <- geninv(vrtvr, eps=eps) # } else { # vrtvrinv <- solve(vrtvr) # } # Qp2 <- Xp %*% ur %*% vrtvrinv %*% t(vr) # Qp4 <- Xp %*% ur %*% diag(dr) %*% vrtvrinv %*% t(vr) # Qp5 <- Xp %*% ur %*% diag(corvecr) %*% vrtvrinv %*% t(vr) # ###Qp <- Xp %*% ur %*% diag(dr) %*% vrtvrinv %*% t(vr) if (scaley==T){ # Qp1 <- Qp1 * matrix(ysd, nrow(Yp), ncol(Yp), byrow=T) # Qp2 <- Qp2 * matrix(ysd, nrow(Yp), ncol(Yp), byrow=T) Qp3 <- Qp3 * matrix(ysd, nrow(Yp), ncol(Yp), byrow=T) # Qp4 <- Qp4 * matrix(ysd, nrow(Yp), ncol(Yp), byrow=T) # Qp5 <- Qp5 * matrix(ysd, nrow(Yp), ncol(Yp), byrow=T) } if (centery==T){ # Qp1 <- Qp1 + matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) # Qp2 <- Qp2 + matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) Qp3 <- Qp3 + matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) # Qp4 <- Qp4 + matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) # Qp5 <- Qp5 + matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) } # Q1[pindex.r,] <- Qp1 # Q2[pindex.r,] <- Qp2 Q3[pindex.r,] <- Qp3 # Q4[pindex.r,] <- Qp4 # Q5[pindex.r,] <- Qp5 } # AUC for Edouard score # aucscore1 <- eval.auc(as.vector(Q1), as.vector(A)) aucscore3 <- eval.auc(as.vector(Q3), as.vector(A)) # AUC for RECOMB score # aucscore2 <- eval.auc(as.vector(Q2), as.vector(A)) # aucscore4 <- eval.auc(as.vector(Q4), as.vector(A)) # aucscore5 <- eval.auc(as.vector(Q5), as.vector(A)) # #aucvec <- c(aucscore1, aucscore2) # #aucvec <- c(aucscore1, aucscore2, aucscore3) # aucvec <- c(aucscore1, aucscore2, aucscore3, aucscore4, aucscore5) #aucvec <- aucscore3 # AUC for ROC curve auc.roc <- aucscore3 # AUC for Precision-Recall curve auc.prc <- eval.auc.pr(as.vector(Q3), as.vector(A)) # # local ROC # aucvec.pery1 <- rep(0, py) # aucvec.pery2 <- rep(0, py) # for (k in 1:py){ # aucvec.pery1[k] <- eval.auc(Q1[,k], A[,k]) # aucvec.pery2[k] <- eval.auc(Q2[,k], A[,k]) # } # aucvec.perx1 <- rep(0, n) # aucvec.perx2 <- rep(0, n) # for (k in 1:n){ # aucvec.perx1[k] <- eval.auc(Q1[k,], A[k,]) # aucvec.perx2[k] <- eval.auc(Q2[k,], A[k,]) # } # list(auc=aucvec, auc.pery1=aucvec.pery1, auc.pery2=aucvec.pery2, auc.perx1=aucvec.perx1, auc.perx2=aucvec.perx2, Q1=Q1, Q2=Q2) # list(auc=aucvec, Q1=Q1, Q2=Q2, Q3=Q3, Q4=Q4, Q5=Q5) if (localauc==T){ # local AUC auc.roc.eachy <- rep(0, py) auc.prc.eachy <- rep(0, py) for (k in 1:py){ auc.roc.eachy[k] <- eval.auc(Q3[,k], A[,k]) auc.prc.eachy[k] <- eval.auc.pr(Q3[,k], A[,k]) } auc.roc.eachx <- rep(0, n) auc.prc.eachx <- rep(0, n) #for (k in 1:n){ # auc.roc.eachx[k] <- eval.auc(Q3[k,], A[k,]) # auc.prc.eachx[k] <- eval.auc.pr(Q3[k,], A[k,]) #} #list(auc=aucvec, auc.eachy=auc.eachy, auc.eachx=auc.eachx, Q1=Q1, Q2=Q2, Q3=Q3, Q4=Q4, Q5=Q5) # global AUC and averaged local AUC #aucvec <- c(aucvec, mean(auc.eachy)) # 1) global ROC-AUC, 2) averaged local ROC-AUC, 3) global PR-AUC and 4) averaged local PR-AUC aucvec <- c(auc.roc, mean(auc.roc.eachy), auc.prc, mean(auc.prc.eachy)) #aucvec <- c(auc.roc, mean(auc.roc.eachy), auc.prc, mean(auc.prc.eachy), median(auc.roc.eachy), median(auc.prc.eachy)) #list(auc=aucvec, auc.eachy=auc.eachy, Q1=Q1, Q2=Q2, Q3=Q3, Q4=Q4, Q5=Q5) #list(auc=aucvec, auc.eachy=auc.eachy, Q=Q3) #list(auc=aucvec, Q=Q3) list(auc=aucvec, Q=Q3, auc.roc.eachx=auc.roc.eachx, auc.roc.eachy=auc.roc.eachy, auc.prc.eachx=auc.prc.eachx, auc.prc.eachy=auc.prc.eachy) } else { #list(auc=aucvec, Q1=Q1, Q2=Q2, Q3=Q3, Q4=Q4, Q5=Q5) #list(auc=aucvec, Q=Q3) aucvec <- c(auc.roc, auc.prc) list(auc=aucvec, Q=Q3) } } cv.reg <- function(X, Y, lambda=0.1, fold=5, randomseed=1, centerx=T, scalex=T, centery=T, scaley=T, localauc=F){ A <- Y nx <- nrow(X); px <- ncol(X) ny <- nrow(Y); py <- ncol(Y) n <- nx nkeyword <- py rsize <- trunc(n / fold) index <- 1:n Q <- matrix(0, n, py) rownames(Q) <- rownames(A) colnames(Q) <- colnames(A) for (r in 1:fold){ # random seed set.seed(randomseed - 1 + r) # remove a gene if (length(index) >= rsize){ rindex <- sample(index, size=rsize) index <- setdiff(index, rindex) } else { rindex <- index index <- setdiff(index, rindex) } pindex.r <- rindex tindex.r <- (1:n)[-pindex.r] Xt <- X[tindex.r, ] Yt <- Y[tindex.r, ] Xp <- X[pindex.r, ] Yp <- Y[pindex.r, ] # normatlization for X and Y based on training mean and sd xmean <- apply(Xt,2,mean); ymean <- apply(Yt,2,mean) xsd <- apply(Xt,2,sd); ysd <- apply(Yt,2,sd) xsd[xsd==0] <- 1; ysd[ysd==0] <- 1 #Xt <- scale(Xt); Yt <- scale(Yt) if (centerx==T){ Xt <- Xt - matrix(xmean, nrow(Xt), ncol(Xt), byrow=T) Xp <- Xp - matrix(xmean, nrow(Xp), ncol(Xp), byrow=T) } if (centery==T){ Yt <- Yt - matrix(ymean, nrow(Yt), ncol(Yt), byrow=T) Yp <- Yp - matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) } if (scalex==T){ Xt <- Xt * matrix(1/xsd, nrow(Xt), ncol(Xt), byrow=T) Xp <- Xp * matrix(1/xsd, nrow(Xp), ncol(Xp), byrow=T) } if (scaley==T){ Yt <- Yt * matrix(1/ysd, nrow(Yt), ncol(Yt), byrow=T) Yp <- Yp * matrix(1/ysd, nrow(Yp), ncol(Yp), byrow=T) } # training #result.scca <- scca(X=Xt, Y=Yt, c1=c1, c2=c2, ncomp=ncomp) I <- diag(rep(1,ncol(Xt))) betahat <- (t(Xt) %*% Xt + lambda * I) %*% (t(Xt) %*% Yt) # test Qp <- Xp %*% betahat if (scaley==T){ Qp <- Qp * matrix(ysd, nrow(Yp), ncol(Yp), byrow=T) } if (centery==T){ Qp <- Qp + matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) } Q[pindex.r,] <- Qp } # AUC for ROC curve auc.roc <- eval.auc(as.vector(Q), as.vector(A)) # AUC for Precision-Recall curve auc.prc <- eval.auc.pr(as.vector(Q), as.vector(A)) if (localauc==T){ # local AUC auc.roc.eachy <- rep(0, py) auc.prc.eachy <- rep(0, py) for (k in 1:py){ auc.roc.eachy[k] <- eval.auc(Q[,k], A[,k]) auc.prc.eachy[k] <- eval.auc.pr(Q[,k], A[,k]) } auc.roc.eachx <- rep(0, n) auc.prc.eachx <- rep(0, n) #for (k in 1:n){ # auc.roc.eachx[k] <- eval.auc(Q[k,], A[k,]) # auc.prc.eachx[k] <- eval.auc.pr(Q[k,], A[k,]) #} #list(auc=aucvec, auc.eachy=auc.eachy, auc.eachx=auc.eachx, Q1=Q1, Q2=Q2, Q3=Q3, Q4=Q4, Q5=Q5) # global AUC and averaged local AUC #aucvec <- c(aucvec, mean(auc.eachy)) # 1) global ROC-AUC, 2) averaged local ROC-AUC, 3) global PR-AUC and 4) averaged local PR-AUC aucvec <- c(auc.roc, mean(auc.roc.eachy), auc.prc, mean(auc.prc.eachy)) list(auc=aucvec, Q=Q, auc.roc.eachx=auc.roc.eachx, auc.roc.eachy=auc.roc.eachy, auc.prc.eachx=auc.prc.eachx, auc.prc.eachy=auc.prc.eachy) } else { aucvec <- c(auc.roc, auc.prc) list(auc=aucvec, Q=Q) } } cv.rand <- function(X, Y, localauc=F){ A <- Y nx <- nrow(X); px <- ncol(X) ny <- nrow(Y); py <- ncol(Y) n <- nx nkeyword <- py #Q <- matrix(0, n, py) Q <- matrix(rnorm(ny*py), ny, py) rownames(Q) <- rownames(A) colnames(Q) <- colnames(A) # AUC for ROC curve auc.roc <- eval.auc(as.vector(Q), as.vector(A)) # AUC for Precision-Recall curve auc.prc <- eval.auc.pr(as.vector(Q), as.vector(A)) if (localauc==T){ # local AUC auc.roc.eachy <- rep(0, py) auc.prc.eachy <- rep(0, py) for (k in 1:py){ auc.roc.eachy[k] <- eval.auc(Q[,k], A[,k]) auc.prc.eachy[k] <- eval.auc.pr(Q[,k], A[,k]) } auc.roc.eachx <- rep(0, n) auc.prc.eachx <- rep(0, n) #for (k in 1:n){ # auc.roc.eachx[k] <- eval.auc(Q[k,], A[k,]) # auc.prc.eachx[k] <- eval.auc.pr(Q[k,], A[k,]) #} # global AUC and averaged local AUC # 1) global ROC-AUC, 2) averaged local ROC-AUC, 3) global PR-AUC and 4) averaged local PR-AUC aucvec <- c(auc.roc, mean(auc.roc.eachy), auc.prc, mean(auc.prc.eachy)) list(auc=aucvec, Q=Q, auc.roc.eachx=auc.roc.eachx, auc.roc.eachy=auc.roc.eachy, auc.prc.eachx=auc.prc.eachx, auc.prc.eachy=auc.prc.eachy) } else { aucvec <- c(auc.roc, auc.prc) list(auc=aucvec, Q=Q) } } cv.svm <- function(X, Y, c1=NULL, c2=NULL, ncomp=4, fold=5, randomseed=1, centerx=T, scalex=T, centery=T, scaley=T, scaled=TRUE, kernel ="rbfdot", kpar = "automatic", C = 1){ A <- Y nx <- nrow(X); px <- ncol(X) ny <- nrow(Y); py <- ncol(Y) n <- nx nkeyword <- py rsize <- trunc(n / fold) index <- 1:n Qveclist <- c() Aveclist <- c() Q <- matrix(0, n, py) rownames(Q) <- rownames(A) colnames(Q) <- colnames(A) #sQ <- matrix(0, n, py) for (r in 1:fold){ # random seed set.seed(randomseed - 1 + r) # remove a gene if (length(index) >= rsize){ rindex <- sample(index, size=rsize) index <- setdiff(index, rindex) } else { rindex <- index index <- setdiff(index, rindex) } pindex.r <- rindex tindex.r <- (1:n)[-pindex.r] Xt <- X[tindex.r, ] Yt <- Y[tindex.r, ] At <- A[tindex.r, ] Xp <- X[pindex.r, ] Yp <- Y[pindex.r, ] Ap <- A[pindex.r, ] for (k in 1:py){ if (length(unique(At[,k]))==1){ # no training # test prediction score testpred <- rep(-1, length(pindex.r)) Q[pindex.r,k] <- testpred } else { # training model.svm <- ksvm(x=Xt, y=At[,k], type="C-svc", scaled=scaled, kernel=kernel, kpar=kpar, C=C) # test testpred <- predict(model.svm, Xp, type="decision") Q[pindex.r,k] <- testpred } } } # global AUC aucscore <- eval.auc(as.vector(Q), as.vector(A)) # local AUC auc.eachy <- rep(0, py) for (k in 1:py){ auc.eachy[k] <- eval.auc(Q[,k], A[,k]) } auc.eachx <- rep(0, n) for (k in 1:n){ auc.eachx[k] <- eval.auc(Q[k,], A[k,]) } list(auc=aucscore, auc.eachy=auc.eachy, auc.eachx=auc.eachx, Q=Q) #list(auc=aucscore, Q=Q) } cv.nn <- function(X, Y, c1=NULL, c2=NULL, ncomp=4, fold=5, randomseed=1, centerx=T, scalex=T, centery=T, scaley=T, scaled=TRUE, kernel ="rbfdot", kpar = "automatic", C = 1){ A <- Y nx <- nrow(X); px <- ncol(X) ny <- nrow(Y); py <- ncol(Y) n <- nx nkeyword <- py rsize <- trunc(n / fold) index <- 1:n xsimmat <- norm.kmat(X %*% t(X)) Qveclist <- c() Aveclist <- c() Q <- matrix(0, n, py) rownames(Q) <- rownames(A) colnames(Q) <- colnames(A) #sQ <- matrix(0, n, py) for (r in 1:fold){ # random seed set.seed(randomseed - 1 + r) # remove a gene if (length(index) >= rsize){ rindex <- sample(index, size=rsize) index <- setdiff(index, rindex) } else { rindex <- index index <- setdiff(index, rindex) } pindex.r <- rindex tindex.r <- (1:n)[-pindex.r] Xt <- X[tindex.r, ] Yt <- Y[tindex.r, ] At <- A[tindex.r, ] Xp <- X[pindex.r, ] Yp <- Y[pindex.r, ] Ap <- A[pindex.r, ] for (i in pindex.r){ #wprofile <- rep(0, py) #maxval <- 0 #for (j in tindex.r){ # simval <- xsimmat[i,j] # if (xsimmat > maxval){ # wprofile <- A[j,] * xsimmat[i,j] # maxval <- xsimmat[i,j] # } #} #maxval <- max(xsimmat[i,tindex.r]) #maxidx <- tindex.r[xsimmat[i,tindex.r]==maxval][1] #wprofile <- Y[maxidx,] * xsimmat[i,maxidx] maxidx <- tindex.r[order(xsimmat[i,tindex.r],decreasing=T)[1]] wprofile <- xsimmat[i,maxidx] * Y[maxidx,] Q[i,] <- wprofile } } # global AUC aucscore <- eval.auc(as.vector(Q), as.vector(A)) # local AUC auc.eachy <- rep(0, py) for (k in 1:py){ auc.eachy[k] <- eval.auc(Q[,k], A[,k]) } auc.eachx <- rep(0, n) for (k in 1:n){ auc.eachx[k] <- eval.auc(Q[k,], A[k,]) } list(auc=aucscore, auc.eachy=auc.eachy, auc.eachx=auc.eachx, Q=Q) #list(auc=aucscore, Q=Q) } #### for paper picture #### cv.scca.fast <- function(X, Y, A, c1=NULL, c2=NULL, ncomp=4, fold=5, randomseed=1, centerx=T, scalex=T, centery=T, scaley=T){ nx <- nrow(X); px <- ncol(X) ny <- nrow(Y); py <- ncol(Y) n <- nx nkeyword <- py rsize <- trunc(n / fold) index <- 1:n #Qveclist1 <- c() #Qveclist2 <- c() #Qveclist3 <- c() #Qveclist4 <- c() #Qveclist5 <- c() #Qveclist6 <- c() #Qveclist7 <- c() #Qveclist8 <- c() #Aveclist <- c() Q1 <- matrix(0, n, py) # Q2 <- matrix(0, n, py) rownames(Q1) <- rownames(A) colnames(Q1) <- colnames(A) # rownames(Q2) <- rownames(A) # colnames(Q2) <- colnames(A) dsum.cv <- rep(0,fold) for (r in 1:fold){ # random seed set.seed(randomseed - 1 + r) # remove a gene if (length(index) >= rsize){ rindex <- sample(index, size=rsize) index <- setdiff(index, rindex) } else { rindex <- index index <- setdiff(index, rindex) } pindex.r <- rindex tindex.r <- (1:n)[-pindex.r] Xt <- X[tindex.r, ] Yt <- Y[tindex.r, ] Xp <- X[pindex.r, ] Yp <- Y[pindex.r, ] # normatlization for X and Y based on training mean and sd xmean <- apply(Xt,2,mean); ymean <- apply(Yt,2,mean) xsd <- apply(Xt,2,sd); ysd <- apply(Yt,2,sd) xsd[xsd==0] <- 1; ysd[ysd==0] <- 1 #Xt <- scale(Xt); Yt <- scale(Yt) if (centerx==T){ Xt <- Xt - matrix(xmean, nrow(Xt), ncol(Xt), byrow=T) Xp <- Xp - matrix(xmean, nrow(Xp), ncol(Xp), byrow=T) } if (centery==T){ Yt <- Yt - matrix(ymean, nrow(Yt), ncol(Yt), byrow=T) Yp <- Yp - matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) } if (scalex==T){ Xt <- Xt * matrix(1/xsd, nrow(Xt), ncol(Xt), byrow=T) Xp <- Xp * matrix(1/xsd, nrow(Xp), ncol(Xp), byrow=T) } if (scaley==T){ Yt <- Yt * matrix(1/ysd, nrow(Yt), ncol(Yt), byrow=T) Yp <- Yp * matrix(1/ysd, nrow(Yp), ncol(Yp), byrow=T) } # training result.scca <- scca(X=Xt, Y=Yt, c1=c1, c2=c2, ncomp=ncomp) #ur <- result.sca$u #vr <- result.sca$v #ur <- result.sca$uall #vr <- result.sca$vall ur <- result.scca$u vr <- result.scca$v dr <- result.scca$d # test #dmat <- t(Xp%*%ur) %*% Yp %*% vr #dsum.cv[r] <- sum(diag(dmat)[1:ncomp]) ##### Edouard score ##### Qp1 <- (Xp%*%ur) %*% diag(dr) %*% t(vr) #Qp12 <- (Xp%*%ur) %*% diag(dr) %*% t(vr) %*% diag(rep(1,nkeyword)) # ##### Recomb score ##### # vrtvr <- t(vr) %*% vr # eps <- 0.000001 # if (min(eigen(vrtvr,T)$values) < eps){ # vrtvrinv <- geninv(vrtvr, eps=eps) # } else { # vrtvrinv <- solve(vrtvr) # } # Qp2 <- Xp %*% ur %*% vrtvrinv %*% t(vr) # #Qp22 <- Xp %*% ur %*% vrtvrinv %*% t(vr) %*% diag(rep(1,nkeyword)) sQp1 <- Qp1 #sQp12 <- Qp12 # sQp2 <- Qp2 # #sQp22 <- Qp22 ###Qp <- Xp %*% ur %*% diag(dr) %*% vrtvrinv %*% t(vr) if (scaley==T){ sQp1 <- sQp1 * matrix(ysd, nrow(Yp), ncol(Yp), byrow=T) #sQp12 <- sQp12 * matrix(ysd, nrow(Yp), ncol(Yp), byrow=T) # sQp2 <- sQp2 * matrix(ysd, nrow(Yp), ncol(Yp), byrow=T) # #sQp22 <- sQp22 * matrix(ysd, nrow(Yp), ncol(Yp), byrow=T) } if (centery==T){ sQp1 <- sQp1 + matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) #sQp12 <- sQp12 + matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) # sQp2 <- sQp2 + matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) # #sQp22 <- sQp22 + matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) } #Qveclist1 <- c(Qveclist1, as.vector(sQp1)) #Qveclist2 <- c(Qveclist2, as.vector(sQp2)) #Ap <- A[pindex.r,] #Aveclist <- c(Aveclist, as.vector(Ap)) Q1[pindex.r,] <- sQp1 # Q2[pindex.r,] <- sQp2 } # # AUC for Edouard score # #aucscore1 <- eval.auc(Qveclist1, Aveclist) # aucscore1 <- eval.auc(as.vector(Q1), as.vector(A)) # # AUC for RECOMB score # #aucscore2 <- eval.auc(Qveclist2, Aveclist) # aucscore2 <- eval.auc(as.vector(Q2), as.vector(A)) # aucvec <- c(aucscore1, aucscore2) # #aucvec ## # local ROC ## aucvec.pery1 <- rep(0, py) ## aucvec.pery2 <- rep(0, py) ## for (k in 1:py){ ## aucvec.pery1[k] <- eval.auc(Q1[,k], A[,k]) ## aucvec.pery2[k] <- eval.auc(Q2[,k], A[,k]) ## } ## aucvec.perx1 <- rep(0, n) ## aucvec.perx2 <- rep(0, n) ## for (k in 1:n){ ## aucvec.perx1[k] <- eval.auc(Q1[k,], A[k,]) ## aucvec.perx2[k] <- eval.auc(Q2[k,], A[k,]) ## } ## list(auc=aucvec, auc.pery1=aucvec.pery1, auc.pery2=aucvec.pery2, auc.perx1=aucvec.perx1, auc.perx2=aucvec.perx2, Q1=Q1, #Q2=Q2) # list(auc=aucvec, Q1=Q1, Q2=Q2) 1 } cv.svm.fast <- function(X, Y, A, c1=NULL, c2=NULL, ncomp=4, fold=5, randomseed=1, centerx=T, scalex=T, centery=T, scaley=T, scaled=TRUE, kernel ="rbfdot", kpar = "automatic", C = 1){ nx <- nrow(X); px <- ncol(X) ny <- nrow(Y); py <- ncol(Y) n <- nx nkeyword <- py rsize <- trunc(n / fold) index <- 1:n Qveclist <- c() Aveclist <- c() Q <- matrix(0, n, py) rownames(Q) <- rownames(A) colnames(Q) <- colnames(A) #sQ <- matrix(0, n, py) for (r in 1:fold){ # random seed set.seed(randomseed - 1 + r) # remove a gene if (length(index) >= rsize){ rindex <- sample(index, size=rsize) index <- setdiff(index, rindex) } else { rindex <- index index <- setdiff(index, rindex) } pindex.r <- rindex tindex.r <- (1:n)[-pindex.r] Xt <- X[tindex.r, ] Yt <- Y[tindex.r, ] At <- A[tindex.r, ] Xp <- X[pindex.r, ] Yp <- Y[pindex.r, ] Ap <- A[pindex.r, ] # # normatlization for X and Y based on training mean and sd # xmean <- apply(Xt,2,mean); ymean <- apply(Yt,2,mean) # xsd <- apply(Xt,2,sd); ysd <- apply(Yt,2,sd) # xsd[xsd==0] <- 1; ysd[ysd==0] <- 1 # #Xt <- scale(Xt); Yt <- scale(Yt) # if (centerx==T){ # Xt <- Xt - matrix(xmean, nrow(Xt), ncol(Xt), byrow=T) # Xp <- Xp - matrix(xmean, nrow(Xp), ncol(Xp), byrow=T) # } # if (centery==T){ # Yt <- Yt - matrix(ymean, nrow(Yt), ncol(Yt), byrow=T) # Yp <- Yp - matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) # } # if (scalex==T){ # Xt <- Xt * matrix(1/xsd, nrow(Xt), ncol(Xt), byrow=T) # Xp <- Xp * matrix(1/xsd, nrow(Xp), ncol(Xp), byrow=T) # } # if (scaley==T){ # Yt <- Yt * matrix(1/ysd, nrow(Yt), ncol(Yt), byrow=T) # Yp <- Yp * matrix(1/ysd, nrow(Yp), ncol(Yp), byrow=T) # } for (k in 1:py){ if (length(unique(At[,k]))==1){ # no training # test testpred <- rep(-1, length(pindex.r)) #testpred <- rep(-10, length(pindex.r)) Q[pindex.r,k] <- testpred } else { # training #model.svm <- ksvm(x=Xt, y=At[,k], type="C-svc", kernel="rbfdot",kpar=list(sigma=0.05),C=5) #model.svm <- ksvm(x=Xt, y=At[,k], type="C-svc", kernel="rbfdot",kpar=list(sigma=0.05),C=5) #model.svm <- ksvm(x=Xt, y=At[,k], type="C-svc", kernel="vanilladot",C=C) model.svm <- ksvm(x=Xt, y=At[,k], type="C-svc", scaled=scaled, kernel=kernel, kpar=kpar, C=C) # test #testpred <- predict(model.svm, Xp) testpred <- predict(model.svm, Xp, type="decision") Q[pindex.r,k] <- testpred } } #sQ[pindex.r,k] <- Q[pindex.r,k] #if (scaley==T){ # #sQ[pindex.r,k] <- sQ[pindex.r,k] * matrix(ysd, nrow(Yp), ncol(Yp), byrow=T) # sQ[pindex.r,k] <- sQ[pindex.r,k] * ysd[k] #} #if (centery==T){ # #sQ[pindex.r,k] <- sQ[pindex.r,k] + matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) # sQ[pindex.r,k] <- sQ[pindex.r,k] + ymean[k] #} } # #Qveclist <- c(Qveclist, as.vector(Q)) # #Aveclist <- c(Aveclist, as.vector(A)) # Qveclist <- as.vector(Q) # Aveclist <- as.vector(A) # # AUC for SVM score # aucscore <- eval.auc(Qveclist, Aveclist) ## aucscore ## # local ROC ## aucvec.pery <- rep(0, py) ## for (k in 1:py){ ## aucvec.pery[k] <- eval.auc(Q[,k], A[,k]) ## } ## aucvec.perx <- rep(0, px) ## for (k in 1:n){ ## aucvec.perx[k] <- eval.auc(Q[k,], A[k,]) ## } ## list(auc=aucscore, auc.pery=aucvec.pery, auc.perx=aucvec.perx, Q=Q) ### list(auc=aucscore, auc.pery=aucvec.pery, Q=Q) # list(auc=aucscore, Q=Q) 1 } cv.nn.fast <- function(X, Y, A, c1=NULL, c2=NULL, ncomp=4, fold=5, randomseed=1, centerx=T, scalex=T, centery=T, scaley=T, scaled=TRUE, kernel ="rbfdot", kpar = "automatic", C = 1){ nx <- nrow(X); px <- ncol(X) ny <- nrow(Y); py <- ncol(Y) n <- nx nkeyword <- py rsize <- trunc(n / fold) index <- 1:n xsimmat <- norm.kmat(X %*% t(X)) Qveclist <- c() Aveclist <- c() Q <- matrix(0, n, py) rownames(Q) <- rownames(A) colnames(Q) <- colnames(A) #sQ <- matrix(0, n, py) for (r in 1:fold){ # random seed set.seed(randomseed - 1 + r) # remove a gene if (length(index) >= rsize){ rindex <- sample(index, size=rsize) index <- setdiff(index, rindex) } else { rindex <- index index <- setdiff(index, rindex) } pindex.r <- rindex tindex.r <- (1:n)[-pindex.r] Xt <- X[tindex.r, ] Yt <- Y[tindex.r, ] At <- A[tindex.r, ] Xp <- X[pindex.r, ] Yp <- Y[pindex.r, ] Ap <- A[pindex.r, ] # # normatlization for X and Y based on training mean and sd # xmean <- apply(Xt,2,mean); ymean <- apply(Yt,2,mean) # xsd <- apply(Xt,2,sd); ysd <- apply(Yt,2,sd) # xsd[xsd==0] <- 1; ysd[ysd==0] <- 1 # #Xt <- scale(Xt); Yt <- scale(Yt) # if (centerx==T){ # Xt <- Xt - matrix(xmean, nrow(Xt), ncol(Xt), byrow=T) # Xp <- Xp - matrix(xmean, nrow(Xp), ncol(Xp), byrow=T) # } # if (centery==T){ # Yt <- Yt - matrix(ymean, nrow(Yt), ncol(Yt), byrow=T) # Yp <- Yp - matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) # } # if (scalex==T){ # Xt <- Xt * matrix(1/xsd, nrow(Xt), ncol(Xt), byrow=T) # Xp <- Xp * matrix(1/xsd, nrow(Xp), ncol(Xp), byrow=T) # } # if (scaley==T){ # Yt <- Yt * matrix(1/ysd, nrow(Yt), ncol(Yt), byrow=T) # Yp <- Yp * matrix(1/ysd, nrow(Yp), ncol(Yp), byrow=T) # } for (i in pindex.r){ #wprofile <- rep(0, py) #maxval <- 0 #for (j in tindex.r){ # simval <- xsimmat[i,j] # if (xsimmat > maxval){ # wprofile <- A[j,] * xsimmat[i,j] # maxval <- xsimmat[i,j] # } #} #maxval <- max(xsimmat[i,tindex.r]) #maxidx <- tindex.r[xsimmat[i,tindex.r]==maxval][1] #wprofile <- Y[maxidx,] * xsimmat[i,maxidx] maxidx <- tindex.r[order(xsimmat[i,tindex.r],decreasing=T)[1]] wprofile <- xsimmat[i,maxidx] * Y[maxidx,] Q[i,] <- wprofile } #for (k in 1:py){ # if (length(unique(At[,k]))==1){ # # no training # # test # testpred <- rep(-1, length(pindex.r)) # #testpred <- rep(-10, length(pindex.r)) # Q[pindex.r,k] <- testpred # } else { # # training # #model.svm <- ksvm(x=Xt, y=At[,k], type="C-svc", kernel="rbfdot",kpar=list(sigma=0.05),C=5) # #model.svm <- ksvm(x=Xt, y=At[,k], type="C-svc", kernel="rbfdot",kpar=list(sigma=0.05),C=5) # #model.svm <- ksvm(x=Xt, y=At[,k], type="C-svc", kernel="vanilladot",C=C) # model.svm <- ksvm(x=Xt, y=At[,k], type="C-svc", scaled=scaled, kernel=kernel, kpar=kpar, C=C) # # test # testpred <- predict(model.svm, Xp) # Q[pindex.r,k] <- testpred # } #} #sQ[pindex.r,k] <- Q[pindex.r,k] #if (scaley==T){ # #sQ[pindex.r,k] <- sQ[pindex.r,k] * matrix(ysd, nrow(Yp), ncol(Yp), byrow=T) # sQ[pindex.r,k] <- sQ[pindex.r,k] * ysd[k] #} #if (centery==T){ # #sQ[pindex.r,k] <- sQ[pindex.r,k] + matrix(ymean, nrow(Yp), ncol(Yp), byrow=T) # sQ[pindex.r,k] <- sQ[pindex.r,k] + ymean[k] #} } ## Qveclist <- c(Qveclist, as.vector(Q)) ## Aveclist <- c(Aveclist, as.vector(A)) # Qveclist <- as.vector(Q) # Aveclist <- as.vector(A) # # AUC for SVM score # aucscore <- eval.auc(Qveclist, Aveclist) ## #aucscore ## # local ROC ## aucvec.pery <- rep(0, py) ## for (k in 1:py){ ## aucvec.pery[k] <- eval.auc(Q[,k], A[,k]) ## } ## aucvec.perx <- rep(0, px) ## for (k in 1:n){ ## aucvec.perx[k] <- eval.auc(Q[k,], A[k,]) ## } ## list(auc=aucscore, auc.pery=aucvec.pery, auc.perx=aucvec.perx, Q=Q) # list(auc=aucscore, Q=Q) 1 } ################ for comparison with Roded method ############## cv.svm.r <- function(X, Y, A, c1=NULL, c2=NULL, ncomp=4, fold=5, randomseed=1, centerx=T, scalex=T, centery=T, scaley=T, scaled=TRUE, kernel ="rbfdot", kpar = "automatic", C = 1){ nx <- nrow(X); px <- ncol(X) ny <- nrow(Y); py <- ncol(Y) n <- nx nkeyword <- py rsize <- trunc(n / fold) index <- 1:n Qveclist <- c() Aveclist <- c() Q <- matrix(0, n, py) rownames(Q) <- rownames(A) colnames(Q) <- colnames(A) #sQ <- matrix(0, n, py) for (r in 1:fold){ # random seed set.seed(randomseed - 1 + r) # remove a gene if (length(index) >= rsize){ rindex <- sample(index, size=rsize) index <- setdiff(index, rindex) } else { rindex <- index index <- setdiff(index, rindex) } pindex.r <- rindex tindex.r <- (1:n)[-pindex.r] Xt <- X[tindex.r, ] Yt <- Y[tindex.r, ] At <- A[tindex.r, ] Xp <- X[pindex.r, ] Yp <- Y[pindex.r, ] Ap <- A[pindex.r, ] for (k in 1:py){ if (length(unique(At[,k]))==1){ # no training # test prediction score testpred <- rep(-1, length(pindex.r)) Q[pindex.r,k] <- testpred } else { # training model.svm <- ksvm(x=Xt, y=At[,k], type="C-svc", scaled=scaled, kernel=kernel, kpar=kpar, C=C) # test #testpred <- predict(model.svm, Xp, type="decision") testpred <- predict(model.svm, Xp, type="response") Q[pindex.r,k] <- testpred } } } # global AUC aucscore <- eval.auc(as.vector(Q), as.vector(A)) # local AUC auc.eachy <- rep(0, py) for (k in 1:py){ auc.eachy[k] <- eval.auc(Q[,k], A[,k]) } auc.eachx <- rep(0, n) for (k in 1:n){ auc.eachx[k] <- eval.auc(Q[k,], A[k,]) } list(auc=aucscore, auc.eachy=auc.eachy, auc.eachx=auc.eachx, Q=Q) #list(auc=aucscore, Q=Q) } cv.svm.p <- function(X, Y, A, c1=NULL, c2=NULL, ncomp=4, fold=5, randomseed=1, centerx=T, scalex=T, centery=T, scaley=T, scaled=TRUE, kernel ="rbfdot", kpar = "automatic", C = 1){ nx <- nrow(X); px <- ncol(X) ny <- nrow(Y); py <- ncol(Y) n <- nx nkeyword <- py rsize <- trunc(n / fold) index <- 1:n Qveclist <- c() Aveclist <- c() Q <- matrix(0, n, py) rownames(Q) <- rownames(A) colnames(Q) <- colnames(A) #sQ <- matrix(0, n, py) for (r in 1:fold){ # random seed set.seed(randomseed - 1 + r) # remove a gene if (length(index) >= rsize){ rindex <- sample(index, size=rsize) index <- setdiff(index, rindex) } else { rindex <- index index <- setdiff(index, rindex) } pindex.r <- rindex tindex.r <- (1:n)[-pindex.r] Xt <- X[tindex.r, ] Yt <- Y[tindex.r, ] At <- A[tindex.r, ] Xp <- X[pindex.r, ] Yp <- Y[pindex.r, ] Ap <- A[pindex.r, ] for (k in 1:py){ if (length(unique(At[,k]))==1){ # no training # test prediction score testpred <- rep(-1, length(pindex.r)) Q[pindex.r,k] <- testpred } else { # training model.svm <- ksvm(x=Xt, y=At[,k], type="C-svc", scaled=scaled, kernel=kernel, kpar=kpar, C=C) # test #testpred <- predict(model.svm, Xp, type="decision") testpred <- predict(model.svm, Xp, type="probabilities") Q[pindex.r,k] <- testpred } } } # global AUC aucscore <- eval.auc(as.vector(Q), as.vector(A)) # local AUC auc.eachy <- rep(0, py) for (k in 1:py){ auc.eachy[k] <- eval.auc(Q[,k], A[,k]) } auc.eachx <- rep(0, n) for (k in 1:n){ auc.eachx[k] <- eval.auc(Q[k,], A[k,]) } list(auc=aucscore, auc.eachy=auc.eachy, auc.eachx=auc.eachx, Q=Q) #list(auc=aucscore, Q=Q) } cv.svm.v <- function(X, Y, A, c1=NULL, c2=NULL, ncomp=4, fold=5, randomseed=1, centerx=T, scalex=T, centery=T, scaley=T, scaled=TRUE, kernel ="rbfdot", kpar = "automatic", C = 1){ nx <- nrow(X); px <- ncol(X) ny <- nrow(Y); py <- ncol(Y) n <- nx nkeyword <- py rsize <- trunc(n / fold) index <- 1:n Qveclist <- c() Aveclist <- c() Q <- matrix(0, n, py) rownames(Q) <- rownames(A) colnames(Q) <- colnames(A) #sQ <- matrix(0, n, py) for (r in 1:fold){ # random seed set.seed(randomseed - 1 + r) # remove a gene if (length(index) >= rsize){ rindex <- sample(index, size=rsize) index <- setdiff(index, rindex) } else { rindex <- index index <- setdiff(index, rindex) } pindex.r <- rindex tindex.r <- (1:n)[-pindex.r] Xt <- X[tindex.r, ] Yt <- Y[tindex.r, ] At <- A[tindex.r, ] Xp <- X[pindex.r, ] Yp <- Y[pindex.r, ] Ap <- A[pindex.r, ] for (k in 1:py){ if (length(unique(At[,k]))==1){ # no training # test prediction score testpred <- rep(-1, length(pindex.r)) Q[pindex.r,k] <- testpred } else { # training model.svm <- ksvm(x=Xt, y=At[,k], type="C-svc", scaled=scaled, kernel=kernel, kpar=kpar, C=C) # test #testpred <- predict(model.svm, Xp, type="decision") testpred <- predict(model.svm, Xp, type="votes") Q[pindex.r,k] <- testpred } } } # global AUC aucscore <- eval.auc(as.vector(Q), as.vector(A)) # local AUC auc.eachy <- rep(0, py) for (k in 1:py){ auc.eachy[k] <- eval.auc(Q[,k], A[,k]) } auc.eachx <- rep(0, n) for (k in 1:n){ auc.eachx[k] <- eval.auc(Q[k,], A[k,]) } list(auc=aucscore, auc.eachy=auc.eachy, auc.eachx=auc.eachx, Q=Q) #list(auc=aucscore, Q=Q) }