# See CRAN package TTCA
require(MASS)
require(Matrix)
require(RISmed)
require(quantreg)
require(VennDiagram)
require(tcltk2)
require(tcltk)

##########################################
#### Gene-ANALYSE
##########################################
# See CRAN package TTCA
data(EGF, Control, annot, annotation)
S <- "gene"
# Sampling time points [h]; repeated values are replicate measurements.
Control.time <- c(0, 0, 0.5, 1, 4, 6, 24, 24, 48, 48, 48)
EGF.time     <- c(0, 0, 0.5, 0.5, 1, 2, 4, 6, 8, 12, 18, 24, 24, 48, 48, 48)
file <- paste0(getwd(), "/TTCA_Gene")
dir.create(file)
TTCAresult <- TTCA(grp1 = EGF, grp1.time = EGF.time,
                   grp2 = Control, grp2.time = Control.time,
                   S = "gene", lambda = 0.6,
                   annot = annot, annotation = annotation,
                   pVal = 0.05, codetest = FALSE, file = file,
                   Stimulus1 = "epidermal+growth+factor",
                   timeInt = c(4, 12), MaxPics = 10000)

##########################################
#### GO-ANALYSE
##########################################
data(EGF, Control, annot, annotation)
require(biomaRt)
# Map Affymetrix probeset ids to GO terms via Ensembl BioMart.
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset = "hsapiens_gene_ensembl")
mapGO <- getBM(attributes = c("go_id", "name_1006", 'affy_hugene_2_0_st_v1'),
               filters = 'affy_hugene_2_0_st_v1',
               values = rownames(annot), mart = ensembl)
colnames(mapGO) <- c("go_id", "GO_name", "probeset_id")
S <- "GO"
Control.time <- c(0, 0, 0.5, 1, 4, 6, 24, 24, 48, 48, 48)
file <- paste0(getwd(), "/TTCA_GO")
dir.create(file)
TTCAresult <- TTCA(grp1 = EGF, grp1.time = EGF.time,
                   grp2 = Control, grp2.time = Control.time,
                   S = "GO", pVal = 0.05, lambda = 0.6,
                   codetest = FALSE, file = file,
                   Stimulus1 = "epidermal+growth+factor",
                   timeInt = c(4, 12), MaxPics = 10000, mapGO = mapGO)

### Included functions ##
# TTCA (main function)
# changeToGO
# DistplotPval
# FF.median
# Integral
# medianr
# medianrS
# my.write
# optimalMethodeDistr
# Peakscore
# PeakscoreH0H1
# ProgTime
# revOverlapScore
# Vennfunc
# weightvec
#########################
TTCA <- function(grp1, grp1.time, grp2, grp2.time, lambda=0.6, annot=NA, annotation="annotation",
                 timeInt=NULL, pVal=0.05, codetest=FALSE, file=getwd(), MaxPics=10000,
                 Stimulus1="", Stimulus2="", S="gene", mapGO="", PeakMode="norm"){
  #######################################
  ##
## description of function values:
########################################
#' @param grp1       : Dataset with longitudinal sampled data (data.frame)
#' @param grp2       : Dataset with longitudinal sampled data (data.frame) for comparison
#' @param grp1.time  : Timepoints for Dataset 1 (vector like: c(0,0,0.5,1,2,4,6,8,12,12))
#' @param grp2.time  : Timepoints for Dataset 2 (vector like: c(0,0,0.5,3,2,4,6,8,12,12,24))
#' @param lambda     : Smoothing parameter in the penalty term for fitting with quantile regression (Default: lambda=0.6). Find your own parameter!
#' @param annot      : Annotation for pictures and results (data.frame with 2 columns, ID and GeneName). (Default: annot=NA)
#' @param pVal       : P-value for the local hypothesis test (default: 0.05).
#' @param codetest   : Reduces the data set to 1100 features for a quick pass of the program. (default: codetest=FALSE)
#' @param PeakMode   : Peak mode "norm" uses the variance between replicates. If you change it to another character value, a normal hypothesis test is conducted instead (default: PeakMode="norm")
#' @param timeInt    : Defines the early, middle and late time periods. Define the middle time period between 4 h and 12 h with timeInt<-c(4,12). (default: timeInt=NULL)
#' @param file       : Where should the result folder be created? (default: file=getwd())
#' @param MaxPics    : Limits the number of plots (default: MaxPics=10000)
#' @param Stimulus1  : With which term shall the gene be searched in PubMed? Stimulus1="Insulin+like+growth+factor" (default: Stimulus1="")
#' @param Stimulus2  : With which term shall the gene be searched in PubMed? Stimulus2="epidermal+growth+factor" (default: Stimulus2="")
#' @param S          : Defines the mode. S=="GO" changes the program to gene ontology mode (default: S="gene")
#' @param mapGO      : Links genes to Gene Ontology terms (default: mapGO="")
#' @param annotation : Merges the TTCA result by rowname with a table of your choice.
Example: annotation<-annotation[,c("probeset_id", "gene_name","transkript_id","GO_BP","GO_CC","GO_mf")] (default: annotation="annotation") #' #' @return A table containing different significance values, rankings, p-values ####################################### ## ERROR if missing or wrong input ######################################## if( is.data.frame(annot) & !identical(rownames(annot),rownames(grp1)) & !identical(rownames(grp1),rownames(grp2)) ){ stop('Annot must have the same rownames like grp1 and grp2! Not identical rowname vector.')} if( !is.data.frame(annot) & S!="GO" ){ stop('You forgot the annot file. You can neglect this in GO mode.')} if( is.null(colnames(mapGO)[1]) & S =="GO" ){ stop('You need a mapGO file. See example in Documentation (?TTCA).')} ######################################## print("--------------------------------------------------------------------------------------------------") print(paste0(" ")) print(paste0(" ")) print(paste0("-- --")) print(paste0("----- -----")) print(paste0("-------- WELCOME TO TTCA --------")) print(paste0("----- -----")) print(paste0("-- transcript time course analysis --")) print(paste0(" ")) print(paste0(" ")) print(paste0(" ")) print("--------------------------------------------------------------------------------------------------") print(paste0(" ")) # ---MatrixModels ,quantreg: Quantil Regression -----tcltk2: for progress bar ----NCBI2R and XML: for literature search ---MASS: for fit curve to distrubution ######################################## ## process Bar ######################################## grp1n<-deparse(substitute(grp1)) #for graphics at the end grp2n<-deparse(substitute(grp2)) #for graphics at the end startTimeFunction<-Sys.time() global <- tkProgressBar(title = paste0("global progress bar ",grp1n," vs. 
",grp2n," (Start time: ",format(startTimeFunction, "%H:%M"),")"), min = 0, max = 100, width = 1000) # progress bar setTkProgressBar(global, 1, label=paste("Creates new folder")) ######################################## ## Foldergeneration and Save Input: ######################################## folder1<-paste0(file,"/contrast_",grp1n,"-vs-",grp2n) if (exists("folder")){rm(folder)} for (i in 1:100){ folder2<-paste0(folder1,"_version_",i) if(!file.exists(folder2) & !exists("folder")){ dir.create(folder2) folder<-folder2} } dir.create(paste0(folder,"/Information")) dir.create(paste0(folder,"/Quality_Control")) dir.create(paste0(folder,"/Significance_And_Pval")) dir.create(paste0(folder,"/Saved_Intermediate_Results")) dir.create(paste0(folder,"/Significance_And_Pval/PeakScore")) dir.create(paste0(folder,"/Venn_and_other_characteristics")) dir.create(paste0(folder,"/Resultfigure")) rm(folder1,folder2,file) #Save Function Input Infotext<-list(); Infotext$header="Thanks for using method TTCA to extract genes with strongest activity over time.";Infotext$starttime<-startTimeFunction ;Infotext$grp1.time<-grp1.time; Infotext$grp2.time<-grp2.time; Infotext$grp1n<-grp1n; Infotext$grp2n<-grp2n; Infotext$lambda <-lambda; Infotext$pVal<-pVal ; Infotext$codetest = codetest; Infotext$Stimulus1=Stimulus1; Infotext$Stimulus2=Stimulus2; Infotext$MaxPics=MaxPics; Infotext$timeInt=timeInt; Infotext$annotsize=dim(annot); Infotext$annot=head(annot,10); Infotext$grp1size<-dim(grp1); Infotext$grp1<-head(grp1,10); Infotext$grp2size<-dim(grp2); Infotext$grp2<-head(grp2,10); Infotext$sessionInfo<-sessionInfo() sink(file = paste0(folder, "/Information/Function_Input.txt"),append = T) print(Infotext) sink(file =NULL) rm(Infotext) # setTkProgressBar(global, 2, label=paste("New folders have been created. 
Removes features that are not annotated or do not have positive values.")) ######################################## ## get sure that original data and related timepoints are ordered grp1 <-grp1[ ,order(grp1.time)] grp1.time <-grp1.time[order(grp1.time)] grp2 <-grp2[ ,order(grp2.time)] grp2.time <-grp2.time[order(grp2.time)] ######################################## ## Gene Ontology mode. Change gene to Gene Ontology and calculate reverse overlap Score if(S=="GO"){ print(paste0(format(Sys.time(), "%X"),": GO-Modus. ")) GO<-ChangeToGO(grp1=grp1,grp2=grp2,grp1.time=grp1.time,mapGO=mapGO) save(GO, file=paste0(folder,"/Saved_Intermediate_Results/RawGene_to_GO.RData")) grp1<-GO$grp1 grp2<-GO$grp2 annot=GO$annot } beginSize=nrow(grp1) # startnumber of features ########################################## ## merge dataSet grp1.grp2 <- merge(grp1,grp2,by="row.names") rownames(grp1.grp2) <- grp1.grp2[,"Row.names"] grp1.grp2 <- grp1.grp2[,-1] grp1.grp2.time <- c(grp1.time,grp2.time)#data timepoints grp <- c(rep(1, length(grp1.time)),rep(2, length(grp2.time)))# data groups ###################################### ## use only annotated probes if(S!="GO"){ rownames(annot)=annot[,"probeset_id" ] NotAnnot <- rownames((annot[is.na(annot[,"gene_name"]),])) grp1 <- grp1[!(rownames(grp1) %in% NotAnnot), ] grp2 <- grp2[!(rownames(grp2) %in% NotAnnot), ] grp1.grp2<-grp1.grp2[!(rownames(grp1.grp2) %in% NotAnnot), ] annot <- annot[!(rownames(annot) %in% NotAnnot), ] print(paste0(format(Sys.time(), "%X"),": ",beginSize-dim(grp1)[1] ," not annotated features have been removed (absolute ",round((1-dim(grp1)[1]/beginSize)*100,1)," % reduction)" )) } ######################################## ## use only features with more than one positve value for intensity across stimuli info<-nrow(grp1) Z1<-apply(grp1.grp2, 1,max) if(min(Z1,na.rm = TRUE)<0){ Z2<-names(Z1[Z1<0]) grp1 <- grp1[ !(rownames(grp1) %in% Z2), ] grp2 <- grp2[ !(rownames(grp2) %in% Z2), ] grp1.grp2<-grp1.grp2[ !(rownames(grp1.grp2) %in% 
Z2), ] print(paste0(format(Sys.time(), "%X"),": ",info-dim(grp1)[1] ," features have been removed because they have no positive intensity values (absolute ",round((1-dim(grp1)[1]/beginSize)*100,1)," % reduction)" )) rm(Z2) } rm(Z1,info) ######################################## ## code-test. Reduce featurenumber for faster test ######################################## if(codetest==TRUE){grp1.grp2<-grp1.grp2[sort(sample(1:nrow(grp1.grp2), 1100)), ] MaxPics=10 setTkProgressBar(global, 2.5, label=paste("Code testing modus. Works with 1100 features only")) print(paste0(format(Sys.time(), "%X"),": Code testing modus. Works with 1100 features only ")) } ################################################################################ ######################################## ## Start Mainpart ######################################## setTkProgressBar(global, 3, label=paste("Non-annotated features were removed.Tests for significant distances between the dynamics")) ######################################## ## Peak Score ######################################## Sumreplicates<-sum(duplicated(grp1.time))+sum(duplicated(grp2.time)) #count the number of replicates if (Sumreplicates > 4 & PeakMode=="norm"){ RESULT<- PeakScore(dat=grp1.grp2,grp=grp, tme=grp1.grp2.time,folder=folder,global=global) }else{ RESULT<-PeakScoreHOH1(dat=grp1.grp2,grp=grp, tme=grp1.grp2.time,folder=folder,global=global) } # identical dynamics disturbing calculations, Remove them. 
if (nrow(RESULT[RESULT[,"MaxDist"]==0,])>0){ identDynamics<-rownames(RESULT[RESULT[,"MaxDist"]==0,]) RESULT = RESULT[rownames(RESULT )!=identDynamics,] grp1.grp2=grp1.grp2[rownames(grp1.grp2)!=identDynamics,] grp1= grp1[rownames(grp1 )!=identDynamics,] grp2= grp2[rownames(grp2 )!=identDynamics,] } # tresP<-DistplotPval(x1<-RESULT[,"MaxDist",drop=FALSE],main="Hypothesistest for peak score",xlab="Maximum Distance", pVal=pVal,folder=paste0(folder,"/Significance_And_Pval/"),ForTest<-"Significant_Peak_Score_alternativ") RESULT[ ,"SignPeakH0"]<-RESULT[,"MaxDist"] > tresP$tres RESULT<-merge(RESULT,tresP$Pval ,by="row.names") rownames(RESULT)<-RESULT[,1] RESULT<-RESULT[,-1] ## ProgressBar setTkProgressBar(global, 9.25, label=paste("Found significant distances between the dynamics.Program checks whether the dynamics are significant")) save(RESULT, file=paste0(folder,"/Saved_Intermediate_Results/RESULT_afterPeakScore.RData")) ################################################################################ ## Calculate Reverse overlap Score if(S=="GO"){ setTkProgressBar(global, 9.25, label=paste("Found significant distances between the dynamics.Program calculates reverse overlap score now")) ROS<-revOverlapScore(go=GO,tm1=grp1.time,tm2=grp2.time,folder=folder) RESULT<-merge(RESULT,ROS ,by="row.names") rownames(RESULT)<-RESULT[,1] RESULT<-RESULT[,-1] tresRos<-DistplotPval(x1<-RESULT[,"RevOverlapScore",drop=FALSE],main="Reverse Overlap Score",xlab="overlap", pVal=pVal,folder=paste0(folder,"/Significance_And_Pval/"),ForTest<-"Reverse_Overlap_Score") RESULT[ ,"SignROS"]<-RESULT[,"RevOverlapScore"] > tresRos$tres RESULT<-merge(RESULT,tresRos$Pval ,by="row.names") rownames(RESULT)<-RESULT[,1] RESULT<-RESULT[,-1] RESULT<-merge(RESULT,GO$groupsize,by="row.names") #if relation is working vor annot="" as well as for annot=data.frame(). avoid warning rownames(RESULT)<-RESULT[,1] RESULT<-RESULT[,-1] setTkProgressBar(global, 9.75, label=paste("Calculation of reverse overlap score is done. 
Program checks whether the dynamics are significant")) save(RESULT, file=paste0(folder,"/Saved_Intermediate_Results/RESULT_after_ROS_and_groupsize.RData")) } ######################################## ## Test for significant dynamic with hypothesis test ######################################## ## estimate time consumption hypothesis test dynScor <- tkProgressBar(title = paste0("progress bar (Start time: ",format(Sys.time(), "%H:%M"),")"), min = 0, max = 100, width = 1000) # progress bar setTkProgressBar(dynScor,0, label=paste("Calculating time for dynamic test.")) timeusage<-proc.time() FF1time <- FF.median(dat=grp1.grp2[1:50,], grp=grp, tme=grp1.grp2.time) #for timemeasurement for 50 steps tt<-proc.time()-timeusage# time for 50 steps timePerFeature<-sum(as.numeric(tt)[1:2])/50 timeforSample<-timePerFeature*dim(grp1.grp2)[1] setTkProgressBar(global, 10, label=paste0( "Program checks whether the dynamics are significant. This step is expected to be completed by ",ProgTime(timeforSample),".")) setTkProgressBar(dynScor, 1, label=paste0( "Program checks whether the dynamics are significant.")) rm(FF1time,timeusage,timeforSample,timePerFeature,tt) ## start calculation local hypothesis test Dynamic<-FF.median(dat=grp1.grp2, grp=grp, tme=grp1.grp2.time,dynScor=dynScor,global=global) #Dynamic$FF hypothesis test/ error #Dynamic$qr1 quantil regression fit group1 #Dynamic$qr2 quantil regression fit group2 SaveDynamic<-Dynamic$FF save(SaveDynamic, file=paste0(folder,"/Saved_Intermediate_Results/RESULT-before-DynamicScore_Dynamic.RData")) #load("C:/Users/Marco/Documents/CompBioSys/Affymetrix/AlbrechtMethod/contrast_IGF-vs-BControl_version_1/PeakScore/afterPeakScore.RData") #load("C:/Users/Marco/Documents/CompBioSys/Affymetrix/AlbrechtMethod/contrast_IGF-vs-BControl_version_1/DynamicScore/Dynamic.RData") RESULT[,"Dynamic"] <- as.data.frame(Dynamic$FF) #normalized value (0=no dynamik; 1=highest dynamik) ## reduce data set to the local significant Features after two-sided-t.test 
tresDyn<-DistplotPval(x1<-RESULT[,"Dynamic",drop=FALSE],main="Null hypothesis versus alternative hypothesis",xlab="(h0:residuals)/(h1:residuals)", pVal=pVal,folder=paste0(folder,"/Significance_And_Pval/"),ForTest<-"Significant_Dynamic_Score") RESULT<-merge(RESULT,tresDyn$Pval ,by="row.names") rownames(RESULT)<-RESULT[,1] RESULT<-RESULT[,-1] RESULT[ ,"SignDyn"]<-RESULT[,"Dynamic"] > tresDyn$tres save(RESULT, file=paste0(folder,"/Saved_Intermediate_Results/RESULT-with-DynamicScore.RData")) #load("C:/Users/Marco/Documents/CompBioSys/Affymetrix/AlbrechtMethod/contrast_EGF-vs-HGF_version_1/Saved_Intermediate_Results/RESULT-with-DynamicScore.RData") # progressbar close(dynScor) rm(tresDyn,dynScor, SaveDynamic) setTkProgressBar(global, 40, label=paste("Found significant dynamics. Program checks whether the integrals between dynamics are significant.")) ################################################################################ ######################################## ## Calcuate integral ######################################## # if early and late is not determinated program will set abritary intermediate values. if(length(timeInt)!=2){ T1<-(max(grp1.grp2.time)-min(grp1.grp2.time))/3 timeInt<-c(min(grp1.grp2.time)+T1,min(grp1.grp2.time)+2*T1) print(paste0(format(Sys.time(), "%X"),": Set early response period before ",round(timeInt[1],1),"h and late response period after ",round(timeInt[2],1),"h." 
))} ################### ## classic #globMaxDist=max(grp1.grp2)-min(grp1.grp2) #int <- apply(X=grp1.grp2,MARGIN=1,FUN=Integral, grp=grp, grp1.time=grp1.time, grp2.time=grp2.time , timeInt= timeInt ,globMaxDist=globMaxDist) ## quantil regression qr1.qr2 <- cbind(Dynamic$qr1,Dynamic$qr2) rownames(qr1.qr2)<-rownames(grp1.grp2) colnames(qr1.qr2)<-c(unique(grp1.time),unique(grp2.time)) grpqr1.qr2 <- c(rep(1, ncol(Dynamic$qr1)),rep(2, ncol(Dynamic$qr2)))# data groups globMaxDist=max(qr1.qr2)-min(qr1.qr2) int <- apply(X=qr1.qr2,MARGIN=1,FUN=Integral, grp=grpqr1.qr2, grp1.time=unique(grp1.time), grp2.time=unique(grp2.time) , timeInt= timeInt ,globMaxDist=globMaxDist) ################### int=t(int) #int <- apply(int,MARGIN=1, function(x) x/max(x,na.rm = TRUE)) #normalization colnames(int)<-c("earlyResponse","middleResponse","lateResponse","completeResponse") for(i in 1:4){ tresI<-DistplotPval(x1<-as.data.frame(int[,i,drop=FALSE]),xlab="Integral beetween dynamics",main=paste0("Integral for ",colnames(int)[i]," significant in dynamic"), pVal=pVal,folder=paste0(folder,"/Significance_And_Pval/"),ForTest=colnames(int)[i]) int<-as.data.frame(int) if (i==1){ int[,"signifearlyRes"]=as.numeric(int[,"earlyResponse"]) >tresI$tres int<-merge(int,tresI$Pval ,by="row.names")} if (i==2){ int[,"signifmiddleRes"]=as.numeric(int[,"middleResponse"]) >tresI$tres int<-merge(int,tresI$Pval ,by="row.names")} if (i==3){ int[,"signiflateRes"]=as.numeric(int[,"lateResponse"]) >tresI$tres int<-merge(int,tresI$Pval ,by="row.names")} if (i==4){ int[,"signifResponse"]=as.numeric(int[,"completeResponse"])>tresI$tres int<-merge(int,tresI$Pval ,by="row.names")} rownames(int)<-int[,1] int<-int[,-1] } RESULT<-merge(RESULT, int ,by="row.names") rownames(RESULT)<-RESULT[,1] RESULT<-RESULT[,-1] save(RESULT, file=paste0(folder,"/Saved_Intermediate_Results/RESULT-after-IntegralScore.RData")) # ################################################################################ ######################################## 
## use only genes which are significant in less one cathegory ######################################## if(S=="GO"){ subRES<-RESULT[,c("SignPeak","SignDyn","signifearlyRes", "signifmiddleRes", "signiflateRes","SignROS")] }else{ subRES<-RESULT[,c("SignPeak","SignDyn","signifearlyRes", "signifmiddleRes", "signiflateRes")] } sig<-NULL for( i in 1:nrow(RESULT)){sig[i]=sum(as.logical(subRES[i,]))>=2} #for( i in 1:nrow(RESULT)){sig[i]=!all(!as.logical( subRES[i,]))} # test whether one significant threshold is exceeded RESULT<-RESULT[sig ,] rm(sig,tresI,subRES) setTkProgressBar(global, 45, label=paste("Found integrals between dynamics.",nrow(RESULT)," features are in at least one criterion significant. Programm is looking for features in Pubmed " )) save(RESULT, file=paste0(folder,"/Saved_Intermediate_Results/RESULT-before-PubMedScore.RData")) ################################################################################ ######################################## ## Relevance Score: use only genes which are known in literature. ######################################## #load("C:/Users/Marco/Documents/CompBioSys/Affymetrix/AlbrechtMethod/contrast_EGF-vs-BControl_version_1/Saved_Intermediate_Results/RESULT-before-PubMedScore.RData") RESULT[,"PubMed"]=0 #test internet connectivity if (suppressWarnings(tryCatch( QueryCount(EUtilsSummary("TTCA")), error=function(e){-1}))==-1){print(paste0(format(Sys.time(), "%X"),": No access to webpage. Searching Pubmed for publication count was unsuccessful. 
")) }else{ process<-round(1:10*(nrow(RESULT))/10) #calculating process information if (Stimulus1!=""){if (Stimulus2==""){Stimulus<-Stimulus1}else{Stimulus<-paste0(Stimulus1," AND ",Stimulus2)}} print("--------------------------------------------------------------------------------------------------") print(" Hint: ") print(" ") print(" In order not to overload the E-utility servers, NCBI recommends that users limit large jobs to ") print(" either weekends or between 9:00 PM and 5:00 AM Eastern time during weekdays. Failure to comply ") print(" with this policy may result in an IP address being blocked from accessing NCBI. ") print(" ") original<-Sys.timezone() Sys.setenv(TZ='EST') InTime<-any( c((as.numeric(format(Sys.time(), "%H")) < 5),(as.numeric(format(Sys.time(), "%H")) > 20), (weekdays(Sys.time()) %in% c("Saturday","Sunday")) ) ) if(InTime){InTimeText="(GOOD) "}else{InTimeText="(IP adress could be blocked) "} print(paste0(format(Sys.time()," At the moment it is %A at %I:%M %p Eastern time "),InTimeText," ")) Sys.setenv(TZ=original) print(format(Sys.time()," and %A at %I:%M %p your time ")) format(Sys.time(),"%I:%M %p") print(" Source: R-package RISmed ") print("--------------------------------------------------------------------------------------------------") print(" ") SlowDownPubmedCall<-seq(1,nrow(RESULT),3) ti<-proc.time() for (i in 1:nrow(RESULT)){ RESULT[i,"PubMed"]<-suppressWarnings(tryCatch( QueryCount(EUtilsSummary(paste0("(",annot[annot[,"probeset_id"]==rownames(RESULT[i,]),"gene_name"],") AND ",Stimulus))), error=function(e){NA})) if(i %in% process){ setTkProgressBar(global, (45+2*which(process==i)), label=paste("Found integrals between dynamics.",nrow(RESULT)," features are in at least one criterion significant. 
Programm is looking for features in Pubmed (",which(process==i)*10," % ready)") ) } # artificially slow down Pubmed call if (i %in% SlowDownPubmedCall){ ti2<- proc.time()-ti #print(max(0,1.001-as.numeric(ti2[3]))) show artificial waiting time Sys.sleep(max(0,1.001-as.numeric(ti2[3]))) #put program to sleep for some miliseconds ti<-proc.time() } } rm(process,ti,ti2,SlowDownPubmedCall,original,InTime,InTimeText) ## Create Pubmedscore. } RESULT[,c("PubMedscore")]<-logb(RESULT[,c("PubMed")]+1,base=(max(c(RESULT[,c("PubMed")],1),na.rm = TRUE)+1)) RESULT[is.na(RESULT[,"PubMedscore"]),"PubMedscore"]=0 # for Pubmedscore is NA not useful save(RESULT, file=paste0(folder,"/Saved_Intermediate_Results/RESULT-after-PubMedScore.RData")) setTkProgressBar(global, 65, label=paste("Literature search in PubMed is complete. Calculates the Consensus Score." )) ################################################################################ ######################################## ## consensusScore ######################################## #standarizise Scores if(S=="GO"){ RESULTCS<-RESULT[,c("earlyResponse","middleResponse", "lateResponse","MaxDist","Dynamic","RevOverlapScore")] }else{ if ( all(RESULT[,"PubMedscore"] == RESULT[1,"PubMedscore"]) ){ RESULTCS<-RESULT[,c("earlyResponse","middleResponse", "lateResponse","MaxDist","Dynamic")] }else{ RESULTCS<-RESULT[,c("earlyResponse","middleResponse", "lateResponse","MaxDist","Dynamic","PubMedscore")] } } #RESULTCS<-apply(RESULTCS,MARGIN=2, function(x) x/max(x,na.rm = TRUE)) #normalization # for (i in 1:ncol(RESULTCS)){ # each NaN is set to 0 # RESULTCS[is.na(RESULTCS[,i]),]=0 # } RESULTCS<-apply(RESULTCS,2,function(x) (x-mean(x))/sd(x[x>0]) ) #x[x>0] important for pubmedscore with many 0 values # combine Integralscore CS1<-as.data.frame(rowMeans(RESULTCS[,c("earlyResponse","middleResponse", "lateResponse")])) #bulid consensusscore if(S=="GO"){ CS2<-rowMeans(cbind(RESULTCS[,c("MaxDist","Dynamic","RevOverlapScore")], CS1)) # test: 
identical(rownames(RESULT[,c("MaxDist","Dynamic","PubMedscore")]),rownames(CS1)) }else{ if ( all(RESULT[,"PubMedscore"] == RESULT[1,"PubMedscore"] ) ){ CS2<-rowMeans(cbind(RESULTCS[,c("MaxDist","Dynamic")], CS1)) }else{ CS2<-rowMeans(cbind(RESULTCS[,c("MaxDist","Dynamic","PubMedscore")], CS1)) } } setTkProgressBar(global, 67.5, label=paste("The consensus score has been calculated. Testing significance and producing pics." )) png(filename = paste0(folder,"/Venn_and_other_characteristics/Source_for_Consensus_Score.png"),width = 3.25, height = 3.25,units = "in",res = 1200, pointsize = 6) # plot sources for Consensus Score suppressWarnings(tryCatch(( plot(density(CS2, na.rm=TRUE),xlim=c(min(RESULTCS),-min(RESULTCS)),xlab=paste0("standardized distributions with ",nrow(RESULTCS)," elements."),col="black",main="Source for Consensus Score")#CS2 for ylim determination ), error=function(e){NA})) suppressWarnings(tryCatch(( lines(density( RESULTCS[,c("Dynamic")], na.rm=TRUE),col="blue",lwd=2) ), error=function(e){NA})) suppressWarnings(tryCatch(( lines(density( RESULTCS[,c("MaxDist")], na.rm=TRUE),col="red" ,lwd=2) ), error=function(e){NA})) suppressWarnings(tryCatch(( lines(density(rowMeans(RESULTCS[,c("earlyResponse","middleResponse", "lateResponse")]), na.rm=TRUE),col="green",lwd=2)), error=function(e){NA})) suppressWarnings(tryCatch(( if(S=="GO"){lines(density( RESULTCS[,c("RevOverlapScore")], na.rm=TRUE),col="pink",lwd=2) }else{ lines(density( RESULTCS[,c("PubMedscore")], na.rm=TRUE),col="pink",lwd=2) }), error=function(e){NA})) suppressWarnings(tryCatch(( lines(density(CS2, na.rm=TRUE),col="black",lwd=2)), error=function(e){NA})) #to foreground suppressWarnings(tryCatch(( if(S=="GO"){legend("topright", inset=.05,c("Consensus Score","Rev Overlap Score"), fill=c("black","pink"), horiz=FALSE,bty = "n") }else{ legend("topright", inset=.05,c("Consensus Score","Pubmed Score"), fill=c("black","pink"), horiz=FALSE,bty = "n") }), error=function(e){NA})) 
suppressWarnings(tryCatch(( legend("topleft", inset=.05,c("Distance Score","Dynamic Score","Integral Score"), fill=c("red","blue","green"), horiz=FALSE,bty = "n")), error=function(e){NA})) dev.off() # transform values beetween 0 and 1 CS3<-CS2-min(CS2) if (sum(CS3)==0){CS3<-CS3+0.01} RESULT[ ,"ConsensusScore"]<-CS3/max(CS3,na.rm = TRUE) save(RESULT, file=paste0(folder,"/Saved_Intermediate_Results/RESULT-after-ConsensusScore1.RData")) # tresC<-DistplotPval(x1<-RESULT[,"ConsensusScore",drop=FALSE],xlab="Averarage Score by standarizied Scores",main=paste0("Significance in Consensus Score"), pVal=pVal,folder=paste0(folder,"/Significance_And_Pval/"),ForTest="Significant_Consensus_Score") RESULT<-merge(RESULT,tresC$Pval ,by="row.names") rownames(RESULT)<-RESULT[,1] RESULT<-RESULT[,-1] RESULT[,"signifConsensus"]=as.numeric(RESULT[,"ConsensusScore"])>tresC$tres rm(CS2,CS3,RESULTCS,tresC) RESULT<-merge(RESULT,annot ,by="row.names") rownames(RESULT)<-RESULT[,1] RESULT<-RESULT[,-1] save(RESULT, file=paste0(folder,"/Saved_Intermediate_Results/RESULT-after-ConsensusScore2.RData")) setTkProgressBar(global, 70, label=paste("The Consensus Score is finished. Produce ",min(nrow(RESULT),MaxPics)," pics to show the dynamic." 
)) #load("C:/Users/Marco/Documents/CompBioSys/Affymetrix/AlbrechtMethod//contrast_GO.EGF-vs-GO.BControl_version_2/Saved_Intermediate_Results/RESULT-after-ConsensusScore2.RData") #load("C:/Users/Marco/Documents/CompBioSys/Affymetrix/AlbrechtMethod/contrast_GO.EGF-vs-GO.BControl_version_2/Saved_Intermediate_Results/RawGene_to_GO.RData") ################################################################################ ######################################## ## PLOT dynamic figures ######################################## prg<-unique(round(seq(min(nrow(RESULT),MaxPics)/20,min(nrow(RESULT),MaxPics), min(nrow(RESULT),MaxPics)/20))) #for progress RESULT[,"Link"]=NA RESULT<-RESULT[rev(order(RESULT[,"ConsensusScore"])),] #order by Consensus Score timeusage<-proc.time() if(S=="GO"){ grp1SDu <-GO$grp1SDu grp2SDu <-GO$grp2SDu grp1SDd <-GO$grp1SDd grp2SDd <-GO$grp2SDd GOsz <-GO$groupsize } for (j in 1:min(nrow(RESULT),MaxPics)){ if(S=="GO"){ GOsd1u=grp1SDu[rownames(grp1SDu)==RESULT[j ,"probeset_id"],] GOsd2u=grp2SDu[rownames(grp2SDu)==RESULT[j ,"probeset_id"],] GOsd1d=grp1SDd[rownames(grp1SDd)==RESULT[j ,"probeset_id"],] GOsd2d=grp2SDd[rownames(grp2SDd)==RESULT[j ,"probeset_id"],] GOszj = GOsz[rownames(GOsz) ==RESULT[j ,"probeset_id"],1] } name=RESULT[j ,"gene_name"] name1=gsub("[[:punct:]]", " ", name) #remove all special symbols if(nchar(name)>40){cex1=40/nchar(name)*1.2}else{cex1=1.2} #quantil regression on expression data pos=which(rownames(grp1)==rownames(RESULT[j,])) #test identical(rownames(grp1),rownames(grp2)) Poi1=as.numeric(grp1[pos,]) Poi2=as.numeric(grp2[pos,]) if(!is.na(sum(Poi1,Poi2))){ p1 <- rqss(as.numeric(Poi1) ~ qss(grp1.time,lambda=0.6), tau = 0.5) p1$coef[2:length(p1$coef)]=p1$coef[2:length(p1$coef)]+ p1$coef[1] p1<-p1$coef p2 <- rqss(as.numeric(Poi2) ~ qss(grp2.time,lambda=0.6), tau = 0.5) p2$coef[2:length(p2$coef)]=p2$coef[2:length(p2$coef)]+ p2$coef[1] p2<-p2$coef gl <-c(Poi1,Poi2,p1,p2) if(S=="GO"){ p1u <- rqss(as.numeric(Poi1+GOsd1u) ~ 
qss(grp1.time,lambda=0.6), tau = 0.5) p1u$coef[2:length(p1u$coef)]=p1u$coef[2:length(p1u$coef)]+ p1u$coef[1] p1u<-p1u$coef p1d <- rqss(as.numeric(Poi1-GOsd1d) ~ qss(grp1.time,lambda=0.6), tau = 0.5) p1d$coef[2:length(p1d$coef)]=p1d$coef[2:length(p1d$coef)]+ p1d$coef[1] p1d<-p1d$coef p2u <- rqss(as.numeric(Poi2+GOsd2u) ~ qss(grp2.time,lambda=0.6), tau = 0.5) p2u$coef[2:length(p2u$coef)]=p2u$coef[2:length(p2u$coef)]+ p2u$coef[1] p2u<-p2u$coef p2d <- rqss(as.numeric(Poi2-GOsd2d) ~ qss(grp2.time,lambda=0.6), tau = 0.5) p2d$coef[2:length(p2d$coef)]=p2d$coef[2:length(p2d$coef)]+ p2d$coef[1] p2d<-p2d$coef gl <- c(gl,p1u,p1d,p2u,p2d) } ## Plot png(filename = paste0(folder,"/Resultfigure/CS_",sprintf("%01.3f",RESULT[j,"ConsensusScore"],3),"_",gsub(" ", "_", substr(name1, start=1, stop=min(40,nchar(name1)))),".png"),width = 3.25, height = 3.25,units= "in",res = 1200, pointsize = 6) gl2<-max(gl,na.rm = TRUE)-min(gl,na.rm = TRUE) if(S=="GO"){ plot(unique(grp1.time), p1, ylim=c(min(gl)-(0.1*gl2),max(gl,na.rm = TRUE)),type="l", col="red",lwd=2,ylab=paste0("average intensity of ",GOszj," genes"), xlab=" time [h]", xaxt="n") }else{ plot(unique(grp1.time), p1, ylim=c(min(gl)-(0.1*gl2),max(gl,na.rm = TRUE)),type="l", col="red",lwd=2,ylab="Intensity", xlab=" time [h]", xaxt="n") } points(grp1.time,Poi1,col="red") lines(unique(grp2.time), p2,col="blue",lwd=2) points(grp2.time,Poi2 ,col="blue",pch=4) axis(side=1, at=round(unique(grp1.time,grp2.time))) title(main = list(paste0(name," (PubMed:",RESULT[j,"PubMed"],")"), cex = cex1)) legend('bottom','groups',c(grp1n,"measurement",grp2n,"measurement"), pch=c(15,1,15,4),col=c('red','red',"blue","blue"),ncol=4,bty ="n") if(S=="GO"){ polygon(c(unique(grp1.time),rev(unique(grp1.time))),c(p1u,rev(p1d )),col=rgb(1, 0, 0,0.1),xpd=FALSE,border = NA) polygon(c(unique(grp2.time),rev(unique(grp2.time))),c(p2u,rev(p2d )),col=rgb(0, 0, 1,0.1),xpd=FALSE,border = NA) } dev.off() rm(cex1) } #end of if(!is.na(sum(Poi1,Poi2))){} ## end Plot if(S=="GO"){ 
RESULT[j,"Link"]=paste0("=HYPERLINK(TranslateQuotes",paste0("Resultfigure/CS_",sprintf("%01.3f",RESULT[j,"ConsensusScore"],3),"_",gsub(" ", "_", substr(name1, start=1, stop=min(40,nchar(name1)))),".png"),"TranslateQuotes,TranslateQuotes figure TranslateQuotes)") }else{ RESULT[j,"Link"]=paste0("=HYPERLINK(TranslateQuotes",paste0("Resultfigure/CS_",sprintf("%01.3f",RESULT[j,"ConsensusScore"],3),"_",gsub(" ", "_", substr(name1, start=1, stop=min(40,nchar(name1)))),".png"),"TranslateQuotes,TranslateQuotes",name1,"TranslateQuotes)") } ## progress info if(j %in% prg){ idx=1:length(prg) timeUse<-sum(as.numeric(proc.time()-timeusage)[1:2])*(20-idx[j==prg]) setTkProgressBar(global,(70+idx[j == prg]), label=paste("Produce ",min(nrow(RESULT),MaxPics)," pics to show the dynamic (",round(idx[j == prg]*5)," % done ). This step is expected to be completed by ",ProgTime(timeUse),"." )) timeusage<-proc.time()## end progress info }} ## end Pic production if(S=="GO"){rm(GOsd1u,GOsd1d,GOsd2u,GOsd2d,Poi1,Poi2,p1u,p1d,p2u,p2d,gl)} save(RESULT, file=paste0(folder,"/Saved_Intermediate_Results/RESULT-after-PicsAndLink.RData")) #load("C:/Users/Marco/Documents/CompBioSys/Affymetrix/AlbrechtMethod/contrast_Tr.IGF-vs-Tr.BControl_version_1/Saved_Intermediate_Results/RESULT-after-PicsAndLink.RData") ################################################################################ ######################################## ## VENN-Diagramm ######################################## selection<-c("signifearlyRes", "signifmiddleRes","signiflateRes", "signifResponse") category <-c("Early", "Middle","Late", "Full") VENNfunc(result=RESULT,R0=selection,category=category,SAVEplot=paste0(folder,"/Venn_and_other_characteristics/integralVENN.png") ) ## tresCS1<-DistplotPval(x1=CS1[,1,drop=FALSE],xlab="Combinated Integral Score",main=paste0("Significance in Integral Score"), pVal=pVal,folder=paste0(folder,"/Significance_And_Pval/"),ForTest="Significant_Integral_Score") 
RESULT[,"IntegralScore"]=CS1[,1]>tresCS1$tres selection<-c("SignPeak", "SignDyn","IntegralScore", "signifConsensus") category <-c("Peak", "Dynamic","Integral", "Consensus") VENNfunc(result=RESULT,R0=selection,category=category,SAVEplot=paste0(folder,"/Venn_and_other_characteristics/ConsensusScore1VENN.png") ) ## if(S=="GO"){selection <-c("SignPeak", "SignDyn","IntegralScore", "SignROS") category <-c("Peak", "Dynamic","Integral", "revOverlap")}else{ RESULT[,"sigPubMed"]<-RESULT[,"PubMed"]>0 selection <-c("SignPeak", "SignDyn","IntegralScore", "sigPubMed") category <-c("Peak", "Dynamic","Integral", "PubMed")} VENNfunc(result=RESULT,R0=selection,category=category,SAVEplot=paste0(folder,"/Venn_and_other_characteristics/ConsensusScore2VENN.png") ) ################################################################################ ######################################## ## Rank based perspective ######################################## RESULT[,"RankConsensus"] <-rank(-(RESULT[,"ConsensusScore"]) ,ties.method = "min", na.last = TRUE) RESULT[,"RankMaxDist"] <-rank(-(RESULT[,"MaxDist"]) ,ties.method = "min", na.last = TRUE) RESULT[,"RankDynamic"] <-rank(-(RESULT[,"Dynamic"]) ,ties.method = "min", na.last = TRUE) RESULT[,"RankearlyResponse"] <-rank(-(RESULT[,"earlyResponse"]) ,ties.method = "min", na.last = TRUE) RESULT[,"RankmiddleResponse"] <-rank(-(RESULT[,"middleResponse"]) ,ties.method = "min", na.last = TRUE) RESULT[,"RanklateResponse"] <-rank(-(RESULT[,"lateResponse"]) ,ties.method = "min", na.last = TRUE) RESULT[,"RankcompleteResponse"]<-rank(-(RESULT[,"completeResponse"]),ties.method = "min", na.last = TRUE) if(S=="GO"){ RESULT[,"RankRevOverlapScore"] <-rank(-(RESULT[,"RevOverlapScore"]),ties.method = "min", na.last = TRUE) RESULT[,"RankMean"] <- round(rowMeans(cbind(rowMeans(RESULT[,c("RankearlyResponse","RankmiddleResponse","RanklateResponse")]), RESULT[,"RankDynamic"],RESULT[,"RankMaxDist"],RESULT[,"RankRevOverlapScore"]))) RESULT[,"RankMedian"] <- 
apply(RESULT[,c("RankearlyResponse","RankmiddleResponse","RanklateResponse","RankDynamic","RankMaxDist","RankRevOverlapScore")], 1, median) }else{ RESULT[,"RankMean"] <- round(rowMeans(cbind(rowMeans(RESULT[,c("RankearlyResponse","RankmiddleResponse","RanklateResponse")]), RESULT[,"RankDynamic"],RESULT[,"RankMaxDist"]))) RESULT[,"RankMedian"] <- apply(RESULT[,c("RankearlyResponse","RankmiddleResponse","RanklateResponse","RankDynamic","RankMaxDist")], 1, median) } save(RESULT, file=paste0(folder,"/Saved_Intermediate_Results/RESULT-after-Rankbased.RData")) ################################################################################ ######################################## ## Output ######################################## header<-paste0("Start time: (",startTimeFunction,") End Time: (", Sys.time(),") Systeminfo: (",paste(as.character(unique(paste0(Sys.info()))),collapse=" "),")") #################################### ## All information in one if(S=="GO"){RESULT_all<-RESULT[,c("gene_name","ConsensusScore","RankMean", "RankMedian", "PubMed","groupsize","Link","signifConsensus","SignPeak","SignPeakH0","SignInstability","SignROS","SignDyn","signifearlyRes","signifmiddleRes","signiflateRes","signifResponse","PubMedscore","MaxDist","Instability","RevOverlapScore" ,"Dynamic","earlyResponse","middleResponse","lateResponse","completeResponse", "Pval_MaxDist","Pval_RevOverlapScore","Pval_Dynamic","Pval_earlyResponse","Pval_middleResponse","Pval_lateResponse","Pval_completeResponse","Pval_ConsensusScore", "RankConsensus", "RankMaxDist","RankRevOverlapScore", "RankDynamic", "RankearlyResponse", "RankmiddleResponse","RanklateResponse", "RankcompleteResponse","probeset_id") ] colnames(RESULT_all)[1]="GO_name" colnames(RESULT_all)[length(colnames(RESULT_all))]="GO_id" }else{ RESULT_all<-RESULT[,c("gene_name","ConsensusScore","RankMean", "RankMedian", 
"PubMed","Link","signifConsensus","SignPeak","SignPeakH0","SignInstability","SignDyn","signifearlyRes","signifmiddleRes","signiflateRes","signifResponse","PubMedscore","MaxDist","Instability","Dynamic","earlyResponse","middleResponse","lateResponse","completeResponse", "Pval_MaxDist","Pval_Dynamic","Pval_earlyResponse","Pval_middleResponse","Pval_lateResponse","Pval_completeResponse","Pval_ConsensusScore", "RankConsensus", "RankMaxDist", "RankDynamic", "RankearlyResponse", "RankmiddleResponse","RanklateResponse", "RankcompleteResponse","probeset_id") ] } if(colnames(as.data.frame(annotation))[1]!="annotation"){ RESULT_all<-merge(RESULT_all,annotation,by="row.names") #if relation is working vor annot="" as well as for annot=data.frame(). avoid warning rownames(RESULT_all)<-RESULT_all[,1] RESULT_all<-RESULT_all[,-1] } file_name<-paste0(folder,"/RESULT_",grp1n,"-vs-",grp2n,"_all.tsv") my.write(x=RESULT_all, file =file_name, header=header , f=write.table, sep = "\t",row.names=FALSE, quote=F )# save to tsv or EXCEL sheet save(RESULT_all, file=paste0(folder,"/Saved_Intermediate_Results/RESULT_END_all.RData")) #################################### ## Rank perspective if(S=="GO"){RESULT_rank<-RESULT[,c("gene_name","RankMean", "RankMedian", "PubMed","groupsize","Link","MaxDist","Instability","RevOverlapScore" ,"Dynamic","earlyResponse","middleResponse","lateResponse","completeResponse","SignInstability", "RankConsensus", "RankMaxDist","RankRevOverlapScore", "RankDynamic", "RankearlyResponse", "RankmiddleResponse","RanklateResponse", "RankcompleteResponse","probeset_id") ] colnames(RESULT_rank)[1]="GO_name" colnames(RESULT_rank)[length(colnames(RESULT_rank))]="GO_id" }else{ RESULT_rank<-RESULT[,c("gene_name","RankMean", "RankMedian", "PubMed","Link","MaxDist","Instability","Dynamic","earlyResponse","middleResponse","lateResponse","completeResponse","SignInstability","RankConsensus", "RankMaxDist", "RankDynamic", "RankearlyResponse", "RankmiddleResponse","RanklateResponse", 
"RankcompleteResponse","probeset_id") ] } if(colnames(as.data.frame(annotation))[1]!="annotation"){ RESULT_rank<-merge(RESULT_rank,annotation,by="row.names") rownames(RESULT_rank)<-RESULT_rank[,1] RESULT_rank<-RESULT_rank[,-1] } file_name<-paste0(folder,"/RESULT_",grp1n,"-vs-",grp2n,"_rank.tsv")# save to tsv or EXCEL sheet my.write(x=RESULT_rank, file =file_name, header=header , f=write.table, sep = "\t",row.names=FALSE, quote=F ) save(RESULT_rank, file=paste0(folder,"/Saved_Intermediate_Results/RESULT_END_rank.RData")) #################################### ## Pval perspective if(S=="GO"){RESULT_Pval<-RESULT[,c("gene_name","ConsensusScore","Pval_ConsensusScore", "PubMed","groupsize","Link","MaxDist","Instability","RevOverlapScore" ,"Dynamic","earlyResponse","middleResponse","lateResponse","completeResponse","SignInstability", "Pval_MaxDist","Pval_RevOverlapScore","Pval_Dynamic","Pval_earlyResponse","Pval_middleResponse","Pval_lateResponse","Pval_completeResponse","Pval_ConsensusScore","probeset_id") ] colnames(RESULT_Pval)[1]="GO_name" colnames(RESULT_Pval)[length(colnames(RESULT_Pval))]="GO_id" }else{ RESULT_Pval<-RESULT[,c("gene_name","ConsensusScore","Pval_ConsensusScore","PubMed","Link","MaxDist","Instability","Dynamic","earlyResponse","middleResponse","lateResponse","completeResponse","SignInstability", "Pval_MaxDist","Pval_Dynamic","Pval_earlyResponse","Pval_middleResponse","Pval_lateResponse","Pval_completeResponse","probeset_id") ] } if(colnames(as.data.frame(annotation))[1]!="annotation"){ RESULT_Pval<-merge(RESULT_Pval,annotation,by="row.names") rownames(RESULT_Pval)<-RESULT_Pval[,1] RESULT_Pval<-RESULT_Pval[,-1]} file_name<-paste0(folder,"/RESULT_",grp1n,"-vs-",grp2n,"_Pval.tsv") my.write(x=RESULT_Pval, file =file_name, header=header , f=write.table, sep = "\t",row.names=FALSE, quote=F )# save to tsv or EXCEL sheet save(RESULT_Pval, file=paste0(folder,"/Saved_Intermediate_Results/RESULT_END_Pval.RData")) #################################### ## Small Help 
fileConn<-file(paste0(folder,"/If_the_Link_in_table_doesnt_work.txt")) writeLines(c("Hi User", " ", "The link does not work automatically. Excel uses symbols for the hyperlink", "that can not be printed directly from R. To make the link available you will", "need to replace certain parts. Mark in Excel the column with the links and", "use then the Excel function [find and replace all].", "-First, replace all [TranslateQuotes] to correct quotes.", "-Second, change the orientation from the slash (/) to a backslash.", "Now, the links should work. Note that the table only together with the folder", "[Resultfigure] can be moved, because otherwise the reference is lost. If some-", "thing is still not work, look at the hyperlink function in Excel and replace", "something if necessary.", " ", "With best regards", " ", "Marco Albrecht ", "(the developer)"), fileConn) close(fileConn) #################################### ## add to Function Input Infotext<-list() Infotext$sessionInfo2<-sessionInfo();Infotext$CalculationTime<-Sys.time()-startTimeFunction sink(file = paste0(folder, "/Information/Function_Input.txt"),append = T) print(Infotext) sink(file =NULL) #################################### #output close(global) print(" ") print(" ") print(" --- Thanks for analysing with TTCA. 
Goodbye and take care --- ")
print(" ")
print(" ")
print(" ")
print("--------------------------------------------------------------------------------------------------")
print(paste0("Start: ",startTimeFunction," "))
print(paste0("End: ",Sys.time()," Kind Regards: Marco Albrecht "))
print("--------------------------------------------------------------------------------------------------")
# output: table with all scores, ranks and p-values
return(RESULT_all)
} #end TTCA
#######################################################################################################################################################################################
#######################################################################################################################################################################################
#######################################################################################################################################################################################
#######################################################################################################################################################################################
#######################################################################################################################################################################################
#######################################################################################################################################################################################
## PeakScore
## For every row (gene) of `dat`: average replicate columns of both groups onto
## the shared time grid, take the maximum absolute difference between the two
## averaged time courses ("MaxDist"), and flag it significant ("SignPeak") when
## it exceeds a data-driven threshold (2 x the mean of the 95%-quantile
## replicate standard deviations). When the data set is large (more than
## `tresForgroupwise` batches of ~1000 rows) the threshold is computed per
## intensity batch, otherwise once for all rows. Additionally "Instability" is
## the median replicate standard deviation per row, flagged against the same
## threshold. Side effects: histogram PNGs per batch, a mean-vs-SD plot, a
## progress-bar update (`global`), and an intermediate .RData file in `folder`.
## Args:
##   dat    - numeric matrix, rows = genes, columns = samples (rownames used)
##   grp    - per-column group label (1 or 2)
##   tme    - per-column measurement time
##   folder - output directory for figures and intermediate results
##   global - tcltk progress bar handle (setTkProgressBar)
## Returns: data.frame with columns MaxDist, SignPeak, Instability,
##   SignInstability; MaxDist and Instability are normalized by the overall
##   intensity range of `dat`.
PeakScore<-function(dat,grp, tme,folder,global){
  ## split columns into the two groups and their time stamps
  grp1 = dat[, grp==1]
  grp2 = dat[, grp==2]
  grp1.time = tme[grp==1]
  grp2.time = tme[grp==2]
  ##################################################
  info<-nrow(grp1)           # NOTE(review): appears unused below — confirm
  a1<-floor(nrow(dat)/1000)  # steps: number of ~1000-row intensity batches
  a2<-ceiling(nrow(dat)/a1)  # rows per batch (>= 1000) for the dynamic threshold
  A1<-apply(dat, 1,mean)     # per-row mean intensity
  A2<-A1[order(A1)]          # rows sorted by mean intensity -> batches of similar level
  tresVec2<-tresVec1<-f11<-f1<-list(NA)  # NOTE(review): f1/f11 appear unused here — confirm
  dvth<-dvSd<-NULL           # accumulators: per-row threshold (dvth) and max distance (dvSd)
  inter<-intersect(grp1.time,grp2.time)  # time points measured in both groups
  tresForgroupwise=7
  ##################################################
  if(a1>tresForgroupwise){ # only groupwise if database is big enough/ IF CHANGE than change also by mean-SD plot
    for (k in 1:a1){ # for group wise Calculation
      A3<-names(A2[((k-1)*a2+1):(k*a2)])  # row names of the k-th intensity batch
      grp1a <- grp1[ rownames(grp1) %in% A3, ]
      grp2a <- grp2[ rownames(grp2) %in% A3, ]
      L<-rep(NA,max(length(grp1a),length(grp2a),na.rm = TRUE)) #for threshold determination
      j1=1
      ##################################################
      ## build replicate-averaged matrices over the shared time grid
      grp1e<-grp1a[ ,1:length(inter)]*NA
      grp2e<-grp2a[ ,1:length(inter)]*NA
      colnames(grp1e)<-colnames(grp2e)<-as.character(inter)
      for (i in 1:length(inter)){ # merge columns
        if( sum(grp1.time==inter[i])>1 ){L[j1]<- as.numeric(quantile(apply(grp1a[,grp1.time==inter[i]], 1, sd), probs =0.95,na.rm=TRUE)) # threshold were 95% is below for each multiple replicated timepoint
          j1=j1+1
          grp1e[,i]<- apply(grp1a[,grp1.time==inter[i]], 1, mean) # mean of replicates
        }else{ grp1e[,i]<- grp1a[,grp1.time==inter[i]] # replicate direct
        } # if more than one samples per timepoint are available, than take the mean itensity for this time point.
        if( sum(grp2.time==inter[i])>1 ){L[j1]<- as.numeric(quantile(apply(grp2a[,grp2.time==inter[i]], 1, sd), probs =0.95,na.rm=TRUE)) # threshold were 95% is below for each multiple replicated timepoint
          j1=j1+1
          grp2e[,i]<- apply(grp2a[,grp2.time==inter[i]], 1, mean) # mean of replicates
        }else{ grp2e[,i]<- grp2a[,grp2.time==inter[i]] # replicate direct
        }
      }
      d<-apply(abs(grp1e-grp2e), 1, max)  # per-row maximum distance between group means
      ##################################################
      tres<-mean(L[1:(j1-1)])*2 # threshold: mean of all 95til sd ; twofold
      dt<-d                     # for groupthreshold (same names/order as d)
      dt[]=tres                 # for groupthreshold (constant threshold per batch)
      dvSd=c(dvSd,d)            # standard deviation/distance: in each loop add one value to the vector
      dvth=c(dvth,dt)           # groupthreshold
      ##################################################
      ## figure: distance histogram per batch with its threshold
      png(filename = paste0(folder,"/Significance_And_Pval/PeakScore/DistanceDistribution_fromLowerMean",((k-1)*a2+1),"toUpperMean",(k*a2),"_threshold_",round(tres,2),".png"),width = 3.25, height = 3.25,units = "in",res = 1200, pointsize = 6)
      ## NOTE(review): `tres` and `")"` are passed as extra arguments to hist(),
      ## not pasted into `sub` — probably a missing paste0(); confirm
      hist(d,breaks=(0:100)/100*max(d,na.rm=TRUE),main="Maximum distance beetween both groups", sub="Everything to the left of the red line was removed (",tres,")")
      abline(v=tres,col="red",lty=1)
      dev.off()
      ## end figure
      ##################################################
      # for threshold mean plot (batch mean intensity vs. its threshold)
      tresVec1[[k]]<-mean(A2[((k-1)*a2+1):(k*a2)],na.rm=TRUE)
      tresVec2[[k]]<-tres
      # progress
      setTkProgressBar(global,(3+(k/a1*6)), label=paste("Tests for significant distances between the dynamics (",round(k/a1*100),"% done)"))
      ##################################################
    }
  }else{
    ## small data set: one global threshold instead of batch-wise thresholds
    L<-rep(NA,max(length(grp1),length(grp2),na.rm = TRUE)) #for threshold determination
    j1=1
    ##################################################
    ## NOTE(review): stray bare `k` below — `k` is not defined in this branch;
    ## looks like leftover debug output and would error at runtime — confirm
    k
    grp1e<-grp1[ ,1:length(inter)]*NA
    grp2e<-grp2[ ,1:length(inter)]*NA
    colnames(grp1e)<-colnames(grp2e)<-as.character(inter)
    for (i in 1:length(inter)){ # merge columns
      if( sum(grp1.time==inter[i])>1 ){L[j1]<- as.numeric(quantile(apply(grp1[,grp1.time==inter[i]], 1, sd), probs =0.95,na.rm=TRUE)) # threshold were 95% is below for each multiple replicated timepoint
        j1=j1+1
        grp1e[,i]<- apply(grp1[,grp1.time==inter[i]], 1, mean) # mean of replicates
      }else{ grp1e[,i]<- grp1[,grp1.time==inter[i]] # replicate direct
      } # if more than one samples per timepoint are available, than take the mean itensity for this time point.
      if( sum(grp2.time==inter[i])>1 ){L[j1]<- as.numeric(quantile(apply(grp2[,grp2.time==inter[i]], 1, sd), probs =0.95,na.rm=TRUE)) # threshold were 95% is below for each multiple replicated timepoint
        j1=j1+1
        grp2e[,i]<- apply(grp2[,grp2.time==inter[i]], 1, mean) # mean of replicates
      }else{ grp2e[,i]<- grp2[,grp2.time==inter[i]] # replicate direct
      }
    }
    d<-apply(abs(grp1e-grp2e), 1, max)
    ##################################################
    tres<-mean(L[1:(j1-1)])*2 # threshold: mean of all 95til sd ; twofold
    dt<-d                     # for groupthreshold
    dt[]=tres                 # for groupthreshold
    dvSd=c(dvSd,d)            # standarddeviation. in each loop add one value to the vector
    dvth=c(dvth,dt)           # groupthreshold
    ##################################################
    ## figure: one distance histogram for all rows
    png(filename = paste0(folder,"/Significance_And_Pval/PeakScore/DistanceDistribution_AllinOne_threshold_",round(tres,2),".png"),width = 3.25, height = 3.25,units = "in",res = 1200, pointsize = 6)
    ## NOTE(review): same extra-argument issue with hist(sub=...) as above
    hist(d,breaks=(0:100)/100*max(d,na.rm=TRUE),main="Maximum distance beetween both groups", sub="Everything to the left of the red line was removed (",tres,")")
    abline(v=tres,col="red",lty=1)
    dev.off()
    ## end figure
  }
  ##################################################
  ## For RESULT: align thresholds and distances by row name
  dvSd <- as.data.frame(dvSd)
  dvth <- as.data.frame(dvth)
  RESULT<-merge(dvth,dvSd ,by="row.names", all = TRUE)
  rownames(RESULT)<-RESULT[,1]
  RESULT<-RESULT[,-1]
  colnames(RESULT)<-c("MaxDistThres","MaxDist")
  RESULT[,"SignPeak"]<-RESULT[,"MaxDist"]> RESULT[,"MaxDistThres"]
  ##################################################
  ## Instability detection: Median standard deviation across all replicated timepoints
  ## NOTE(review): unique()'s second positional argument is `incomparables`, so
  ## the grp2 replicated time points are NOT added to Duplic here — only grp1
  ## replicate times are used below; likely a bug, confirm intent
  Duplic<-unique(grp1.time[duplicated(grp1.time)],grp2.time[duplicated(grp2.time)])
  Ls<-matrix(NA,nrow(dat),length(Duplic))
  for (i in 1:length(Duplic)){
    Ls[,i]<-as.numeric(apply(grp1[ ,grp1.time==Duplic[i]], 1, sd))
  }
  sL<-t(Ls)
  # remove duplicated columns if any
  Ls<-t(sL[!duplicated(sL),])
  RESULT[,"Instability"] <-apply(Ls,1,median)
  RESULT[,"SignInstability"]<-RESULT[,"Instability"]> RESULT[,"MaxDistThres"]
  ###############
  ## output preparation
  RESULT[,"MaxDist"] <-RESULT[,"MaxDist"]/(max(dat,na.rm=TRUE)-min(dat,na.rm=TRUE)) # normalize to complete intensity bandwidth of the array
  RESULT[,"Instability"] <-RESULT[,"Instability"]/(max(dat,na.rm=TRUE)-min(dat,na.rm=TRUE)) # normalize to complete intensity bandwidth of the array
  ##################################################
  ## plot: batch mean intensity vs. its SD threshold (only in group-wise mode)
  if(a1>tresForgroupwise){
    tresVec1<-unlist(tresVec1)
    tresVec2<-unlist(tresVec2)
    dynamicMean<-as.numeric(rowMeans(dat))
    #
    png(filename = paste0(folder,"/Venn_and_other_characteristics/a_Mean_Standard_Deviation_Plot.png"),width = 3.25, height = 3.25,units = "in",res = 1200, pointsize = 6)
    plot(tresVec1,tresVec2,type = "p", bty = "n",pch=16,col="red", xlab = "Mean of a dynamic", ylab = "Standard deviation threshold",xlim=c(min(dynamicMean),max(dynamicMean,na.rm = TRUE)),main=paste0("Mean of the dynamics and their related standard derivation"))
    par(new=TRUE)
    hist(dynamicMean, breaks = 100, prob=TRUE,xlim=c(min(dynamicMean),max(dynamicMean,na.rm = TRUE)), axes = FALSE, xlab = "", ylab = "",main="")
    dev.off()
  }
  ## end Plot
  ##################################################
  ## Output
  RESULT<-RESULT[,c("MaxDist","SignPeak","Instability","SignInstability")]
  save(RESULT, file=paste0(folder,"/Saved_Intermediate_Results/RESULT-with-PeakScore.RData"))
  return(RESULT)
} # end Peakscore
#######################################################################################################################################################################################
## PeakScoreHOH1
## Fallback variant of PeakScore for data WITHOUT usable replicates: computes
## only the maximum distance between the (replicate-averaged) group time
## courses; Instability and the significance flags are left NA because no
## replicate-based threshold can be derived. Same arguments and same four
## output columns as PeakScore. (Definition continues on the following lines.)
PeakScoreHOH1<-function(dat,grp, tme,folder,global){
  grp1 = dat[, grp==1]
  grp2 = dat[, grp==2]
  grp1.time = tme[grp==1]
  grp2.time = tme[grp==2]
  ##################################################
  info<-nrow(grp1)  # NOTE(review): appears unused — confirm
  inter<-intersect(grp1.time,grp2.time)
  ##################################################
  ## NOTE(review): stray bare `k` — undefined in this function; likely leftover
  ## debug and would error at runtime — confirm
  k
  grp1e<-grp1[ ,1:length(inter)]*NA
  grp2e<-grp2[ ,1:length(inter)]*NA
  colnames(grp1e)<-colnames(grp2e)<-as.character(inter)
for(i in 1:length(inter)){ # merge columns
  ## average replicate columns so both groups align on the shared time grid
  if( sum(grp1.time==inter[i])>1 ){grp1e[,i]<- apply(grp1[,grp1.time==inter[i]], 1, mean) # mean of replicates
  }else{ grp1e[,i]<- grp1[,grp1.time==inter[i]] # replicate direct
  } # if more than one samples per timepoint are available, than take the mean itensity for this time point.
  if( sum(grp2.time==inter[i])>1 ){grp2e[,i]<- apply(grp2[,grp2.time==inter[i]], 1, mean) # mean of replicates
  }else{ grp2e[,i]<- grp2[,grp2.time==inter[i]] # replicate direct
  }
}
## per-row maximum distance between the averaged group time courses
PeakHOH1 <- apply(abs(grp1e-grp2e), 1, max)
RESULT<- as.data.frame(PeakHOH1)
colnames(RESULT)<-"MaxDist"
RESULT["Instability"]<-NA      # no replicates -> instability undefined
RESULT["SignInstability"]<-NA
## NOTE(review): any comparison with NA yields NA, so SignPeak is all NA here —
## presumably an intentional "unknown" placeholder; confirm
RESULT["SignPeak"]<-RESULT["SignInstability"] < NA
RESULT<-RESULT[,c("MaxDist","SignPeak","Instability","SignInstability")]
save(RESULT, file=paste0(folder,"/Saved_Intermediate_Results/RESULT-with-PeakScore-Without-use-of-replicates.RData"))
return(RESULT)
} # end PeakscoreHOH1
#######################################################################################################################################################################################
## VENNfunc
## Draw a 4-set Venn diagram (VennDiagram::draw.quad.venn) to `SAVEplot` (PNG).
## Args:
##   result   - data.frame; its rownames identify the items
##   R0       - names of four logical columns of `result`; set i = rownames
##              where column i is TRUE
##   category - four labels for the diagram
##   SAVEplot - output PNG path
## No return value; called for the side effect of writing the PNG.
VENNfunc<-function(result,R0,category,SAVEplot){
  R1<-result[,R0]
  R2<-rownames(result)
  ## NOTE(review): R1[,i] is used directly as a logical row index; NA entries
  ## would select NA names — assumes the flag columns contain no NA; confirm
  B1 <-unique(R2[ R1[,1]])
  E2 <-unique(R2[ R1[,2]])
  G3 <-unique(R2[ R1[,3]])
  M4 <-unique(R2[ R1[,4]])
  str1 <- R0[1]  # NOTE(review): str1..str4 appear unused below — confirm
  str2 <- R0[2]
  str3 <- R0[3]
  str4 <- R0[4]
  png(filename = SAVEplot,width = 3.5,height = 3.25,units = "in",res = 1200, pointsize = 6)
  ## all pairwise/triple/quadruple intersection sizes required by draw.quad.venn
  draw.quad.venn( area1=length(B1), area2=length(E2), area3=length(G3), area4=length(M4), n12 =length(intersect( B1,E2 )), n13 =length(intersect( B1,G3 )), n14 =length(intersect( B1,M4 )), n23 =length(intersect( E2,G3 )), n24 =length(intersect( E2,M4 )), n34 =length(intersect( G3,M4 )), n123 =length(intersect(intersect( B1,E2 ),G3 )), n124 =length(intersect(intersect( B1,E2 ),M4 )), n134 =length(intersect(intersect( B1,G3 ),M4 )), n234 =length(intersect(intersect( E2,G3 ),M4 )), n1234 =length(intersect(intersect(intersect( B1,E2 ),G3 ),M4)), category = category, fill = c("yellow1", "purple4", "turquoise1", "chartreuse2"), label.col = "gray20", alpha = 0.5, cex = 1, col="aliceblue", cat.cex = 2, cat.col = c("yellow1", "purple4", "turquoise1", "chartreuse2") )
  dev.off()
}
#######################################################################################################################################################################################
## Integral
## For one row of the data (`dat`, split by `grp` into the two group curves
## over grp1.time / grp2.time): linearly interpolate both curves onto the union
## of time points (plus the `timeInt` period borders), insert curve-crossing
## points, and integrate the absolute area between the two curves per period
## (early / middle / late / complete), each normalized by
## globMaxDist * period length.
## Returns c(earlyResponse, middleResponse, lateResponse, completeResponse).
## NOTE(review): the string defaults ("grp1.grp2" etc.) are placeholders, not
## usable values — all arguments must be supplied by the caller.
Integral<-function(dat="grp1.grp2", grp="grp", timeInt= "timeInt",grp1.time="grp1.time",grp2.time="grp2.time",globMaxDist ){
  # available data for each time course
  dat1 = as.numeric(dat[ grp==1])
  dat2 = as.numeric(dat[ grp==2])
  # Identify time gaps (time points missing in either group)
  time=unique(c(grp1.time,grp2.time, timeInt))
  tim1<-time[!(time %in% grp1.time)]
  tim2<-time[!(time %in% grp2.time)]
  # Gives a table: 1 row: measurements and missing values (NA)
  # 2 row: time points
  RB1<- RB2<-rbind(rep(NA,length(time)),time)
  RB1[1,which(time %in% grp1.time)]<-dat1
  RB2[1,which(time %in% grp2.time)]<-dat2
  # Interpolate missing values of time course 1
  mis1<-which(is.na(RB1[1,])) # where are missing values?
  exis1<-which(!(is.na(RB1[1,]))) # where are existing values?
  ## NOTE(review): THE NEXT LINE IS CORRUPTED in this copy of the file — every
  ## '<'/'>' character and the text between them has been stripped (HTML-tag
  ## style), deleting the comparison operators of the linear interpolation, the
  ## mis2/exis2 setup for curve 2, the assembly of the 5-row matrix RB
  ## (row 1 = curve 1, row 2 = curve 2, row 3 = time, row 5 = trapezoid areas,
  ## as used below) and the crossing-point index vector `sp`. The line is kept
  ## verbatim; restore it from the released TTCA sources before use.
  if (length(mis1)>0){ for (i in 1:length(mis1)){ v1=mis1[i] RB1[1,v1]<-RB1[1,max(exis1[exis1v1])]-RB1[1,max(exis1[exis1v1])])-as.numeric(RB1[2,max(exis1[exis10){ for (i in 1:length(mis2)){ v2=mis2[i] RB2[1,v2]<-RB2[1,max(exis2[exis2v2])]-RB2[1,max(exis2[exis2v2])])-as.numeric(RB2[2,max(exis2[exis20){
  # Add columns for the new values at the timepoints for curve crossing
  RB<-cbind(RB,matrix(rep(NA,length(sp)*5),5,length(sp)))
  # calculate Intercept point and add values in new columns
  # (sp[i] is the right end of an interval in which the curves cross; the
  # crossing time goes to row 3, the interpolated curve values to rows 1 and 2)
  for (i in 1:length(sp)){
    r=sp[i]
    RB[3,(length(time)+i)]<-RB[3,(r-1)]+((RB[1,(r-1)]-RB[2,(r-1)])*(RB[3,r]-RB[3,(r-1)]))/(RB[2,r]-RB[2,(r-1)]-RB[1,r]+RB[1,(r-1)])
    RB[1,(length(time)+i)]<-RB[1,(r-1)]+((RB[1,r]-RB[1,(r-1)])/(RB[3,r]-RB[3,(r-1)]))*(RB[3,(length(time)+i)]-RB[3,(r-1)])
    RB[2,(length(time)+i)]<-RB[2,(r-1)]+((RB[2,r]-RB[2,(r-1)])/(RB[3,r]-RB[3,(r-1)]))*(RB[3,(length(time)+i)]-RB[3,(r-1)])
  }}
  # Order matrix according time sequence
  RB<-RB[,order(RB[3,])]
  # calculate trapezoids: row 5 holds the |area between curves| per interval
  RB[5,1]=0
  for (i in 2:ncol(RB)){
    RB[5,i]<-abs(( (RB[1,i]+RB[1,(i-1)])/2 - (RB[2,i]+RB[2,(i-1)])/2) * as.numeric(RB[3,i]-RB[3,(i-1)]))
  }
  ## normalization constants: globMaxDist times each period's duration
  earlyRev<- globMaxDist*as.numeric((RB[3,which(RB[3,]==timeInt[1])]-RB[3,1]))
  middleRev<-globMaxDist*as.numeric((RB[3,which(RB[3,]==timeInt[2])]-RB[3,which(RB[3,]==timeInt[1])]))
  lateRev<- globMaxDist*as.numeric((RB[3,ncol(RB)] -RB[3,which(RB[3,]==timeInt[2])]))
  compRev<- globMaxDist*as.numeric((RB[3,ncol(RB)] -RB[3,1]))
  # period specific integral, normalized by maximum value
  earlyResponse <-sum(RB[5,c(( 1 ):which(RB[3,]==timeInt[1]))])/earlyRev
  middleResponse <-sum(RB[5,c((which.max(RB[3,]==timeInt[1])+1):which(RB[3,]==timeInt[2]))])/middleRev
  lateResponse <-sum(RB[5,c((which.max(RB[3,]==timeInt[2])+1): ncol(RB) )])/lateRev
  completeResponse <-sum(RB[5, ])/compRev
  ## NOTE(review): the next two expressions are value-only no-ops (leftover
  ## debug output?) — confirm they can be removed
  sum(earlyResponse,middleResponse,lateResponse)
  completeResponse
  I=c(earlyResponse,middleResponse,lateResponse,completeResponse)
  return(I)
}
#######################################################################################################################################################################################
# upper<-DistplotPval(x1=RESULT[,"MaxDist" ,drop=FALSE],xlab=xlab, pVal=pVal,folder=paste0(folder,"/LocalHypothesis/"))
## DistplotPval
## Fit a parametric null distribution to the score column `x1` (a one-column
## data.frame), derive per-row upper-tail p-values and the score threshold at
## significance level `pVal`, and save a histogram PNG with the fitted curve.
## The data are shifted by a small offset `delta` (chosen together with the
## best-fitting distribution by optimalMethodeDistr) so strictly-positive
## families (gamma, lognormal, weibull) can be fitted; the shift is undone
## before plotting/returning.
## Args:
##   x1      - one-column data.frame of scores (column name used for output)
##   xlab    - x-axis label of the plot
##   main    - plot title
##   pVal    - significance level for the threshold
##   folder  - output directory for the PNG
##   ForTest - file-name prefix describing the tested score
## Returns: list(tres = threshold on the original scale,
##               Pval = one-column data.frame "Pval_<input column>").
## NOTE(review): the default `pVal=pVal` is self-referential; omitting pVal
## would raise a "promise already under evaluation" error — confirm all
## callers pass it explicitly.
DistplotPval<-function(x1,xlab="value",main="Null hypothesis versus alternative hypothesis", pVal=pVal,folder,ForTest){
  Res=x1
  name1=colnames(x1)
  name2=paste0("Pval_",name1)
  x1=x1[,1,drop=TRUE] # from data.frame to numeric vector
  x1[is.na(x1)]=0.00001 # ok if one expect only one or two NA
  nbox=150 # number of boxes in distribution (histogram bins)
  xfit<-seq(min(x1,na.rm = TRUE),max(x1,na.rm = TRUE),length=2000)
  ## find optimal Method and Parameter for fit
  ## candidate shifts: coarse grid plus small offsets near zero
  d1 <-c(seq(0,(max(x1)+(max(x1)-min(x1)))+0.0001,(max(x1)-min(x1))/8),0.0001,0.001,0.01)# with focus around zero and small trend in positiv direction
  d1 <-d1[order(d1)]
  dmax <-max(abs(d1))
  d3<-d2 <-1:length(d1)*NA   # d2: penalized loglik per shift; d3: winning method (coerced to character on first assignment)
  for (i in 1:length(d1)){
    A<-optimalMethodeDistr( delta=d1[i],x1=x1,dmax=dmax)
    d2[i]<-A$opt
    d3[i]<-A$method
    rm(A)
  }
  bestMethod<-d3[which(d2==max(d2,na.rm = TRUE))[1]]
  delta<-d1[which(d2==max(d2,na.rm = TRUE))[1]]
  ## transformation: shift data by the chosen offset before the final fit
  x1 = x1 + delta
  xfit<-seq(min(x1,na.rm = TRUE),max(x1,na.rm = TRUE),length=2000)
  #
  parG<-suppressWarnings(fitdistr(x1,bestMethod))
  ## one branch per distribution family: density curve for the plot, per-row
  ## upper-tail p-values, and the score `upper` whose tail probability == pVal
  if(bestMethod=="cauchy") {yfit<-dcauchy(xfit,location=as.numeric(parG[]$estimate[1]),scale = as.numeric(parG[]$estimate[2]))
    Res[,name2]= apply(X=Res[,name1,drop=FALSE],MARGIN=1, function(x) pcauchy((x+delta),location=as.numeric(parG[]$estimate[1]),scale = as.numeric(parG[]$estimate[2]),lower.tail = FALSE)) #Pval=P[X > x]
    fminforPval<-function(x, parG,pVal){ a=abs(pVal-pcauchy(x,location=as.numeric(parG[]$estimate[1]),scale = as.numeric(parG[]$estimate[2]),lower.tail = FALSE)) }
    upper= optimize(f = fminforPval , lower = min(x1),upper = max(x1), maximum = FALSE,tol = .0000001,parG=parG,pVal=pVal )$minimum
  }
  if(bestMethod=="gamma") {yfit<-dgamma(xfit,shape=as.numeric(parG[]$estimate[1]), rate = as.numeric(parG[]$estimate[2]))
    Res[,name2]= apply(X=Res[,name1,drop=FALSE],MARGIN=1, function(x) pgamma((x+delta),shape=as.numeric(parG[]$estimate[1]), rate = as.numeric(parG[]$estimate[2]),lower.tail = FALSE)) #Pval=P[X > x]
    fminforPval<-function(x, parG,pVal){ a=abs(pVal-pgamma(x,shape=as.numeric(parG[]$estimate[1]), rate = as.numeric(parG[]$estimate[2]),lower.tail = FALSE)) }
    upper= optimize(f = fminforPval , lower = min(x1),upper = max(x1), maximum = FALSE,tol = .0000001,parG=parG,pVal=pVal )$minimum
  }
  if(bestMethod=="lognormal"){yfit<-dlnorm(xfit,meanlog=as.numeric(parG[]$estimate[1]), sdlog = as.numeric(parG[]$estimate[2]))
    Res[,name2]= apply(X=Res[,name1,drop=FALSE],MARGIN=1, function(x) plnorm((x+delta),meanlog=as.numeric(parG[]$estimate[1]), sdlog = as.numeric(parG[]$estimate[2]),lower.tail = FALSE)) #Pval=P[X > x]
    fminforPval<-function(x,parG,pVal){ a=abs(pVal-plnorm(x,meanlog=as.numeric(parG[]$estimate[1]), sdlog = as.numeric(parG[]$estimate[2]),lower.tail = FALSE)) }
    upper= optimize(f = fminforPval , lower = min(x1),upper = max(x1), maximum = FALSE,tol = .0000001, parG=parG,pVal=pVal )$minimum
  }
  if(bestMethod=="logistic") {yfit<-dlogis(xfit,location=as.numeric(parG[]$estimate[1]),scale = as.numeric(parG[]$estimate[2]))
    Res[,name2]= apply(X=Res[,name1,drop=FALSE],MARGIN=1, function(x) plogis((x+delta),location=as.numeric(parG[]$estimate[1]),scale = as.numeric(parG[]$estimate[2]),lower.tail = FALSE)) #Pval=P[X > x]
    fminforPval<-function(x, parG,pVal){ a=abs(pVal - plogis(x,location=as.numeric(parG[]$estimate[1]),scale = as.numeric(parG[]$estimate[2]),lower.tail = FALSE)) }
    upper= optimize(f = fminforPval , lower = min(x1),upper = max(x1), maximum = FALSE,tol = .0000001,parG=parG,pVal=pVal )$minimum
  }
  if(bestMethod=="normal") {yfit<-dnorm(xfit,mean=as.numeric(parG[]$estimate[1]), sd = as.numeric(parG[]$estimate[2]))
    Res[,name2]= apply(X=Res[,name1,drop=FALSE],MARGIN=1, function(x) pnorm((x+delta),mean=as.numeric(parG[]$estimate[1]), sd = as.numeric(parG[]$estimate[2]),lower.tail = FALSE)) #Pval=P[X > x]
    fminforPval<-function(x, parG,pVal){ a=abs(pVal - pnorm(x,mean=as.numeric(parG[]$estimate[1]), sd = as.numeric(parG[]$estimate[2]),lower.tail = FALSE)) }
    upper= optimize(f = fminforPval , lower = min(x1),upper = max(x1), maximum = FALSE,tol = .0000001, parG=parG,pVal=pVal )$minimum
  }
  if(bestMethod=="weibull") {yfit<-dweibull(xfit,shape = as.numeric(parG[]$estimate[1]),scale = as.numeric(parG[]$estimate[2]))
    Res[,name2]= apply(X=Res[,name1,drop=FALSE],MARGIN=1, function(x) pweibull((x+delta),shape = as.numeric(parG[]$estimate[1]),scale = as.numeric(parG[]$estimate[2]),lower.tail = FALSE)) #Pval=P[X > x]
    fminforPval<-function(x, parG,pVal){ a=abs(pVal - pweibull(x,shape = as.numeric(parG[]$estimate[1]),scale = as.numeric(parG[]$estimate[2]),lower.tail = FALSE)) }
    upper= optimize(f = fminforPval , lower = min(x1),upper = max(x1), maximum = FALSE,tol = .0000001, parG=parG,pVal=pVal )$minimum
  }
  ## scale the density to histogram counts (bin width * n)
  yfit =yfit*diff(hist(x1,breaks=nbox,plot = FALSE)$mids[1:2])*length(x1)
  pthresu=yfit[which(xfit>=upper)[1]]
  # end calculate y-values. push x-axes back (undo the delta shift)
  x1 = x1 - delta
  xfit=xfit- delta
  upper=upper-delta
  # Pic
  png(filename = paste0(folder,ForTest,"_PVal-threshold_",round(upper,3),".png"),width = 3.25, height = 3.25,units = "in",res = 1200, pointsize = 6)
  hist(x1,breaks=nbox, main=main,xlab=xlab)#,xaxt="n")
  lines(xfit, yfit, col="blue", lwd=2)
  ## red marker at the threshold position
  polygon(c(upper,upper+max(xfit)/150,upper+max(xfit)/150,upper),c(pthresu*1.5,pthresu*1.2,0,0),col="red4",border="red4")
  legend("topright", inset=.05,c(paste0("Fit with ",bestMethod," distribution"), paste0("Significant dynamics with p-value: ",round(pVal,3)),paste0("threshold: ",round(upper,3))), fill=c("blue","lightcoral","red4"), horiz=FALSE,bty = "n")
  ## shade the rejection region under the fitted curve
  polygon(c(xfit[xfit>upper],rev(xfit[xfit>upper])),c(yfit[xfit>upper],yfit[xfit>upper]*0),col=rgb(1, 0, 0,0.5),xpd=FALSE)#
  dev.off()
  #End Pic
  A=list()
  A$tres=upper
  A$Pval=Res[,name2,drop=FALSE]
  return(A)
}
#######################################################################################################################################################################################
## optimalMethodeDistr
## For a candidate shift `delta`: fit six distribution families
## (MASS::fitdistr) to x1+delta and return the best penalized log-likelihood
## (`opt`) plus the winning family name (`method`). The penalty
## (|delta|/dmax)*0.01*loglik prefers the smallest shift among equal fits.
## NOTE(review): the penalty sign assumes a positive log-likelihood; with a
## negative loglik the term rewards larger shifts instead — confirm intended.
## NOTE(review): on loglik ties which() returns several indices, so A$method
## can have length > 1; DistplotPval compares it with `==` — confirm.
optimalMethodeDistr<-function(delta, x1,dmax ){
  x1= x1+delta
  xfit<-seq(min(x1,na.rm = TRUE),max(x1,na.rm = TRUE),length=2000)
  # search distribution with best fit
  cmet<-c( "cauchy", "gamma", "lognormal", "logistic","normal","weibull")
  loglik<-NULL
  for (i in 1:length(cmet)){
    loglik[i]<-suppressWarnings(tryCatch(fitdistr(x1,cmet[i])$loglik, error=function(e) {return(-Inf)})) # test goodness of fit; if something produces an error, return -Inf
  }
  A=list()
  A$opt<-max(loglik,na.rm = TRUE)-(abs(delta)/dmax)*0.01*max(loglik,na.rm = TRUE)# with penalty term to have minimum delta if the result is equal
  A$method<-cmet[which(loglik==max(loglik,na.rm = TRUE))]
  return(A)
}
#######################################################################################################################################################################################
## my.write
## Write table `x` to `file`, optionally preceded by a free-text `header` line;
## `f` is the writer function (default write.csv) and `...` is forwarded to it.
## The file connection is closed automatically on exit.
my.write <- function(x, file, header, f = write.csv, ...){
  # create and open the file connection
  datafile <- file(file, open = 'wt')
  # close on exit
  on.exit(close(datafile))
  # if a header is defined, write it to the file
  if(!missing(header)) writeLines(header,con=datafile)
  # write the file using the defined function and required additional arguments
  f(x, datafile,...)
}
#######################################################################################################################################################################################
# FF.median(dat=grp1.grp2, grp=grp, tme=grp1.grp2.time,dynScor=dynScor,global=global)
## FF.median
## Per-row F-like dynamics statistic: rss0 is the error of a single median fit
## through BOTH groups pooled (via `medianr`), rss1 the summed errors of
## separate per-group fits (via `medianrS`); FF = rss0/rss1 grows when the two
## groups follow different dynamics.
## NOTE(review): medianr/medianrS/weightvec are defined elsewhere in this file
## — the description above is inferred from their names and usage; confirm.
## Args:
##   dat     - numeric matrix, rows = genes, columns = samples
##   grp     - per-column group label (1 or 2)
##   tme     - per-column measurement time
##   dynScor - optional tcltk progress bar handle; "0" disables updates
##   global  - optional tcltk progress bar handle; "0" disables updates
## Returns: list(FF = rss0/rss1 per row,
##               qr1/qr2 = matrices of fitted values per group).
FF.median = function(dat, grp, tme,dynScor=0,global=0){
  dat1 = dat[, grp==1]
  dat2 = dat[, grp==2]
  tme1 = tme[grp==1]
  tme2 = tme[grp==2]
  weight<-weightvec(grp1.time=tme1,grp2.time=tme2)
  prg<-unique(round((seq(nrow(dat)/32,nrow(dat), nrow(dat)/32))))  # progress checkpoints
  timeusage<-proc.time()
  ## joint fit over both groups pooled -> null-model residuals per row
  rss0 = sapply(1:nrow(dat) , medianr, dat=dat , xx=tme ,w=weight$grp1and2 , k=2,prg=prg,dynScor=dynScor)
  timeUse<-sum(as.numeric(proc.time()-timeusage)[1:2])*2
  if (as.character(global )[1]!="0"){setTkProgressBar(global, 20, label=paste0( "Program checks whether the dynamics are significant. This step is expected to be completed by ",ProgTime(timeUse),"."))}
  if (as.character(dynScor)[1]!="0"){setTkProgressBar(dynScor,34, label=paste0( "Program checks whether the dynamics are significant. (",34," % done)"))}
  weight<-weightvec(grp1.time=tme1,grp2.time=tme2)  # NOTE(review): recomputed identically to above — confirm necessary
  prg<-unique(round((seq(nrow(dat)/32,nrow(dat), nrow(dat)/32))))
  timeusage<-proc.time()
  # fill up a list in list here. first list indicate gene. second list indicate error value a and fitted values res.
  Output1 = lapply(1:nrow(dat1), medianrS, dat=dat1, xx=tme1 ,w=weight$grp1 ,k=34,prg=prg,dynScor=dynScor)
  timeUse<-sum(as.numeric(proc.time()-timeusage)[1:2])
  if (as.character(global )[1]!="0"){setTkProgressBar(global, 30, label=paste0( "Program checks whether the dynamics are significant.
This step is expected to be completed by ",ProgTime(timeUse),"."))}
  if (as.character(dynScor)[1]!="0"){setTkProgressBar(dynScor,66, label=paste0( "Program checks whether the dynamics are significant. (",66," % done)"))}
  Output2 = lapply(1:nrow(dat2), medianrS, dat=dat2, xx=tme2 ,w=weight$grp2 ,k=66,prg=prg,dynScor=dynScor)
  if (as.character(global )[1]!="0"){setTkProgressBar(global, 39, label=paste0( "Program checks whether the dynamics are significant. This step will be finished soon."))}
  if (as.character(dynScor)[1]!="0"){setTkProgressBar(dynScor,99, label=paste0( "Program checks whether the dynamics are significant. (",99," % done)"))}
  # unlist list in list: per-row error (a) and fitted curve (res) for group 1
  RolfA1<-1:length(Output1)*NA
  RolfRes1<- matrix( NA, length(Output1),length(Output1[[1]]$res))
  for (i in 1:length(Output1) ){
    RolfA1[i]<-Output1[[i]]$a
    RolfRes1[i,]<-Output1[[i]]$res
  }
  ## same unpacking for group 2
  RolfA2<-1:length(Output2)*NA
  RolfRes2<- matrix( NA, length(Output2),length(Output2[[1]]$res))
  for (i in 1:length(Output2) ){
    RolfA2[i]<-Output2[[i]]$a
    RolfRes2[i,]<-Output2[[i]]$res
  }
  rss1 = colSums(rbind(RolfA1, RolfA2))  # alternative-model residuals: separate fits summed
  # Make output list
  Dynamic=list()
  Dynamic$FF <- rss0/rss1
  Dynamic$qr1<-RolfRes1
  Dynamic$qr2<-RolfRes2
  return(Dynamic)
}
#######################################################################################################################################################################################
## ProgTime (definition continues beyond this file chunk)
## Convert an expected duration `timeforSample` (seconds) into a human-readable
## completion clock time ("HH:MM AM/PM", plus "today"/"tomorrow"/"in d ...").
ProgTime<-function(timeforSample) {
  ## current clock time split at ":"; NOTE(review): "%X" is locale dependent —
  ## the split assumes an HH:MM:SS representation; confirm for target locales.
  tt1<-as.numeric(unlist(strsplit( format(Sys.time(), "%X "),":")[1]))
  Sincemidnight<-tt1[1]*3600+tt1[2]*60+tt1[3]
  prognostTime<-timeforSample+Sincemidnight
  min<-h<-d<-0  # NOTE(review): `min` shadows base::min inside this function
  ## decompose the predicted completion time into days / hours / minutes
  d<-floor(prognostTime/(24*3600))
  prognostTime<-prognostTime-24*3600*d
  h<-floor(prognostTime/(3600))
  prognostTime<-prognostTime-3600*h
  min<-floor(prognostTime/(60))
  if (h<12){ uhrzeit<-paste0(sprintf("%02.0f", h),":",sprintf("%02.0f", min)," AM")} else { uhrzeit<-paste0(sprintf("%02.0f", h-12),":",sprintf("%02.0f", min)," PM")}
  if (d==0){ day<-"today" }
  if (d==1){ day<-"tomorrow" }
  if (d>1){ day<- paste0("in ",d," 
days") } time<-paste0( uhrzeit," ",day ) return(time) } ####################################################################################################################################################################################### # grp1.time <- c(0,0,0.5,0.5,1,2,3,3,3,4,5,6,8,12,18,24,24,24,48,48,48,50,50,50) # grp2.time <- c(0,0,0,0.5,0.5,2,4,6,8,12,24,24,24,48,48,48,48,48) # weight<-weightvec(grp1.time=grp1.time,grp2.time=grp2.time) weightvec<-function(grp1.time,grp2.time){ d<-c(grp1.time,grp2.time) e<-rep(1, length(d)) a<-unique(c(grp1.time,grp2.time)) b<-unique(grp1.time) f<-unique(grp2.time) # Compare only common time points e[!(d %in% intersect(b,f))]<-0 e1<-e[1:length(grp1.time)] e2<-e[(length(grp1.time)+1):length(e)] e3<-e g1<-as.data.frame(table(grp1.time)) for (i in 1:length(grp1.time)){ e1[i]<-e1[i]*(1/g1[g1$grp1.time==grp1.time[i],2] )} # each common time-point weight have the same value 1 e1<-e1*(length(grp2.time)/length(grp1.time)) # models with more datapoints have more residuels, but perhaps a better fit. 
# all residuels weighted to residuel per datapoint # otherwise a model with less datapoints have a benefit, because there more flexible in fitting #sum(e1/(length(grp1.time))) #sum(e1*(length(grp2.time)/length(grp1.time))) #sum(e1*(length(grp2.time)/(length(grp1.time)+length(grp2.time)))) g2<-as.data.frame(table(grp2.time)) for (i in 1:length(grp2.time)){ e2[i]<-e2[i]*(1/g2[g2$grp2.time==grp2.time[i],2] )} e2<-e2*(length(grp1.time)/length(grp2.time)) #sum(e2/(length(grp2.time))) #sum(e2*(length(grp1.time)/length(grp2.time))) #sum(e2*(length(grp1.time)/(length(grp1.time)+length(grp2.time)))) sum(c(e1,e2)) g3<-as.data.frame(table(d)) for (i in 1:length(d)){ e3[i]<-e3[i]*(1/g3[g3$d==d[i],2] )} e3<-e3*sum(c(e1,e2))/sum(e3) # balance the local null hypotheses out #sum(e3/length(e3)) A<-list() A$grp1<-e1 A$grp2<-e2 A$grp1and2<-e3 return(A) } ####################################################################################################################################################################################### # res2 = sapply(1:nrow(dat2), medianr, dat=dat2, xx=tme2 ,w=weight$grp2 ,k=66,prg=prg,dynScor=dynScor) medianr = function(i, dat, xx ,w,k,prg,dynScor) { res <- rqss(as.numeric(dat[i,]) ~ qss(xx,lambda=0.6), tau = 0.5) #knots/estimated Intensity for each time point res$coef[2:length(res$coef)]=res$coef[2:length(res$coef)]+ res$coef[1] res<-res$coef z1<-rle(xx[order(xx)])$lengths # replikates per timepoint z2<-cumsum(c(1,z1)) # cumative sum H<-matrix(NA,1,sum(z1)) # Intensity for each timepoint multiplicated with amount of replicates: for (j in 1:length(z1)){ H[z2[j]:(z2[j]+z1[j]-1)]=rep(res[j],z1[j]) } l<-abs(H-dat[i,]) # residual= estimated Intensity minus rawdata a<-sum(l*w) # each value have a particular weight # progressbar if ((i %in% prg) & (as.character(dynScor)[1]!="0") ){ idx=1:length(prg) setTkProgressBar(dynScor, (idx[i==prg]+k), label=paste0( "Program checks whether the dynamics are significant. 
(",idx[i==prg]+k," % done)")) } return(a) } ###################################################################### # like medianr but with fitted values as additional feedback medianrS = function(i, dat, xx ,w,k,prg,dynScor) { res <- rqss(as.numeric(dat[i,]) ~ qss(xx,lambda=0.6), tau = 0.5) #knots/estimated Intensity for each time point res$coef[2:length(res$coef)]=res$coef[2:length(res$coef)]+ res$coef[1] res<-res$coef z1<-rle(xx[order(xx)])$lengths # replikates per timepoint z2<-cumsum(c(1,z1)) # cumative sum H<-matrix(NA,1,sum(z1)) #prepare an empty vector # Intensity for each timepoint multiplicated with amount of replicates: for (j in 1:length(z1)){ H[z2[j]:(z2[j]+z1[j]-1)]=rep(res[j],z1[j]) # New fitted value times number of replikates to obtain a vector with the same size as original measurement vector is. } l<-abs(H-dat[i,]) # residual= estimated Intensity minus rawdata a<-sum(l*w) # each value have a particular weight # progressbar if ((i %in% prg) & (as.character(dynScor)[1]!="0") ){ idx=1:length(prg) setTkProgressBar(dynScor, (idx[i==prg]+k), label=paste0( "Program checks whether the dynamics are significant. 
(",idx[i==prg]+k," % done)")) } Output=list() Output$a<-a Output$res<-res return(Output) } ################################################################################################################################# ChangeToGO<-function(grp1=grp1,grp2=grp2,grp1.time=grp1.time, mapGO){ # dat=merge(grp1,grp2,by="row.names") rownames(dat) <- dat[,"Row.names"] dat <- dat[,-1] grp <- c(rep(1, ncol(grp1)),rep(2, ncol(grp2)))# data groups A3<-unique(mapGO[,"go_id"]) B<-matrix(NA,length(A3),ncol(dat)) colnames(B)=colnames(dat) rownames(B)=A3 Sd<-Su<-B #S for standarddeviation, B for mean Itensity groupsize<-B[ ,1,drop=FALSE] colnames(groupsize)<-"groupsize" prggo<-unique(round((seq(nrow(B)/80,nrow(B), nrow(B)/80)))) #for progress idxgo=1:length(prggo) nstart=which(duplicated(grp1.time)==TRUE)[1] #number of replicates at starttime (0h) for (i in 1:nrow(B)){ A4 = mapGO[mapGO[,"go_id"]==rownames(B)[i],"probeset_id"] #probeset Ids for a given GO group A5 = dat[rownames(dat) %in% A4 ,] # expression data for each probeset_ID for a given group A6 = t(apply(A5,1,function(x) x- mean(x[1:nstart])))# zeropoint is mean centered for all genes in this group if(nrow(A6)>2){ B[i,]=apply(A6,2,mean) A7<-A6*NA groupsize[i,1]<-nrow(A6) for (k in 1:nrow(A6)){ A7[k,] = A6[k,] > B[i,] } for (j in 1:ncol(A6)){ su <- A6[A7[ ,j]==1,j] # over the mean Su[i,j] <- mean(su-B[i,j]) sd <- A6[A7[ ,j]==0,j] # under the mean Sd[i,j] <- -mean(sd-B[i,j]) }}} B<-B[!is.na(B[,1]),] Su<-Su[!is.na(Su[,1]),] Sd<-Sd[!is.na(Sd[,1]),] foundall<-intersect(intersect(rownames(B),rownames(Su)),rownames(Sd)) # get sure that all have the same Go groups B<-B[rownames(B) %in% foundall,] Su<-Su[rownames(Su) %in% foundall,] Sd<-Sd[rownames(Sd) %in% foundall,] print(paste0(format(Sys.time(), "%X"),": ",nrow(B)," Gene Ontology groups can be analyzed ")) #### A3<-mapGO[,c("go_id","GO_name")] colnames(A3)=c("probeset_id","gene_name") #only for the programm A4<-A3[A3[,"probeset_id"] %in% rownames(B),] 
A4<-A4[!duplicated(A4[,"probeset_id"]),] #ID in row have to be unique rownames(A4)<-A4[,"probeset_id"] ### GO<-list() GO$grp1<-B[,grp==1] GO$grp2<-B[,grp==2] GO$annot<-A4 GO$groupsize<-groupsize GO$grp1SDu<-Su[,grp==1] GO$grp2SDu<-Su[,grp==2] GO$grp1SDd<-Sd[,grp==1] GO$grp2SDd<-Sd[,grp==2] return(GO) } ################################################################################################################################# revOverlapScore<-function(go=GO,tm1=grp1.time,tm2=grp2.time,folder=folder){ GO<-NULL grp1.time<-NULL grp2.time<-NULL tmi<-intersect(tm1,tm2) grp1<-go$grp1 grp2<-go$grp2 grp1SDu <-go$grp1SDu grp2SDu <-go$grp2SDu grp1SDd <-go$grp1SDd grp2SDd <-go$grp2SDd GOsz <-go$groupsize ROS<-grp1[,1,drop=FALSE]*NA colnames(ROS)<-"RevOverlapScore" for(n in 1:nrow(ROS)){ ovl=1:length(tmi)*NA for(m in 1:length(tmi)){ m1<-mean( grp1[n, tm1==tmi[m]]) m2<-mean( grp2[n, tm2==tmi[m]]) sdu1<-mean(grp1SDu[n, tm1==tmi[m]]) sdu2<-mean(grp2SDu[n, tm2==tmi[m]]) sdd1<-mean(grp1SDd[n, tm1==tmi[m]]) sdd2<-mean(grp2SDd[n, tm2==tmi[m]]) if(all(m1==m2,sdu1==sdu2,sdd1==sdd2)){ ovl[m]=NA }else{ #ignore identical columns. 
Example 0 control ovl[m]= max((m1-sdd1)-(m2+sdu2),(m2-sdd2)-(m1+sdu1))/(0.5*(sdu1+sdu2+sdd1+sdd2)) }} ROS[n,]<-max(ovl, na.rm = TRUE) } # for Pic png(filename = paste0(folder,"/Venn_and_other_characteristics/ReverseOverlapscore_MintoMax.png"),width = 5.25,height = 6.25,units = "in",res = 1200, pointsize = 6) par(mfrow=c(2,1)) M<-which(ROS==max(ROS, na.rm = TRUE))[1] Mr<-c(grp1[M,],grp2[M,],grp1[M,]+grp1SDu[M,],grp2[M,]+grp2SDu[M,],rev(grp1[M,]-grp1SDd[M,]),grp2[M,]-grp2SDd[M,]) plot(tm1,grp1[M,], type="l" ,col="red",ylab="Intensity change",xlab="time",main=paste0("maximum reverse overlap score: ",round(max(ROS),2)," (",-round(max(ROS)*100,1),"% minimum overlap)"), ylim=range(Mr)) lines(tm2,grp2[M,],col="blue") polygon(c((tm1),rev((tm1))),c(grp1[M,]+grp1SDu[M,],rev(grp1[M,]-grp1SDd[M,])),col=rgb(1, 0, 0,0.1),xpd=FALSE,border = NA) polygon(c((tm2),rev((tm2))),c(grp2[M,]+grp2SDu[M,],rev(grp2[M,]-grp2SDd[M,])),col=rgb(0, 0, 1,0.1),xpd=FALSE,border = NA) M<-which(ROS==min(ROS, na.rm = TRUE))[1] Mr<-c(grp1[M,],grp2[M,],grp1[M,]+grp1SDu[M,],grp2[M,]+grp2SDu[M,],rev(grp1[M,]-grp1SDd[M,]),grp2[M,]-grp2SDd[M,]) plot(tm1,grp1[M,], type="l" ,col="red",ylab="Intensity change",xlab="time",main=paste0("Minimum reverse overlap score: ",round(min(ROS),2)," (",round(-min(ROS)*100,1),"% minimum overlap)"),ylim=range(Mr)) lines(tm2,grp2[M,],col="blue") polygon(c((tm1),rev((tm1))),c(grp1[M,]+grp1SDu[M,],rev(grp1[M,]-grp1SDd[M,])),col=rgb(1, 0, 0,0.1),xpd=FALSE,border = NA) polygon(c((tm2),rev((tm2))),c(grp2[M,]+grp2SDu[M,],rev(grp2[M,]-grp2SDd[M,])),col=rgb(0, 0, 1,0.1),xpd=FALSE,border = NA) dev.off() # end Pic return(ROS) } #end function