### This script is for the analyses ran in the "Valuing Species on the Cheap" ###comment for Animal Conservation June 2015 ###by Laurelene Faye, Remi Matthey-Doret and Arne O. Mooers. ## Script by Remi Matthey-Doret, Arne Mooers and Laurelene Faye ## Version du 2015-06-14 (updated 2015-06-24) ##Packages require(plyr) require(tcltk) require(tcltk2) require(data.table) require(ape) require(phytools) require(caper) ############################################################################################################### ############################################ 1. Functions ##################################################### ############################################################################################################### # x = obs. nb of sp in this group x = x,q number of white balls drawn from the urn # ss = nb of sampled species ss = k = number of balls drawn from the urn # ng = nb of species in this group ng = m = number of white balls in the urn # n = total nb of species in the tree n - ng = their n = number of black balls in the urn PGEX = function(x, ng, n, ss) 1 - phyper(q = x - 1, m = ng, n = n - ng, k = ss) PSEX = function(x, ng, n, ss) phyper(q = x, m = ng, n = n - ng, k = ss) ################################################################################################################ ############################################ 2. Data re-formatting ############################################# ################################################################################################################ # I. Data-formating for reference tree (data) data = read.table("path_to_Huang2012", sep="\t", header=T) for(l in 1:nrow(data)){ data[l,9] = strsplit(as.character(data$Species), " ")[[l]][1] data[l,10] = strsplit(as.character(data$Species), " ")[[l]][2] } data=data[,c(1,9,10,3,4,5,6,7,8)] colnames(data)=c("Family", "Genus", "Species", "Reef", "Red.List.category", "EoE.rank", "EDGE.rank.IUCN100.", "EDGE.rank.Isaac.", "EDGE.rank.Pessimistic." ) data=data[data[,5]!="N/A",] nbfamilies=length(unique(data[,1])) nbgeneras=length(unique(data[,2])) n=length(data[,1]) # II. Data-formating for Fukami 2008 tree (dataFu) dataFu = read.table("path_to_Fukami2008", sep="\t", header=T) colnames(dataFu)=c("Family","Genus", "Species") dataFu=merge(data, dataFu, by.x=c("Family","Genus", "Species"), by.y=c("Family","Genus", "Species") ) nbfamiliesFu=length(unique(dataFu[,1])) familiesFu=unique(dataFu[,1]) generaFu=unique(dataFu[,2]) # III. Creation of useful data #### data ###nb of genera and species per family in the ref data set nbfamilies = length(unique(data[,1])) nameFam = unique(data[,1]) nbGpF = data.frame(1:nbfamilies, nameFam, 1:nbfamilies, 1:nbfamilies, 1:nbfamilies) colnames(nbGpF) = c("Families", "nameFam","nbGenera","nbGeneraFu","nbSpecies") dataFu[,1] = factor(dataFu[,1], levels=levels(nameFam)) for (nbF in 1:nbfamilies) { nbGpF[nbF,3]=length(unique(data[which(data[,1]==nameFam[nbF]),2])) nbGpF[nbF,4]=length(unique(dataFu[which(dataFu[,1]==nameFam[nbF]),2])) nbGpF[nbF,5]=length(unique(data[which(data[,1]==nameFam[nbF]),3])) } ###nb of species per genera in the ref data set nbgenera = length(unique(data[,2])) nameGen = factor(unique(data[,2])) nbSpG = data.frame(1:nbgenera, nameGen, 1:nbgenera, 1:nbgenera) colnames(nbSpG) = c("Genera", "nameGen","nbSpecies", "nbSpeciesFu") dataFu[,2] = factor(dataFu[,2], levels=levels(nameGen)) for (nbG in 1:nbgenera) { nbSpG[nbG,3]=length(unique(data[which(data[,2]==nameGen[nbG]),3])) nbSpG[nbG,4]=length(unique(dataFu[which(dataFu[,2]==nameGen[nbG]),3])) } for (length in 1:n) { data[length,10]= nbGpF[data[length,1]==nbGpF[,2],3] data[length,11]= nbSpG[data[length,2]==nbSpG[,2],3] data[length,12]= nbGpF[data[length,1]==nbGpF[,2],5] } colnames(data)=c("Family", "Genus", "Species", "Reef", "Red.List.category", "EoE.rank", "EDGE.rank.IUCN100.", "EDGE.rank.Isaac.", "EDGE.rank.Pessimistic.","Nb.genera.in.family","Nb.species.in.genus","Nb.species.in.family") ### dataFU ###nb of genera per family in the dataFu data set nbfamiliesFu = length(unique(dataFu[,1])) nameFamFu = unique(dataFu[,1]) nbGpFFu = data.frame(1:nbfamiliesFu, nameFamFu, 1:nbfamiliesFu) colnames(nbGpFFu) = c("FamiliesFu", "nameFamFu","nbGeneraFu") for (nbFFu in 1:nbfamiliesFu) { nbGpFFu[nbFFu,3]=length(unique(dataFu[which(dataFu[,1]==nameFamFu[nbFFu]),2])) } ###nb of species per genera in the dataFu data set nbgeneraFu = length(unique(dataFu[,2])) nameGenFu = unique(dataFu[,2]) nbSpGFu = data.frame(1:nbgeneraFu, nameGenFu, 1:nbgeneraFu) colnames(nbSpGFu) = c("GeneraFu", "nameGenFu","nbSpeciesFu") for (nbGFu in 1:nbgeneraFu) { nbSpGFu[nbGFu,3]=length(unique(dataFu[which(dataFu[,2]==nameGenFu[nbGFu]),3])) } ################################################################################################################ ############################################ 3. Analyses ####################################################### ################################################################################################################ ####################### a. Probs of nb genera in families # x = obs. nb of sp in this group # ss = nb of sampled species # ng = nb of species in this group # n = total nb of species in the tree ###### Constants ss = length(dataFu[,1]) n=length(data[,1]) nbfamilies = length(unique(data[,1])) ##Creates the probability table for dataFu probas = as.data.frame(matrix(0, ncol=4, nrow=nbfamilies)) colnames(probas) = c("family", "PGEX", "PSEX", "nb_genera") for (fam in 1:nbfamilies) { ng=nbGpF[fam, 3] famName=nbGpF[fam, 2] nbGpFFu[,2] = factor(nbGpFFu[,2], levels=levels(famName)) for (nn in 1:length(nbGpFFu[,2])) { if (famName==nbGpFFu[nn,2]) { x=nbGpFFu[nn,3] } } famName=as.character(nbGpF[fam, 2]) probas[fam, 1] = famName probas[fam, 2] = PGEX(x, ng, n, ss) probas[fam, 3] = PSEX(x, ng, n, ss) probas[fam, 4] = ng } ##Creates the probability table for data for (fam in 1:nbfamilies) { ng=nbGpF[fam, 5] famName=nbGpF[fam, 2] nbGpFFu[,2] = factor(nbGpFFu[,2], levels=levels(famName)) for (nn in 1:length(nbGpFFu[,2])) { if (famName==nbGpFFu[nn,2]) { x=nbGpFFu[nn,3] } } famName=as.character(nbGpF[fam, 2]) probas[fam, 1] = famName probas[fam, 2] = PGEX(x, ng, n, ss) probas[fam, 3] = PSEX(x, ng, n, ss) probas[fam, 4] = ng } ######### Over-dispersion ########## #### Calculate the percentage of families that are over-represented significant = probas$PGEX < 0.05 print(paste(sum(significant) / length(probas[,2]) * 100, " % of families are over-represented")) #### Calculate the correlation between the P-value of over-dispersion (PGEX) ~ FamilySize probas$PvalBin = as.numeric(probas$PGEX > 0.5) reg=cor.test(y = probas$PvalBin, x = nbGpF[,5], method="spearman", exact=FALSE) ################################################################################################################ ################################# b. For species in genera ###### Constants ss = length(dataFu[,1]) n=length(data[,1]) nbgenera=length(unique(data[,2])) #### probas = as.data.frame(matrix(0, ncol=4, nrow=nbgenera)) colnames(probas) = c("genus", "PGEX", "PSEX", "nb_species") for (genera in 1:nbgenera) { ng=nbSpG[genera, 3] genusName=nbSpG[genera, 2] nbSpGFu[,2] = factor(nbSpGFu[,2], levels=levels(genusName)) for (nn in 1:length(nbSpGFu[,2])) { if (genusName==nbSpGFu[nn,2]) { x=nbSpGFu[nn,3] } } genusName=as.character(nbSpG[genera, 2]) probas[genera, 1] = genusName probas[genera, 2] = PGEX(x, ng, n, ss) probas[genera, 3] = PSEX(x, ng, n, ss) probas[genera, 4] = ng } ######### Over-dispersion ########## #### Calculate the percentage of genera that are over-represented significant = probas$PGEX < 0.05 print(paste(sum(significant) / length(probas[,2]) * 100, " % of genera are over-represented")) ### Calculate the correlation between the P-value of over-dispersion (PGEX) ~ GenusSize probas$PvalBin = as.numeric(probas$PGEX > 0.5) reg=cor.test(y = probas$PvalBin, x = probas$nb_species, method="spearman", exact=FALSE) ################################################################################################################ ###################### c. Check the rho rbivariate <- function(mean.x = 0, sd.x=1, mean.y=0, sd.y=1, r=.70, iter=1000) { z1 <- rnorm(iter) z2 <- rnorm(iter) x <- sqrt(1-r^2)*sd.x*z1 + r*sd.x*z2 + mean.x y <- sd.y*z2 + mean.y return(list(x,y)) } rho.7 <- rbivariate(iter=1000) #produce the bivariate data #record, give species names and order obs.7=data.frame(rho.7[[1]], c(1:1000)) colnames(obs.7)=c("obs_data", "obs_names") obs.7=obs.7[order(-obs.7$obs_data),] exp.7=data.frame(rho.7[[2]],c(1:1000)) colnames(exp.7)=c("exp_data", "exp_names") exp.7=exp.7[order(-exp.7$exp_data),] #what is the overlap? sum(exp.7$exp_names[1:50] %in% obs.7$obs_names[1:50]) ################################################################################################################ ############################ d. Yule analysis nsims=100 #change at will cor.ed=vector() size=1000 #change at will yt = pbtree(n=size, scale=1, nsim=nsims) #produce Yule trees of depth 1 for (i in 1:nsims){ tt=yt[[i]] ttt= compute.brlen(tt) #transform Yule tree BL to Grafen (standard) heights, depth = 1 cm=clade.matrix(tt) yed=ed.calc(cm)$spp$ED #ED on Yule tree cm1=clade.matrix(ttt) ted=ed.calc(cm1)$spp$ED #ED on transformed tree, same tip names and orientation cor.ed[i]=cor.test(yed, ted)$estimate } mean(cor.ed, na.rm=TRUE) hist(cor.ed) quantile(cor.ed, c(0,0.05, 0.1, 0.2, 0.5, 0.75, 1)) ################################################################################################################ ######################### e. Proportions of endangered taxa in Fu tree compared to ref tree. ## Proportion of nn DD taxa in big tree nonDD = data[data[,5]!="DD",] PropnonDD = length(nonDD[,5])/length(data[,5])*100 Endangered = nonDD[nonDD[,5]!= "NT",] Endangered2 = Endangered[Endangered[,5]!= "LC",] PropEndangered = length(Endangered2[,5])/length(nonDD[,5])*100 ## Proportion of nn DD taxa in Fu tree nonDDFu = dataFu[dataFu[,5]!="DD",] PropnonDDFu = length(nonDDFu[,5])/length(dataFu[,5])*100 EndangeredFu = nonDDFu[nonDDFu[,5]!= "NT",] Endangered2Fu = EndangeredFu[EndangeredFu[,5]!= "LC",] PropEndangeredFu = length(Endangered2Fu[,5])/length(nonDDFu[,5])*100 ########## PERMUTATION ########## #### Permutation test proportion endangered nb_iter = 100000 samplesize = 124 taxon = "Red.List.category" sampleRange = 1:nrow(data) for (i in 1:nb_iter){ sub = data[sample(c(1:nrow(data)), size=124),] nonDD = sub[sub[,5]!="DD",] Endangered = nonDD[nonDD[,5]!= "NT",] Endangered2 = Endangered[Endangered[,5]!= "LC",] PropEndangered[i] = length(Endangered2[,5])/length(nonDD[,5])*100 } obs = PropEndangeredFu if (sum(PropEndangered < obs) > sum(PropEndangered > obs)) { p.val = paste("p-value = ", 2*sum(PropEndangered > obs) / nb_iter) } else { p.val = paste("p-value = ", 2*sum(PropEndangered < obs) / nb_iter) } ### Permutation test families nb_iter = 100000 samplesize = 124 nb_taxa = c() taxon = "Family" sampleRange = 1:nrow(data) for (i in 1:nb_iter){ nb_taxa[i] = length(unique(sample(data[taxon][[1]], size=124))) } obs = length(unique(dataFu[taxon][[1]])) if (sum(nb_taxa < obs) > sum(nb_taxa > obs)) { p.val = paste("p-value = ", 2*sum(nb_taxa > obs) / nb_iter) } else { p.val = paste("p-value = ", 2*sum(nb_taxa < obs) / nb_iter) } ### Permutation test genera nb_iter = 100000 samplesize = 124 nb_taxa = c() taxon = "Genus" sampleRange = 1:nrow(data) for (i in 1:nb_iter){ nb_taxa[i] = length(unique(sample(data[taxon][[1]], size=124))) } obs = length(unique(dataFu[taxon][[1]])) if (sum(nb_taxa < obs) > sum(nb_taxa > obs)) { p.val = paste("p-value = ", 2*sum(nb_taxa > obs) / nb_iter) } else { p.val = paste("p-value = ", 2*sum(nb_taxa < obs) / nb_iter) } ### Permutation test Endangered Status nb_iter = 100000 samplesize = 124 nb_taxa = c() taxon = "Red.List.category" data$Red.List.category = as.character(data$Red.List.category) data$Red.List.category[data$Red.List.category == "DD"] = 0 data$Red.List.category[data$Red.List.category == "LC"] = 0 data$Red.List.category[data$Red.List.category == "NT"] = 1 data$Red.List.category[data$Red.List.category == "VU"] = 2 data$Red.List.category[data$Red.List.category == "EN"] = 3 data$Red.List.category[data$Red.List.category == "CR"] = 4 data$Red.List.category = as.factor(data$Red.List.category) dataFu$Red.List.category = as.character(dataFu$Red.List.category) dataFu$Red.List.category[dataFu$Red.List.category == "DD"] = 0 dataFu$Red.List.category[dataFu$Red.List.category == "LC"] = 0 dataFu$Red.List.category[dataFu$Red.List.category == "NT"] = 1 dataFu$Red.List.category[dataFu$Red.List.category == "VU"] = 2 dataFu$Red.List.category[dataFu$Red.List.category == "EN"] = 3 dataFu$Red.List.category[dataFu$Red.List.category == "CR"] = 4 dataFu$Red.List.category = as.factor(dataFu$Red.List.category) sampleRange = 1:nrow(data) for (i in 1:nb_iter){ nb_taxa[i] = sum(sample(as.numeric(data[,5]), size=124)) } obs = sum(as.numeric(dataFu[,taxon])) if (sum(nb_taxa < obs) > sum(nb_taxa > obs)) { p.val = paste("p-value = ", 2*sum(nb_taxa > obs) / nb_iter) } else { p.val = paste("p-value = ", 2*sum(nb_taxa < obs) / nb_iter) }