#Study:Does genome-wide variation and putatively adaptive variation identify the same set of distinct populations? #Script author: Avneet K. Chhina # This script has the code for all figures and tables as well as statistical analysis performed on the data from 34 species. # load packages #### library(tidyverse) library(ggplot2) library(palmerpenguins) library(ggplot2) library(rphylopic) library(grid) library(ggpubr) library(dunn.test) library(patchwork) library(statip) library(car) library(tidyverse) library(ggplot2) library(palmerpenguins) library(ggplot2) library(rphylopic) library(grid) library(gridExtra) library(betareg) library(rstatix) # set your working directory # all_adapt_analysis <- read.csv("Details_of_34_studies.csv") # we will use this datasheet for all Tables, Figures, and statistical analysis below. #### Table 3.1: Information on important raw variables#### table_3_1 <-all_adapt_analysis %>% select(Study.ID, organism,number_of_total_snps,number_of_adapt_snps, individuals, populations,Method_Type_GEA_GPA_FST, Spearman.using.sig.PCs) %>% arrange(Spearman.using.sig.PCs) %>% mutate(study_number = row_number()) %>% select(study_number, Study.ID, everything()) %>% rename("study number" = study_number) %>% rename("study ID" = Study.ID) %>% rename("number of genome-wide SNPs" = number_of_total_snps) %>% rename("number of putatively adaptive SNPs" = number_of_adapt_snps) %>% rename("number of individuals" = individuals) %>% rename("number of populations" = populations) %>% rename("method type" = Method_Type_GEA_GPA_FST) %>% select(-Spearman.using.sig.PCs) #### Figure 3.1: Distribution of studies showing the total number of individuals, populations for each study along with their classification by taxa. #### ind<- ggplot(all_adapt_analysis, aes(x = individuals)) + geom_histogram(binwidth = 40, fill = "black", color = "black") + labs(x = "number of individuals in a study",y = "number of studies") + theme_minimal() + theme(aspect.ratio = 1.0,panel.background = element_blank(),panel.grid.major = element_blank(),panel.grid.minor = element_blank(),panel.border = element_blank(),axis.line = element_line(size = 1), axis.line.x = element_line(color = "black", size = 1), axis.line.y = element_line(color = "black", size = 1),axis.ticks = element_line(color = "black"),axis.text = element_text(color = "black"),axis.title = element_text(color = "black"),axis.title.y = element_text(vjust = 0.7, size = 20),axis.title.x = element_text(vjust = 0.2, size = 20), axis.text.x = element_text(size = 20),axis.text.y = element_text(size = 20, hjust = 0.5),legend.position = "none") + theme(axis.text.x = element_text(size = 20)) + scale_x_continuous(breaks = seq(0, 1500, by = 250), labels = seq(0, 1500, by = 250), expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0)) ind pop<- ggplot(all_adapt_analysis, aes(x = populations)) + geom_histogram(binwidth = 10, fill = "black", color = "black") + labs(x = "number of populations in a study",y = "number of studies") + theme_minimal() + theme(aspect.ratio = 1.0,panel.background = element_blank(),panel.grid.major = element_blank(),panel.grid.minor = element_blank(),panel.border = element_blank(),axis.line = element_line(size = 1), axis.line.x = element_line(color = "black", size = 1), axis.line.y = element_line(color = "black", size = 1),axis.ticks = element_line(color = "black"),axis.text = element_text(color = "black"),axis.title = element_text(color = "black"),axis.title.y = element_text(vjust = 0.7, size = 20),axis.title.x = element_text(vjust = 0.2, size = 20), axis.text.x = element_text(size = 20),axis.text.y = element_text(size = 20, hjust = 0.5),legend.position = "none") + theme(axis.text.x = element_text(size = 20)) + scale_x_continuous(breaks = seq(0, 100, by = 10), labels = seq(0, 100, by = 10), expand = c(0, 0)) + scale_y_continuous(breaks = seq(0, 20, by = 2), labels = seq(0, 20, by = 2), expand = c(0, 0)) pop classification<- ggplot(all_adapt_analysis, aes(x = classification)) + geom_histogram(stat = "count", fill = "black", color = "black") + labs(x = "classification",y = "number of studies") + theme_minimal() + theme(aspect.ratio = 1.0,panel.background = element_blank(),panel.grid.major = element_blank(),panel.grid.minor = element_blank(),panel.border = element_blank(),axis.line = element_line(size = 1), axis.line.x = element_line(color = "black", size = 1), axis.line.y = element_line(color = "black", size = 1),axis.ticks = element_line(color = "black"),axis.text = element_text(color = "black"),axis.title = element_text(color = "black"),axis.title.y = element_text(vjust = 0.7, size = 20),axis.title.x = element_text(vjust = 0.2, size = 20), axis.text.x = element_text(size = 20, angle = 45, vjust = 0.70, hjust = 0.60),axis.text.y = element_text(size = 20, hjust = 0.5),legend.position = "none") + theme(axis.text.x = element_text(size = 20)) + scale_y_continuous(breaks = seq(0, 9, by = 1), labels = seq(0, 9, by = 1), expand = c(0, 0)) classification classification <- classification + theme(axis.title.y = element_blank()) pop <- pop + theme(axis.title.y = element_blank()) label_A <- textGrob("A", x = 0.01, y = 0.9, hjust = 0, vjust = 0.5, gp = gpar(fontsize = 20, fontface = "bold")) label_B <- textGrob("B", x = 0.01, y = 0.9, hjust = 0, vjust = 0.5, gp = gpar(fontsize = 20, fontface = "bold")) label_C <- textGrob("C", x = 0.01, y = 0.9, hjust = 0, vjust = 0.5, gp = gpar(fontsize = 20, fontface = "bold")) grid.arrange( arrangeGrob(label_A, ind, ncol = 2, widths = c(0.1, 1)), arrangeGrob(label_B, pop, ncol = 2, widths = c(0.1, 1)), arrangeGrob(label_C, classification, ncol = 2, widths = c(0.1, 1)), ncol = 3, widths = c(1, 1, 1) ) #### Figure 3.2 Log distribution of genome-wide and putatively adaptive SNPs for each study. #### all_adapt_analysis_1 <- all_adapt_analysis %>% arrange(Spearman.using.sig.PCs) %>% mutate(study_number = row_number()) %>% select(study_number, Study.ID, everything()) %>% mutate(number_of_total_snps = number_of_total_snps) %>% arrange(proportion_of_all_adaptive) %>% mutate(log_prop = log(proportion_of_all_adaptive)) p <- ggplot(all_adapt_analysis_1, aes(x = log(proportion_of_all_adaptive))) + geom_point(aes(y = log(number_of_total_snps), color = "number_of_total_snps"), size = 4) + geom_point(aes(y = log(number_of_adapt_snps), color = "number_of_adapt_snps"), size = 4) + geom_segment(aes(xend = log(proportion_of_all_adaptive), y = log(number_of_total_snps), yend = log(number_of_adapt_snps)), color = "black") + scale_color_manual(name = "Variables", values = c(number_of_total_snps = "#00798c", number_of_adapt_snps = "#637029")) + labs(y = "log(number of SNPs)", x = "log(proportion of putatively adaptive SNPs)") + geom_text(aes(x = log(proportion_of_all_adaptive), y = log(number_of_total_snps), label = study_number), vjust = -1.91) + theme_minimal() + theme(aspect.ratio = 1.0, panel.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(), axis.line = element_line(size = 1), axis.line.x = element_line(color = "black", size = 1), axis.line.y = element_line(color = "black", size = 1), axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black"), axis.title = element_text(color = "black"), axis.title.y = element_text(vjust = 0.7, size = 20), axis.title.x = element_text(vjust = 0.2, size = 20,hjust = 0.5, margin = margin(r = 2)), axis.text.x = element_text(size = 20, hjust = 0.5, margin = margin(r = 2, t = 2)), axis.text.y = element_text(size = 20, hjust = 0.5), legend.position = "none") + scale_y_continuous(breaks = seq(0, 14, by = 1)) + scale_x_continuous(breaks = seq(-20, 10, by = 1.0)) p #### Figure 3.3 was made in PowerPoint. #### Figure 3.4 Prioritization agreement (i.e., Observed Spearman correlation) of population distinctness scores calculated using genome-wide and putatively adaptive SNPs #### spearman_correlation_data_ci <- all_adapt_analysis %>% dplyr::select(Spearman.using.sig.PCs, average_spearman_from_bootstrap, bootstrap_ci_5., bootstrap_ci_95.) %>% arrange(Spearman.using.sig.PCs) %>% mutate(study_number = row_number()) %>% mutate(Spearman.using.sig.PCs = as.numeric(Spearman.using.sig.PCs)) %>% mutate(average_spearman_from_bootstrap = as.numeric(average_spearman_from_bootstrap)) %>% mutate(mean_expected = mean(average_spearman_from_bootstrap)) %>% mutate(mean_observed = mean(Spearman.using.sig.PCs)) %>% as.data.frame() ggplot(spearman_correlation_data_ci, aes(x = reorder(study_number, Spearman.using.sig.PCs), y = Spearman.using.sig.PCs)) + geom_point(aes(y = Spearman.using.sig.PCs), color = "darkgreen", size = 7) + geom_errorbar(aes(ymin = bootstrap_ci_5., ymax = bootstrap_ci_95.), width = 0.2) + labs(x = "study", y = expression("Genome-wide vs. putatively adaptive R")) + theme_minimal() + theme(aspect.ratio = 1.0, panel.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(), axis.line = element_line(size = 1), axis.line.x = element_line(color = "black", size = 1), axis.line.y = element_line(color = "black", size = 1), axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black"), axis.title = element_text(color = "black"), axis.title.y = element_text(vjust = 0.7, size = 20), axis.title.x = element_text(vjust = 0.2, size = 20,hjust = 0.5, margin = margin(r = 2)), axis.text.x = element_text(size = 20, hjust = 0.5, margin = margin(r = 2, t = 2)), axis.text.y = element_text(size = 20, hjust = 0.5), legend.position = "none") + coord_cartesian(clip = "off") + scale_y_continuous(breaks = seq(-2.0, 2.0, by = 0.2), labels = seq(-2.0, 2.0, by = 0.2)) +scale_x_discrete(breaks = seq(0, 34, by = 2), labels = seq(0, 34, by = 2))+ geom_hline(yintercept = 0.00, linetype = "dashed", color = "black", size = 0.75) #### Figure 3.5 Observed and expected Spearman correlation of popualtion distinctness scores, reflecting the predictive power of adaptive SNPs#### spearman_correlation_data <- all_adapt_analysis %>% dplyr::select(Study.ID, Spearman.using.sig.PCs, average_spearman_from_bootstrap, bootstrap_ci_5.) %>% # "Spearman.using.sig.PCs" is the observed Spearman correlation and "average_spearman_from_bootstrap" is the expected correlation arrange(Spearman.using.sig.PCs) %>% mutate(study_number = row_number()) %>% mutate(Spearman.using.sig.PCs = as.numeric(Spearman.using.sig.PCs)) %>% mutate(average_spearman_from_bootstrap = as.numeric(average_spearman_from_bootstrap)) %>% mutate(mean_expected = mean(average_spearman_from_bootstrap)) %>% mutate(mean_observed = mean(Spearman.using.sig.PCs)) %>% as.data.frame() ggplot(spearman_correlation_data, aes(x = reorder(study_number, Spearman.using.sig.PCs), y = Spearman.using.sig.PCs)) + geom_point(color = "darkgreen", size = 7, aes(shape = factor(bootstrap_ci_5. > 0)), stroke = 1.5) + geom_point(aes(y = average_spearman_from_bootstrap), color = "orange", size = 7) + geom_segment(aes(xend = reorder(study_number, Spearman.using.sig.PCs), y = Spearman.using.sig.PCs, yend = average_spearman_from_bootstrap), color = "black") + scale_shape_manual(values = c(1, 16)) + # 1 = hollow circle, 16 = filled circle labs(x = "study", y = "Observed and expected R") + theme_minimal() + theme(aspect.ratio = 1.0, panel.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(), axis.line = element_line(size = 1), axis.line.x = element_line(color = "black", size = 1), axis.line.y = element_line(color = "black", size = 1), axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black"), axis.title = element_text(color = "black"), axis.title.y = element_text(vjust = 0.7, size = 20), axis.title.x = element_text(vjust = 0.2, size = 20,hjust = 0.5, margin = margin(r = 2)), axis.text.x = element_text(size = 20, hjust = 0.5, margin = margin(r = 2, t = 2)), axis.text.y = element_text(size = 20, hjust = 0.5), legend.position = "none") + coord_cartesian(clip = "off") + scale_y_continuous(breaks = seq(-1.0, 1.0, by = 0.2), labels = seq(-1.0, 1.0, by = 0.2)) +scale_x_discrete(breaks = seq(0, 34, by = 2), labels = seq(0, 34, by = 2))+ geom_hline(yintercept = 0.62, linetype = "dashed", color = "darkgreen", size = 0.75) + geom_hline(yintercept = 0.53, linetype = "dashed", color = "orange", size = 0.75) + geom_text(aes(label = c("0.62")), x=34.40, y = 0.63, hjust = 0, vjust = 1, size = 9, color= "darkgreen") + geom_text(aes(label = c("0.53")), x=34.40, y = 0.54, hjust = 0, vjust = 1, size = 9, color= "orange") + geom_hline(yintercept = 0, linetype = "dashed", color = "black", size = 0.75) #### Statistical test for Figure 3.5: to test the difference between observed and expected correlation. #### ## Part 1: Without removing outlier ## observed_minus_expected <- all_adapt_analysis %>% select(Study.ID, Spearman.using.sig.PCs,average_spearman_from_bootstrap, difference_between_observed_and_average_spearman) %>% mutate(difference_between_observed_expected = Spearman.using.sig.PCs - average_spearman_from_bootstrap) %>% mutate(mean_difference = mean(difference_between_observed_expected)) %>% mutate(sd_difference = sd(difference_between_observed_expected)) %>% mutate(se_difference = sd_difference/ sqrt(34)) %>% mutate(t = mean_difference/se_difference) #Paired t-test assumptions #The two groups are paired. (Assumption met: Paired by Study ID) #No significant outliers in the difference between the two related groups. (Assumption met. There are no extreme outliers, but 3 mild outliers. I also do analysis with removing those (see part 2)) #Normality. the difference of pairs follow a normal distribution (Assumption met) shapiro.test(x = observed_minus_expected$difference_between_observed_expected) t.test(observed_minus_expected$Spearman.using.sig.PCs, observed_minus_expected$average_spearman_from_bootstrap, paired = TRUE, alternative = "two.sided") ## Part 2: without removing outlier studies ## observed_minus_expected_outliers <- observed_minus_expected %>% identify_outliers(difference_between_observed_and_average_spearman) # This is what I used to identify outliers. There are 3 outliers, but not extreme. observed_minus_expected_removing_outliers <- observed_minus_expected %>% dplyr::filter(!Study.ID %in% c("Roffler et al., 2016", "Benestan et al., 2016", "Cullingham et al., 2014")) t.test(observed_minus_expected_removing_outliers$Spearman.using.sig.PCs, observed_minus_expected_removing_outliers$average_spearman_from_bootstrap, paired = TRUE, alternative = "two.sided") #### Figure 3.6 Comparing median minor allele frequencies of putatively random and adaptive SNPs across studies#### spearman_correlation_data_re_random_adaptive_neutral <- all_adapt_analysis %>% dplyr::select(Study.ID, Spearman.using.sig.PCs, average_spearman_from_bootstrap, difference_between_observed_and_average_spearman, allele_frequency_2_corrected_random_to_adaptive_mann_whittney, allele_frequency_2_corrected_neutral_to_adaptive_corrected_mann_whittney, neutral_2_median, adaptive_2_median,random_2_median) %>% arrange(Spearman.using.sig.PCs) %>% mutate(new_study_number = row_number()) %>% mutate(Spearman.using.sig.PCs = as.numeric(Spearman.using.sig.PCs)) %>% mutate(average_spearman_from_bootstrap = as.numeric(average_spearman_from_bootstrap)) %>% select(new_study_number, Study.ID, everything()) %>% as.data.frame() ## comparing random and adaptive median minor allele frequencies ## random_adaptive_median<- spearman_correlation_data_re_random_adaptive_neutral %>% select(Study.ID, adaptive_2_median, random_2_median, new_study_number,Spearman.using.sig.PCs, allele_frequency_2_corrected_random_to_adaptive_mann_whittney) %>% mutate(difference_adaptive_random_median = adaptive_2_median - random_2_median) # 19 out of 34 "allele_frequency_2_corrected_random_to_adaptive_mann_whittney" are above P = 0.05 (these p-values are already bonferroni corrected) ggplot(random_adaptive_median, aes(x = reorder(new_study_number, Spearman.using.sig.PCs), y = adaptive_2_median)) + geom_point(aes(y = adaptive_2_median), color = "darkgreen", size = 7) + geom_point(aes(y = random_2_median), color = "orange", size = 7) + geom_segment(aes(xend = reorder(new_study_number, Spearman.using.sig.PCs), y = adaptive_2_median, yend = random_2_median), color = "black") + labs(x = "study", y = expression(atop("median minor allele frequencies of", paste("putatively adaptive and random SNPs")))) + theme_minimal() + theme(aspect.ratio = 1.0, panel.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(), axis.line = element_line(size = 1), axis.line.x = element_line(color = "black", size = 1), axis.line.y = element_line(color = "black", size = 1), axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black"), axis.title = element_text(color = "black"), axis.title.y = element_text(vjust = 0.7, size = 20), axis.title.x = element_text(vjust = 0.2, size = 20,hjust = 0.5, margin = margin(r = 2)), axis.text.x = element_text(size = 20, hjust = 0.5, margin = margin(r = 2, t = 2)), axis.text.y = element_text(size = 20, hjust = 0.5), legend.position = "none") + coord_cartesian(clip = "off") + scale_y_continuous(breaks = seq(-1.0, 1.0, by = 0.2), labels = seq(-1.0, 1.0, by = 0.2)) +scale_x_discrete(breaks = seq(0, 34, by = 2), labels = seq(0, 34, by = 2)) + geom_text( aes(label = ifelse(allele_frequency_2_corrected_random_to_adaptive_mann_whittney <= 0.05, "*", "")), vjust = -0.3, size = 8, color = "black") ## statistical test comparing differences in median minor allele frequencies## ## Checking all assumptions of paired t-test## #The two groups are paired. (Assumption met: Paired by Study ID) #No significant outliers in the difference between the two related groups. (Assumption met) #Normality. the difference of pairs follow a normal distribution (Assumption met) shapiro.test(x = random_adaptive_median$difference_adaptive_random_median) t.test(random_adaptive_median$adaptive_2_median, random_adaptive_median$random_2_median, paired = TRUE, alternative = "greater") ## Comparing neutral and adaptive median minor allele frequencies ## neutral_adaptive_median<- spearman_correlation_data_re_random_adaptive_neutral %>% select(Study.ID, adaptive_2_median, neutral_2_median, new_study_number,Spearman.using.sig.PCs, allele_frequency_2_corrected_neutral_to_adaptive_corrected_mann_whittney) %>% mutate(difference_adaptive_neutral = adaptive_2_median - neutral_2_median) ## 17 out of 30 "allele_frequency_2_corrected_neutral_to_adaptive_corrected_mann_whittney" are above P = 0.05 (these p-values are already bonferroni corrected) ## statistical test ## (one tail t-test) #The two groups are paired. (Assumption met: Paired by Study ID) #No significant outliers in the difference between the two related groups. (Assumption met) #Normality. the difference of pairs follow a normal distribution (Assumption met) shapiro.test(x = neutral_adaptive_median$difference_adaptive_neutral) t.test(neutral_adaptive_median$adaptive_2_median, neutral_adaptive_median$neutral_2_median, paired = TRUE, alternative = "greater") ### Figure 3.7 Correlation between Observed minus expected R and adaptive minus random allele frequencies#### same_dataset <- all_adapt_analysis %>% select(Study.ID,Spearman.using.sig.PCs,average_spearman_from_bootstrap,neutral_2_median, difference_between_observed_and_average_spearman,adaptive_2_median,random_2_median) %>% arrange(Spearman.using.sig.PCs) %>% mutate(study_number = row_number()) %>% mutate(Spearman.using.sig.PCs = as.numeric(Spearman.using.sig.PCs)) %>% mutate(differeence_observed_expected = Spearman.using.sig.PCs-average_spearman_from_bootstrap) %>% mutate(difference_adaptive_random_median = adaptive_2_median - random_2_median) %>% mutate(difference_adaptive_neutral_median = adaptive_2_median - neutral_2_median) %>% select(study_number, everything()) %>% mutate(sign = ifelse(difference_adaptive_random_median > 0, 1, 0)) ## Correlation test ## cor.test(same_dataset$difference_adaptive_random_median,same_dataset$differeence_observed_expected, method = "spearman") # exact same result with Spearman and Pearson ## plot ## ggplot(data = same_dataset, aes(x = difference_adaptive_random_median, y = difference_between_observed_and_average_spearman,color = "Study_ID", fill = "Study_ID"), size = 4, order=as.numeric(same_dataset$difference_between_observed_and_average_spearman)) + xlab("adaptive minus random allele frequencies") + ylab('observed minus expected R')+ geom_point(size = 7) + theme(aspect.ratio = 1.0,panel.background = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), panel.border = element_blank(),axis.line = element_line(size = 1), axis.line.x = element_line(color = "black", size = 1), axis.line.y = element_line(color = "black", size = 1), axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black"),axis.title = element_text(color = "black"), axis.title.y = element_text(vjust = 0.7, size = 20), axis.title.x = element_text(vjust = 0.2, size = 20), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20, hjust = 0.5), legend.position = "none") + expand_limits(x = 0, y = 0) + theme(axis.text.x = element_text(size = 20)) + scale_colour_manual(values = c("#637029", "#637029")) + scale_fill_manual(values = c("#637029", "#637029")) + annotate("text", x = 0.2, y = 0.6, label = "R = -0.12\n P = 0.50", size = 7) + geom_hline(yintercept = 0, linetype = "dashed", color = "black", size = 0.75) ###### Examining the impact of covariates on Observed Spearman correlation and Observed minus expected correlation #### ### There are six covariates: study-wide FST, account for neutral population structure or demography or reduces false positives, number of genome-wide SNPs, number of putatively adaptive SNPs,proportion of adaptive SNPs, and method Type (Fst vs. non-FST) #### assumptions of pearson correlation### #https://usq.pressbooks.pub/statisticsforresearchstudents/chapter/correlation-assumptions/ #1. The two variables (the variables of interest) need to be using a continuous scale. (assumption met) #2. The variables should be normally or near-to-normally distributed. (after transformation, assumption is met) #3. The two variables of interest should have a linear relationship, which you can check with a scatterplot. (not really for some variables) #4. There should be no spurious outliers. (met) ## spearman correlation assumptions: #https://www.statisticssolutions.com/free-resources/directory-of-statistical-analyses/correlation-pearson-kendall-spearman/#:~:text=The%20assumptions%20of%20the%20Spearman,related%20to%20the%20other%20variable.&text=Key%20Terms-,Effect%20size%3A%20Cohen's%20standard%20may%20be%20used%20to%20evaluate%20the,relationship%2C%20or%20the%20effect%20size. # monotonic relationship and ordinal data ###Study-wide FST #### ### Impact of Study-wide FST on Spearman correlation ### fst_hierstat_schluter_way <- all_adapt_analysis%>% select(Study.ID, Weir_cockerham_software_wc_hierfstat, difference_between_observed_and_average_spearman, Spearman.using.sig.PCs) %>% arrange(Spearman.using.sig.PCs) %>% mutate(study_number = row_number()) %>% select(study_number, Study.ID, everything()) %>% mutate(log_fst = log(Weir_cockerham_software_wc_hierfstat)) #%>% ## plot ## weir_fst <- ggplot(data = fst_hierstat_schluter_way, aes(x = log(Weir_cockerham_software_wc_hierfstat), y = asin(sqrt(Spearman.using.sig.PCs)), color = "Study_ID", fill = "Study_ID"), size = 4, order=as.numeric(asin(sqrt(fst_hierstat_schluter_way$Spearman.using.sig.PCs)))) + xlab("study-wide FST") + ylab('arcsine(observed correlation)')+geom_point(size = 7) + theme(aspect.ratio = 1.0,panel.background = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), panel.border = element_blank(),axis.line = element_line(size = 1), axis.line.x = element_line(color = "black", size = 1), axis.line.y = element_line(color = "black", size = 1), axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black"),axis.title = element_text(color = "black"), axis.title.y = element_text(vjust = 0.7, size = 20), axis.title.x = element_text(vjust = 0.2, size = 20), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20, hjust = 0.5), legend.position = "none") + theme(axis.text.x = element_text(size = 20)) + scale_colour_manual(values = c("darkgreen", "darkgreen")) + scale_fill_manual(values = c("darkgreen", "darkgreen")) + annotate("text", x = -1, y = 1.5, label = "R = -0.01\n P = 0.95", size = 7) weir_fst ## correlation test ## cor.test(fst_hierstat_schluter_way$Weir_cockerham_software_wc_hierfstat,fst_hierstat_schluter_way$Spearman.using.sig.PCs, method = "spearman") ### Impact of study-wide FST on Observed minus expected R ### ## plot ## weir_fst_difference <- ggplot(data = fst_hierstat_schluter_way, aes(x = log(Weir_cockerham_software_wc_hierfstat), y = difference_between_observed_and_average_spearman, color = "Study.ID", fill = "Study.ID"), size = 4, order=as.numeric(difference_between_observed_and_average_spearman)) + xlab(expression(log("study-wide F"["ST"])))+ ylab('observed minus expected R')+geom_point(size = 7) + theme(aspect.ratio = 1.0,panel.background = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), panel.border = element_blank(),axis.line = element_line(size = 1), axis.line.x = element_line(color = "black", size = 1), axis.line.y = element_line(color = "black", size = 1), axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black"),axis.title = element_text(color = "black"), axis.title.y = element_text(vjust = 0.7, size = 20), axis.title.x = element_text(vjust = 0.2, size = 20), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20, hjust = 0.5), legend.position = "none") + theme(axis.text.x = element_text(size = 20))+ scale_colour_manual(values = c("#637029", "#637029")) + scale_fill_manual(values = c("#637029", "#637029")) + geom_hline(yintercept = 0, linetype = "dashed", color = "black", size = 0.75) + annotate("text", x = -1, y = 0.5, label = "R = -0.46\n P = 0.007", size = 7) weir_fst_difference ## correlation test ## cor.test(log(fst_hierstat_schluter_way$Weir_cockerham_software_wc_hierfstat),fst_hierstat_schluter_way$difference_between_observed_and_average_spearman, method = "spearman") ### Figure 3.9: Impact of study-wide FST on variance in Shapley Values ### fst_shapley_data <- all_adapt_analysis%>% select(Study.ID,var_shapley_raw_genome_wide, Weir_cockerham_software_wc_hierfstat, difference_between_observed_and_average_spearman) %>% mutate(log_var_shapley_raw_genome_wide = log(var_shapley_raw_genome_wide)) %>% mutate(log_fst = log(Weir_cockerham_software_wc_hierfstat))# no extreme outliers ## correlation test ## cor.test(log(fst_shapley_data$var_shapley_raw_genome_wide),log(fst_shapley_data$Weir_cockerham_software_wc_hierfstat), method = "spearman") ## plot ## ggplot(fst_shapley_data, aes(x = log(Weir_cockerham_software_wc_hierfstat), y = log(var_shapley_raw_genome_wide))) + geom_point(size = 7) + xlab(expression(paste("log(study-wide ", F[ST],")"))) + ylab('log(variance in Shapley value)') + theme(aspect.ratio = 1.0,panel.background = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), panel.border = element_blank(),axis.line = element_line(size = 1), axis.line.x = element_line(color = "black", size = 1), axis.line.y = element_line(color = "black", size = 1), axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black"),axis.title = element_text(color = "black"), axis.title.y = element_text(vjust = 0.7, size = 20), axis.title.x = element_text(vjust = 0.2, size = 20), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20, hjust = 0.5), legend.position = "none") + theme(axis.text.x = element_text(size = 20)) + scale_colour_manual(values = c("#637029")) + scale_fill_manual(values = c("#637029")) + annotate("text", x = -1, y = 5.3, label = "R = 0.53\n P = 0.002", size = 7) ### Impact of variance in Shapley Values on Observed minus expected R ### ## correlation test ## cor.test(log(fst_shapley_data$var_shapley_raw_genome_wide),fst_shapley_data$difference_between_observed_and_average_spearman, method = "spearman") ## plot ## ggplot(fst_shapley_data, aes(x = log(var_shapley_raw_genome_wide), y = difference_between_observed_and_average_spearman)) + geom_point(size = 7) + ylab("Observed minus expected R") + xlab('log(variance in Shapley value)') + theme(aspect.ratio = 1.0,panel.background = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), panel.border = element_blank(),axis.line = element_line(size = 1), axis.line.x = element_line(color = "black", size = 1), axis.line.y = element_line(color = "black", size = 1), axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black"),axis.title = element_text(color = "black"), axis.title.y = element_text(vjust = 0.7, size = 20), axis.title.x = element_text(vjust = 0.2, size = 20), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20, hjust = 0.5), legend.position = "none") + theme(axis.text.x = element_text(size = 20)) + scale_colour_manual(values = c("#637029")) + scale_fill_manual(values = c("#637029")) + annotate("text", x = -1, y = 5.3, label = "R = -0.25\n P = 0.16", size = 7) #### Account for Neutral population structure or demography or False positives #### neutral_pop_structure<- all_adapt_analysis %>% select(Spearman.using.sig.PCs, difference_between_observed_and_average_spearman, account_for_nps_or_dh_or_false_positives, Study.ID) %>% rename(neutral_pop_str_1 = account_for_nps_or_dh_or_false_positives) %>% mutate(neutral_pop_str = ifelse(neutral_pop_str_1 == 0.0, "no", ifelse(neutral_pop_str_1 == 0.5, "mixed", ifelse(neutral_pop_str_1 == 1.0, "yes", NA)))) ### Assumptions of one-way anova ### ##https://rpubs.com/Pramo_Udaya/639505#:~:text=To%20assess%20this%20assumption%20in,using%20the%20function%20residuals()%20. ##Homogeneity of variance (Homoscedasticity) (assumption met) ##Normality of residuals (errors) # ANOVA assumes that the normality of the errors (or residuals) not necessarily of the observation. ##Independence (assumption met) ### Impact of neutral population structure on Observed Spearman correlation ### ## plot ## ggplot(neutral_pop_structure, aes(x = neutral_pop_str, y = asin(sqrt(Spearman.using.sig.PCs)), color = "Study.ID", fill = "Study.ID")) + geom_point(size = 6) +xlab("accounting for neutral pop structure") + ylab('obs - exp R') + theme(aspect.ratio = 1.0,panel.background = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), panel.border = element_blank(),axis.line = element_line(size = 1), axis.line.x = element_line(color = "black", size = 1), axis.line.y = element_line(color = "black", size = 1), axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black"),axis.title = element_text(color = "black"), axis.title.y = element_text(vjust = 0.7, size = 20), axis.title.x = element_text(vjust = 0.2, size = 20), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20, hjust = 0.5), legend.position = "none") + theme(axis.text.x = element_text(size = 20)) + scale_colour_manual(values = c("darkgreen")) + scale_fill_manual(values = c("darkgreen"))+ annotate("text", x = 3, y = 1.4, label = "P = 0.56", size = 7) # without transformation # model_1<- aov(neutral_pop_str_1 ~Spearman.using.sig.PCs, data = neutral_pop_structure) summary(model_1) # with transformation # model_1<- aov(neutral_pop_str_1 ~asin(sqrt(Spearman.using.sig.PCs)), data = neutral_pop_structure) summary(model_1) ### Impact of accounting for neutral population structure or demography or reduction in false positives on observed minus expected R### ggplot(neutral_pop_structure, aes(x = neutral_pop_str, y = difference_between_observed_and_average_spearman, color = "Study.ID", fill = "Study.ID")) + geom_point(size = 6) +xlab("accounting for neutral pop structure") + ylab('obs - exp R') + theme(aspect.ratio = 1.0,panel.background = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), panel.border = element_blank(),axis.line = element_line(size = 1), axis.line.x = element_line(color = "black", size = 1), axis.line.y = element_line(color = "black", size = 1), axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black"),axis.title = element_text(color = "black"), axis.title.y = element_text(vjust = 0.7, size = 20), axis.title.x = element_text(vjust = 0.2, size = 20), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20, hjust = 0.5), legend.position = "none") + theme(axis.text.x = element_text(size = 20)) + scale_colour_manual(values = c("#637029")) + scale_fill_manual(values = c("#637029")) + annotate("text", x = 3, y = 0.5, label = "P = 0.45", size = 7) model<- aov(neutral_pop_str_1 ~difference_between_observed_and_average_spearman, data = neutral_pop_structure) summary(model) #### Number of genome-wide SNPs #### number_of_genome_wide_snps <- all_adapt_analysis%>% select(Study.ID, number_of_total_snps, difference_between_observed_and_average_spearman, Spearman.using.sig.PCs) %>% arrange(Spearman.using.sig.PCs) %>% mutate(new_study_number = row_number()) %>% select(new_study_number, Study.ID, everything()) %>% mutate(log_genome_wide_snps= log(number_of_total_snps)) ## plot ## total_snps <- ggplot(data = number_of_genome_wide_snps, aes(x = log(number_of_total_snps), y = asin(sqrt(Spearman.using.sig.PCs)), color = "Study_ID", fill = "Study_ID"), size = 4, order=as.numeric(asin(sqrt(number_of_genome_wide_snps$Spearman.using.sig.PCs)))) + xlab("log(number of total SNPs)") + ylab('arcsine(observed correlation)')+ geom_point(size = 7) + theme(aspect.ratio = 1.0,panel.background = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), panel.border = element_blank(),axis.line = element_line(size = 1), axis.line.x = element_line(color = "black", size = 1), axis.line.y = element_line(color = "black", size = 1), axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black"),axis.title = element_text(color = "black"), axis.title.y = element_text(vjust = 0.7, size = 20), axis.title.x = element_text(vjust = 0.2, size = 20), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20, hjust = 0.5), legend.position = "none") + theme(axis.text.x = element_text(size = 20)) + scale_colour_manual(values = c("darkgreen", "darkgreen")) + scale_fill_manual(values = c("darkgreen", "darkgreen")) + annotate("text", x = 10.5, y = 1.4, label = "R = -0.20\n P = 0.26", size = 7) total_snps ## correlation test ## cor.test(log(number_of_genome_wide_snps$number_of_total_snps),asin(sqrt(number_of_genome_wide_snps$Spearman.using.sig.PCs)), method = "spearman") ### Impact of number of genome-wide SNPs on Observed minus expected R### ## plot ## total_snps_diff <- ggplot(data = number_of_genome_wide_snps, aes(x = log(number_of_total_snps), y = difference_between_observed_and_average_spearman, color = "Study_ID", fill = "Study_ID"), size = 4, order=as.numeric(number_of_genome_wide_snps$difference_between_observed_and_average_spearman)) + xlab("log(number of total SNPs)") + ylab('Observed minus expected R')+ geom_point(size = 7) + theme(aspect.ratio = 1.0,panel.background = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), panel.border = element_blank(),axis.line = element_line(size = 1), axis.line.x = element_line(color = "black", size = 1), axis.line.y = element_line(color = "black", size = 1), axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black"),axis.title = element_text(color = "black"), axis.title.y = element_text(vjust = 0.7, size = 20), axis.title.x = element_text(vjust = 0.2, size = 20), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20, hjust = 0.5), legend.position = "none") + theme(axis.text.x = element_text(size = 20)) + scale_colour_manual(values = c("#637029", "#637029")) + scale_fill_manual(values = c("#637029", "#637029")) + annotate("text", x = 10.5, y = 0.5, label = "R = -0.17\n P = 0.34", size = 7) total_snps_diff ## correlation test ## cor.test(log(number_of_genome_wide_snps$number_of_total_snps),number_of_genome_wide_snps$difference_between_observed_and_average_spearman, method = "spearman") #### Number of putatively adaptive SNPs #### number_of_adaptive_snps <- all_adapt_analysis%>% select(Study.ID, number_of_adapt_snps, difference_between_observed_and_average_spearman, Spearman.using.sig.PCs) %>% arrange(Spearman.using.sig.PCs) %>% mutate(new_study_number = row_number()) %>% select(new_study_number, Study.ID, everything()) %>% mutate(log_number_of_adapt_snps= log(number_of_adapt_snps)) ## plot ## adapt_snps <- ggplot(data = number_of_adaptive_snps, aes(x = log(number_of_adapt_snps), y = asin(sqrt(Spearman.using.sig.PCs)), color = "Study_ID", fill = "Study_ID"), size = 4, order=as.numeric(asin(sqrt(Spearman.using.sig.PCs)))) + xlab("log(number of putatively adaptive SNPs)") + ylab('arcsine(observed correlation)')+ geom_point(size = 7) + theme(aspect.ratio = 1.0,panel.background = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), panel.border = element_blank(),axis.line = element_line(size = 1), axis.line.x = element_line(color = "black", size = 1), axis.line.y = element_line(color = "black", size = 1), axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black"),axis.title = element_text(color = "black"), axis.title.y = element_text(vjust = 0.7, size = 20), axis.title.x = element_text(vjust = 0.2, size = 20), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20, hjust = 0.5), legend.position = "none") + theme(axis.text.x = element_text(size = 20)) + scale_colour_manual(values = c("darkgreen", "darkgreen")) + scale_fill_manual(values = c("darkgreen", "darkgreen")) + annotate("text", x = 6, y = 1.5, label = "R = 0.32\n P = 0.06", size = 7) adapt_snps ## correlation test ## cor.test(number_of_adaptive_snps$log_number_of_adapt_snps,asin(sqrt(number_of_adaptive_snps$Spearman.using.sig.PCs)), method = "spearman") ### Impact of putatively adaptive SNPs on Observed minus expected R ### ## plot ## adaptive_snps_graph <- ggplot(data = number_of_adaptive_snps, aes(x = log(number_of_adapt_snps), y = difference_between_observed_and_average_spearman, color = "Study.ID", fill = "Study.ID"), size = 4, order=as.numeric(difference_between_observed_and_average_spearman)) + xlab(expression(log("number of adaptive SNPs")))+ ylab('observed minus expected R')+geom_point(size = 7) + theme(aspect.ratio = 1.0,panel.background = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), panel.border = element_blank(),axis.line = element_line(size = 1), axis.line.x = element_line(color = "black", size = 1), axis.line.y = element_line(color = "black", size = 1), axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black"),axis.title = element_text(color = "black"), axis.title.y = element_text(vjust = 0.7, size = 20), axis.title.x = element_text(vjust = 0.2, size = 20), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20, hjust = 0.5), legend.position = "none") + theme(axis.text.x = element_text(size = 20)) + scale_colour_manual(values = c("#637029", "#637029")) + scale_fill_manual(values = c("#637029", "#637029")) + geom_hline(yintercept = 0, linetype = "dashed", color = "black", size = 0.75) + annotate("text", x = 6, y = 0.5, label = "R = -0.37\n P = 0.03", size = 7) adaptive_snps_graph ## correlation test ## cor.test(log(number_of_adaptive_snps$number_of_adapt_snps),number_of_adaptive_snps$difference_between_observed_and_average_spearman, method = "spearman") #### Proportion of adaptive SNPs #### number_of_proportion_adapt_snps <- all_adapt_analysis%>% select(Study.ID, proportion_of_all_adaptive, difference_between_observed_and_average_spearman, Spearman.using.sig.PCs) %>% arrange(Spearman.using.sig.PCs) %>% mutate(new_study_number = row_number()) %>% select(new_study_number, Study.ID, everything()) %>% mutate(log_proportion_of_all_adaptive= log(proportion_of_all_adaptive)) ## plot ## prop_1 <- ggplot(data = number_of_proportion_adapt_snps, aes(x = log(proportion_of_all_adaptive), y = Spearman.using.sig.PCs,color = "Study_ID", fill = "Study_ID"), size = 7, order=as.numeric(number_of_proportion_adapt_snps$Spearman.using.sig.PCs)) + xlab("log(proportion of putatively adaptive SNPs)") + ylab("Genome-wide vs. putatively adaptive R") + geom_point(size = 7) + theme(aspect.ratio = 1.0,panel.background = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), panel.border = element_blank(),axis.line = element_line(size = 1), axis.line.x = element_line(color = "black", size = 1), axis.line.y = element_line(color = "black", size = 1), axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black"),axis.title = element_text(color = "black"), axis.title.y = element_text(vjust = 0.7, size = 20), axis.title.x = element_text(vjust = 0.2, size = 20), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20, hjust = 0.5), legend.position = "none") + expand_limits(x = 0, y = 0) + theme(axis.text.x = element_text(size = 20)) + scale_colour_manual(values = c("darkgreen", "darkgreen")) + scale_fill_manual(values = c("darkgreen", "darkgreen")) + annotate("text", x = -1.0, y = 0.955, label = "R = 0.500\n P = 0.003", size = 7) prop_1 ## correlation test ## cor.test(log(number_of_proportion_adapt_snps$proportion_of_all_adaptive),number_of_proportion_adapt_snps$Spearman.using.sig.PCs, method = "spearman") ### Impact of proportion of putatively adaptive SNPs ### ## plot ## prop <- ggplot(data = number_of_proportion_adapt_snps, aes(x = log(proportion_of_all_adaptive), y = difference_between_observed_and_average_spearman,color = "Study_ID", fill = "Study_ID"), size = 4, order=as.numeric(number_of_proportion_adapt_snps$difference_between_observed_and_average_spearman)) + xlab("log(proportion of adaptive SNPs)") + ylab('observed minus expected R')+ geom_point(size = 7) + theme(aspect.ratio = 1.0,panel.background = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), panel.border = element_blank(),axis.line = element_line(size = 1), axis.line.x = element_line(color = "black", size = 1), axis.line.y = element_line(color = "black", size = 1), axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black"),axis.title = element_text(color = "black"), axis.title.y = element_text(vjust = 0.7, size = 20), axis.title.x = element_text(vjust = 0.2, size = 20), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20, hjust = 0.5), legend.position = "none") + expand_limits(x = 0, y = 0) + theme(axis.text.x = element_text(size = 20)) + scale_colour_manual(values = c("#637029", "#637029")) + scale_fill_manual(values = c("#637029", "#637029")) + annotate("text", x = -1.0, y = 0.6, label = "R = -0.17\n P = 0.35", size = 7) prop ## correlation test ## cor.test(log(number_of_proportion_adapt_snps$proportion_of_all_adaptive),number_of_proportion_adapt_snps$difference_between_observed_and_average_spearman, method = "spearman") #### Method Type (FST vs. Non-FST)#### ### Impact of Method Type on Observed Spearman correlation ### method_type_fst_non_fst <- all_adapt_analysis%>% select(Study.ID, Method_Type_FST_NON_FST, difference_between_observed_and_average_spearman, Spearman.using.sig.PCs) %>% arrange(Spearman.using.sig.PCs) %>% mutate(new_study_number = row_number()) %>% select(new_study_number, Study.ID, everything()) ### Assumptions of one-way anova ### ##https://rpubs.com/Pramo_Udaya/639505#:~:text=To%20assess%20this%20assumption%20in,using%20the%20function%20residuals()%20. ##Homogeneity of variance (Homoscedasticity) (assumption met) ##Normality of residuals (errors) # ANOVA assumes that the normality of the errors (or residuals) not necessarily of the observation. ##Independence (assumption met) ## without transformation model_spearman_method <- aov(Spearman.using.sig.PCs ~ Method_Type_FST_NON_FST, data = method_type_fst_non_fst) summary(model_spearman_method) ## With transformation model_spearman_method_transformed <- aov(asin(sqrt(Spearman.using.sig.PCs)) ~ Method_Type_FST_NON_FST, data = method_type_fst_non_fst) summary(model_spearman_method_transformed) ## plot ## raw_correlation_method_type_fst_non_fst <- ggplot(method_type_fst_non_fst, aes(x = factor(Method_Type_FST_NON_FST), y = Spearman.using.sig.PCs)) + geom_boxplot(fill = "darkgreen") +xlab("method type") + ylab("genome-wide vs. putatively adaptive R") + theme(aspect.ratio = 1.0,panel.background = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), panel.border = element_blank(),axis.line = element_line(size = 1), axis.line.x = element_line(color = "black", size = 1), axis.line.y = element_line(color = "black", size = 1), axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black"),axis.title = element_text(color = "black"), axis.title.y = element_text(vjust = 0.7, size = 20), axis.title.x = element_text(vjust = 0.2, size = 20), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20, hjust = 0.5), legend.position = "none") + expand_limits(x = 0, y = 0) + theme(axis.text.x = element_text(size = 20)) + scale_colour_manual(values = c("darkgreen")) + scale_fill_manual(values = c("darkgreen")) + coord_cartesian(xlim = c(1.0, 2.0)) + scale_x_discrete(labels = c(bquote(F[ST]), bquote("non-"~F[ST]))) + annotate("text", x = 2.0, y = 0.95, label = "P = 0.02", size = 7) raw_correlation_method_type_fst_non_fst ### Impact of Method Type (FST or non-FST) on Observed minus Expected R ### ## plots ## raw_correlation_method_type_fst_non_fst <- ggplot(method_type_fst_non_fst, aes(x = factor(Method_Type_FST_NON_FST), y = difference_between_observed_and_average_spearman)) + geom_boxplot(fill = "#637029") +xlab("method type") + ylab('observed minus expected R') + theme(aspect.ratio = 1.0,panel.background = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), panel.border = element_blank(),axis.line = element_line(size = 1), axis.line.x = element_line(color = "black", size = 1), axis.line.y = element_line(color = "black", size = 1), axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black"),axis.title = element_text(color = "black"), axis.title.y = element_text(vjust = 0.7, size = 20), axis.title.x = element_text(vjust = 0.2, size = 20), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20, hjust = 0.5), legend.position = "none") + theme(axis.text.x = element_text(size = 20)) + scale_colour_manual(values = c("#637029")) + scale_fill_manual(values = c("#637029")) + annotate("text", x = 2.0, y = 0.5, label = "P = 0.38", size = 7) raw_correlation_method_type_fst_non_fst ### statistical analysis ### model_spearman_method_diff <- aov(difference_between_observed_and_average_spearman ~ Method_Type_FST_NON_FST, data = method_type_fst_non_fst) summary(model_spearman_method_diff) ### Proportion of adaptive SNPs and Method Type (FST vs. Non-FST) impact each other### model_spearman_method_prop <- aov(log(proportion_of_all_adaptive) ~ Method_Type_FST_NON_FST, data = all_adapt_analysis) summary(model_spearman_method_prop) raw_correlation_method_type_fst_non_fst_prop <- ggplot(all_adapt_analysis, aes(x = factor(Method_Type_FST_NON_FST), y = log(proportion_of_all_adaptive))) + geom_boxplot(fill = "white") +xlab("method type") + ylab('log(proportion of adaptive SNPs)') + theme(aspect.ratio = 1.0,panel.background = element_blank(), panel.grid.major = element_blank(),panel.grid.minor = element_blank(), panel.border = element_blank(),axis.line = element_line(size = 1), axis.line.x = element_line(color = "black", size = 1), axis.line.y = element_line(color = "black", size = 1), axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black"),axis.title = element_text(color = "black"), axis.title.y = element_text(vjust = 0.7, size = 20), axis.title.x = element_text(vjust = 0.2, size = 20), axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20, hjust = 0.5), legend.position = "none") + theme(axis.text.x = element_text(size = 20)) + scale_colour_manual(values = c("#637029")) + scale_fill_manual(values = c("#637029")) + scale_x_discrete(labels = c(bquote(F[ST]), bquote("non-"~F[ST]))) + annotate("text", x = 2.0, y = -1.5, label = "P = 0.0005", size = 7) raw_correlation_method_type_fst_non_fst_prop #### Beta -regression for testing the impact of proportion of adaptive SNPs and method type on the Observed Spearman correlation #### covariates_beta_regression_1 <- all_adapt_analysis %>% dplyr::mutate(spearman_rho = ifelse(Spearman.using.sig.PCs == "1", "0.9999999", Spearman.using.sig.PCs)) %>% arrange(Spearman.using.sig.PCs) %>% mutate(new_study_number = row_number()) %>% select(new_study_number, Study.ID, everything()) %>% dplyr::filter(!Study.ID %in% "Cullingham et al., 2014") # this is an outlier model_beta_reg_covariates_1 <- betareg(spearman_rho ~ log(proportion_of_all_adaptive) + Method_Type_FST_NON_FST, data = covariates_beta_regression_1, link = "logit", link.phi = "log") summary(model_beta_reg_covariates_1) coef <- coef(model_beta_reg_covariates_1) se <- sqrt(diag(vcov(model_beta_reg_covariates_1))) t_values <- coef / se # Calculate p-values p_values <- 2 * pt(abs(t_values), df = model_beta_reg_covariates_1$df.residual, lower.tail = FALSE) # Display results results <- data.frame(coef, se, t_values, p_values) results plot(model_beta_reg_covariates_1) #### Linear regression to test the impact of number of putatively adaptive SNPs and study-wide FST on observed minus expected R #### covariates <- all_adapt_analysis %>% select(Study.ID, Weir_cockerham_software_wc_hierfstat, difference_between_observed_and_average_spearman, number_of_adapt_snps) model_fst_diff_adapt<- lm(difference_between_observed_and_average_spearman ~ log(number_of_adapt_snps) + log(Weir_cockerham_software_wc_hierfstat), data = covariates) summary(model_fst_diff_adapt) anova(model_fst_diff_adapt) ###### FDR correction on all p-values #### ## Genome-wide and putatively adaptive R p-value correction study_wide_fst<- cor.test(fst_hierstat_schluter_way$Weir_cockerham_software_wc_hierfstat,fst_hierstat_schluter_way$Spearman.using.sig.PCs, method = "spearman") study_wide_fst$p.value model_1<- aov(neutral_pop_str_1 ~Spearman.using.sig.PCs, data = neutral_pop_structure) npr_p_value <- summary(model_1) npr_p_value <- 0.5593 genome_wide_no<- cor.test(log(number_of_genome_wide_snps$number_of_total_snps),asin(sqrt(number_of_genome_wide_snps$Spearman.using.sig.PCs)), method = "spearman") genome_wide_no$p.value adaptive_snp_no <- cor.test(number_of_adaptive_snps$log_number_of_adapt_snps,asin(sqrt(number_of_adaptive_snps$Spearman.using.sig.PCs)), method = "spearman") adaptive_snp_no$p.value prop_of_adapt <- cor.test(log(number_of_proportion_adapt_snps$proportion_of_all_adaptive),number_of_proportion_adapt_snps$Spearman.using.sig.PCs, method = "spearman") prop_of_adapt$p.value model_spearman_method <- aov(Spearman.using.sig.PCs ~ Method_Type_FST_NON_FST, data = method_type_fst_non_fst) summary(model_spearman_method) method_p_value <- 0.0182 p_values_pop <- c(study_wide_fst$p.value,npr_p_value, genome_wide_no$p.value, adaptive_snp_no$p.value, prop_of_adapt$p.value, method_p_value) adjusted_p_values <- p.adjust(p_values_pop, method = "fdr") # Observed minus expected R p-value correction study_wide_fst_2 <- cor.test(log(fst_hierstat_schluter_way$Weir_cockerham_software_wc_hierfstat),fst_hierstat_schluter_way$difference_between_observed_and_average_spearman, method = "spearman") study_wide_fst_2$p.value model_2<- aov(neutral_pop_str_1 ~difference_between_observed_and_average_spearman, data = neutral_pop_structure) summary(model_2) nps_2 <- 0.445 genome_wide_no_2 <- cor.test(log(number_of_genome_wide_snps$number_of_total_snps),number_of_genome_wide_snps$difference_between_observed_and_average_spearman, method = "spearman") genome_wide_no_2$p.value adaptive_snps_2 <- cor.test(log(number_of_adaptive_snps$number_of_adapt_snps),number_of_adaptive_snps$difference_between_observed_and_average_spearman, method = "spearman") adaptive_snps_2$p.value prop_adapt_2 <- cor.test(log(number_of_proportion_adapt_snps$proportion_of_all_adaptive),number_of_proportion_adapt_snps$difference_between_observed_and_average_spearman, method = "spearman") prop_adapt_2$p.value model_spearman_method_diff <- aov(difference_between_observed_and_average_spearman ~ Method_Type_FST_NON_FST, data = method_type_fst_non_fst) summary(model_spearman_method_diff) model_2_pvalue <- 0.377 p_values_adaptive_predictive <- c(study_wide_fst_2$p.value,nps_2, genome_wide_no_2$p.value, adaptive_snps_2$p.value, prop_adapt_2$p.value, model_2_pvalue) adjusted_p_values_2 <- p.adjust(p_values_adaptive_predictive, method = "fdr") #### Correlation among covariates #### variable_dataset_categorical <- all_adapt_analysis %>% select(number_of_adapt_snps, proportion_of_all_adaptive, number_of_total_snps, Weir_cockerham_software_wc_hierfstat, account_for_nps_or_dh_or_false_positives,Method_Type_FST_NON_FST) %>% mutate(log_number_of_adapt_snps = log(number_of_adapt_snps)) %>% mutate(log_proportion_of_all_adaptive = log(proportion_of_all_adaptive)) %>% mutate(log_number_of_total_snps = log(number_of_total_snps)) %>% mutate(log_Weir_cockerham_software_wc_hierfstat = log(Weir_cockerham_software_wc_hierfstat)) %>% select(-number_of_adapt_snps, -proportion_of_all_adaptive, -number_of_total_snps, -Weir_cockerham_software_wc_hierfstat) %>% mutate(across(where(is.factor), ~as.numeric(as.factor(.)))) %>% mutate(across(where(is.character), ~as.numeric(as.factor(.)))) correlation_matrix <- cor(variable_dataset_categorical) print(correlation_matrix)