\[\\[0.5in]\]
Here we compare genetic and phenotypic brain networks and investigate whether they have the same properties in terms of their association with age-volume correlations.
We hypothesise a positive association and high congruence between the two sets of factor loadings which would implicate that the same dimensions that underlie phenotypic variation in regional volumes also control the dimension of genetic sharing across these brain volumes. That is, it would suggest that phenotypic structural integrity recapitulates genetic integrity and that they are underpinned by similar general biological mechanisms.
Previously, we have calculated a phenotypic and genetic correlation matrix between 83 brain areas. Here we test whether phenotypic correlations were equivalent to genetic correlations.
##### correlate genetic and phenotypic correlations between brain regions
##### load genetic data
#set wd
workingd<-getwd()
temporarywd<-paste0(workingd,"/data_my_own/ldsc/")
setwd(temporarywd)
load("whole_brain.RData")
ldscoutput<-LDSCoutput_wholebrain
# name rows and columns
dimnames(ldscoutput$S_Stand)[[1]]<-dimnames(ldscoutput$S)[[2]]
dimnames(ldscoutput$S_Stand)[[2]]<-dimnames(ldscoutput$S)[[2]]
# keep lower matrix
cormatrix<-ldscoutput$S_Stand
get_lower_tri<-function(cormatrix){
cormatrix[upper.tri(cormatrix)] <- NA
return(cormatrix)
}
genetic_cor<-get_lower_tri(cormatrix)
genetic_cor<-reshape2::melt(genetic_cor)
names(genetic_cor)<-c("region1","region2","genetic_cor")
######## load phenotypic data
temporarywd_pheno<-paste0(workingd,"/data_my_own/Pheno_preparation/")
setwd(temporarywd_pheno)
load("pheno_decomposition.RData")
phenotypic_cor<-cor_matrix
# sort same as genetic matrix
names<-dimnames(ldscoutput$S)[[2]]
phenotypic_cor<-phenotypic_cor[names,names]
# get lower triangle
phenotypic_cor<-get_lower_tri(phenotypic_cor)
phenotypic_cor<-reshape2::melt(phenotypic_cor)
names(phenotypic_cor)<-c("region1","region2","phenotypic_cor")
gen_pheno<-merge(genetic_cor,phenotypic_cor,by=c("region1","region2"))
# remove correlations with itself (genetic and phenotypic)
gen_pheno$genetic_cor<-ifelse(gen_pheno$region1 == gen_pheno$region2,NA,gen_pheno$genetic_cor)
gen_pheno$phenotypic_cor<-ifelse(gen_pheno$region1 == gen_pheno$region2,NA,gen_pheno$phenotypic_cor)
# calculate correlation between phenotypic and genetic correlations
#cor.test(gen_pheno$genetic_cor,gen_pheno$phenotypic_cor)
cor_cor<-round(cor(gen_pheno$genetic_cor,gen_pheno$phenotypic_cor,use="pairwise.complete.obs"),digits=2)
#plot(gen_pheno$genetic_cor,gen_pheno$phenotypic_cor,ylab="Phenotypic correlations",xlab="Genetic correlations",xlim=c(-0.3,1),ylim=c(-0.3,1))
#abline(lm(gen_pheno$phenotypic_cor~gen_pheno$genetic_cor,na.action = "na.omit"),col="blue")
#abline(0,1,col="red")
#abline(x=0,col="green")
library(ggplot2)
library(ggpubr)
plot_corr<-ggplot(gen_pheno, aes(x=genetic_cor,y=phenotypic_cor))+#
geom_point(size=4,alpha=0.4)+
geom_smooth(method=lm, se=TRUE) #+
#ggtitle("Phenotypic vs. genetic correlations")
plot_corr<- plot_corr+xlab("Genetic correlations")+ ylab("Phenotypic correlations")+
theme(plot.title = element_text(size=20, hjust = 0.5), panel.background = element_blank(), axis.line = element_line(color="black"), axis.line.x = element_line(color="black"))+ theme_bw()+
stat_cor(method="pearson",cor.coef.name="r",size=4.5,position = "identity",)+
theme(legend.position = "none")+
geom_abline(intercept=0,slope=(1),linetype="dashed",color="red",show.legend=T, size=1,alpha=0.7)+
geom_vline(xintercept = 0,linetype="dashed",color="black",size=1.5,alpha=0.2)+
theme(text = element_text(size=12,family="Arial Narrow"),
axis.text.x = element_text(size=15,family="Arial Narrow"),#angle=45
axis.text.y = element_text(size=15,family="Arial Narrow"),
axis.title.y = element_text(face="bold", colour='black', size=17,family="Arial Narrow"),
axis.title.x = element_text(face="bold", colour='black', size=17,family="Arial Narrow"))
plot_corr
summary(lm(gen_pheno$phenotypic_cor~gen_pheno$genetic_cor,na.action = "na.omit"))
##
## Call:
## lm(formula = gen_pheno$phenotypic_cor ~ gen_pheno$genetic_cor,
## na.action = "na.omit")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.34100 -0.04031 0.00022 0.04062 0.25199
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.065770 0.002713 24.25 <2e-16 ***
## gen_pheno$genetic_cor 0.598096 0.006674 89.61 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06172 on 3401 degrees of freedom
## (3486 observations deleted due to missingness)
## Multiple R-squared: 0.7025, Adjusted R-squared: 0.7024
## F-statistic: 8031 on 1 and 3401 DF, p-value: < 2.2e-16
The red line is the line of identity.
We correlated phenotypic with genetic correlations among these areas, and we obtained a significant correlation of 0.84. This indicates that phenotypic correlations predict genetic correlations fairly well.
temporarywd<-paste0(workingd,"/data_my_own/ldsc/")
setwd(temporarywd)
load("whole_brain.RData")
ldscoutput<-LDSCoutput_wholebrain
dimnames(ldscoutput$S_Stand)[[1]]<-dimnames(ldscoutput$S)[[2]]
dimnames(ldscoutput$S_Stand)[[2]]<-dimnames(ldscoutput$S)[[2]]
k<-nrow(ldscoutput$S)
SE<-matrix(0, k, k)
SE[lower.tri(SE,diag=TRUE)] <-sqrt(diag(ldscoutput$V))
get_lower_tri<-function(cormatrix){
cormatrix[upper.tri(cormatrix)] <- NA
return(cormatrix)
}
SE_lower<-get_lower_tri(SE)
SE_lower<-reshape2::melt(SE_lower)
# load phenotypic loadings
temporarywd_pheno<-paste0(workingd,"/data_my_own/Pheno_preparation/")
setwd(temporarywd_pheno)
load("pheno_decomposition.RData")
pheno_stand_loadings<-stand_loadings
names(pheno_stand_loadings)[2]<-"pheno_stand_loadings"
pheno_stand_loadings$pheno_stand_loadings<-pheno_stand_loadings$pheno_stand_loadings*(-1)
# load genetic loadings
library(data.table)
temporarywd<-paste0(workingd,"/data_my_own/standardised_loadings/")
setwd(temporarywd)
gen_stand_loadings<-read.table("stand_loadings_whole_brain.txt",header=T)
names(gen_stand_loadings)[2]<-"gen_stand_loadings"
# multiply the loadings with -1 if the median of the loadings is negative
median_loadings<-median(gen_stand_loadings$gen_stand_loadings)
mean_direction<-sign(median_loadings)
if(mean_direction == -1){
gen_stand_loadings$gen_stand_loadings<-gen_stand_loadings$gen_stand_loadings*(-1)
} else {
gen_stand_loadings$gen_stand_loadings<-gen_stand_loadings$gen_stand_loadings}
stand_loadings<-merge(pheno_stand_loadings,gen_stand_loadings,by="Regions")
#calculate correlation between loadings
summary(lm(stand_loadings$pheno_stand_loadings~stand_loadings$gen_stand_loadings,na.action = "na.omit"))
##
## Call:
## lm(formula = stand_loadings$pheno_stand_loadings ~ stand_loadings$gen_stand_loadings,
## na.action = "na.omit")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.181192 -0.043779 0.000451 0.056009 0.132253
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.14693 0.03830 3.836 0.000246 ***
## stand_loadings$gen_stand_loadings 0.64624 0.06081 10.626 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07121 on 81 degrees of freedom
## Multiple R-squared: 0.5823, Adjusted R-squared: 0.5771
## F-statistic: 112.9 on 1 and 81 DF, p-value: < 2.2e-16
# save statistics in objects for text
model<-lm(stand_loadings$pheno_stand_loadings~stand_loadings$gen_stand_loadings,na.action = "na.omit")
beta<-round(summary(model)$coefficient[2,1],digits = 2)
SE<-round(summary(model)$coefficient[2,2],digits = 2)
pval<-signif(summary(model)$coefficient[2,4],digits =3)
## calculate absolute mean difference between phenotypic and genetic PC loadings
#delta_cor<-abs(stand_loadings$pheno_stand_loadings-stand_loadings$gen_stand_loadings)
#mean(delta_cor,na.rm=T)
#sd(delta_cor,na.rm=T)
Genetic loadings significantly predict phenotypic loadings (b = 0.65, SE = 0.06, pvalue = 5.07^{-17}).
library(ggplot2)
library(ggpubr)
# read in area names of central executive areas
central_areas<-fread(paste0(workingd,"/Scripts/my_own_gwas/2Perform_ldsc/central_executive_areas.txt"),header=F,data.table=F)
central_areas<-as.vector(central_areas[,1])
# create vector encoding which areas are part of central executive network
stand_loadings$central_exec<-FALSE
for(i in central_areas){
stand_loadings$central_exec[which(stand_loadings$Regions==i)]<-TRUE
}
# chose which colors to assign to central executive areas
stand_loadings$central_col<-ifelse(stand_loadings$central_exec==1,"red","darkblue")
plot_loadings<-ggplot(stand_loadings, aes(x=gen_stand_loadings,y=pheno_stand_loadings))+#
geom_point(size=4,alpha=0.4,color=stand_loadings$central_col)+
geom_smooth(method=lm, se=TRUE) #+
#ggtitle("Phenotypic vs. genetic PC loadings")
plot_loadings<-plot_loadings+
xlab("Genetic PC loadings")+
ylab("Phenotypic PC loadings")+
theme(
plot.title = element_text(size=20, hjust = 0.5),
panel.background = element_blank(),
axis.line = element_line(color="black"),
axis.line.x = element_line(color="black"))+
theme_bw()+
stat_cor(method="pearson",cor.coef.name="r",size=4.5,label.x.npc=0.65,label.y.npc = 0)+
theme(legend.position = "none")+
geom_abline(intercept=0,
slope=(1),
linetype="dashed",
color="red",
show.legend=T,
size=1.5,
alpha=0.6)+
theme(text = element_text(size=12),
axis.text.x = element_text(size=15),
axis.text.y = element_text(size=15),
axis.title.y = element_text(face="bold", colour='black', size=17),
axis.title.x = element_text(face="bold", colour='black', size=17))
plot_loadings
The red line is the line of identity. PC loadings marked in red belong to regions that are part of the central executive network.
# to calculate factor congruence, insert covariance matrix
# cor_matrix saved in pheno_decomposition.RData is phenotypic cov matrix
# https://openpsych.net/forum/attachment.php?aid=364
# cut off for Tucker index to assume similarity range from .85-.94
library(psych)
pheno_eigenvector<-principal(cor_matrix)
match_names<-dimnames(cor_matrix)[[1]]
# genetic matrix inferred through LDSC
temporarywd<-paste0(workingd,"/data_my_own/ldsc/")
setwd(temporarywd)
load("whole_brain.RData")
genetic_whole_brain_matrix<-LDSCoutput_wholebrain$S_Stand
colnames(genetic_whole_brain_matrix)<-dimnames(LDSCoutput_wholebrain$S)[[2]]
rownames(genetic_whole_brain_matrix)<-dimnames(LDSCoutput_wholebrain$S)[[2]]
genetic_whole_brain_matrix<-genetic_whole_brain_matrix[match_names,match_names]
gen_eigenvector<-principal(genetic_whole_brain_matrix)
tucker_index<-factor.congruence(x=pheno_eigenvector,y=gen_eigenvector)
# smoothing required because the last PC has negative eigenvalues
The Tucker congruence coefficient for the loadings of the first PC in phenotypic vs. genetic correlation matrices is 0.99. It has been previously suggested that an index between .85-.94 indicates a “fair similarity” between the loadings (Lorenzo-Seva & ten Berge, 2006, Methodology, doi: 10.1027/1614-1881.2.2.57).
# read in genetic corr matrix
temporarywd<-paste0(workingd,"/data_my_own/ldsc/")
setwd(temporarywd)
load("whole_brain.RData")
genetic_whole_brain_matrix<-LDSCoutput_wholebrain$S_Stand
colnames(genetic_whole_brain_matrix)<-dimnames(LDSCoutput_wholebrain$S)[[2]]
rownames(genetic_whole_brain_matrix)<-dimnames(LDSCoutput_wholebrain$S)[[2]]
# set wd to where region names per network are saved
temporarywd<-paste0(workingd,"/Scripts/my_own_gwas/2Perform_ldsc/")
setwd(temporarywd)
# load files containing region names
central_exec_regions<-read.table("central_executive_areas.txt")$V1
cingulo_regions<-read.table("cingulo_opercular_areas.txt")$V1
default_regions<-read.table("default_mode_areas.txt")$V1
hippocampal_regions<-read.table("hippocampal_diencephalic_areas.txt")$V1
multiple_demand_regions<-read.table("multiple_demand_areas.txt")$V1
p_fit_regions<-read.table("p_fit_areas.txt")$V1
salience_regions<-read.table("salience_areas.txt")$V1
sensori_regions<-read.table("sensorimotor_areas.txt")$V1
temporo_regions<-read.table("temporo_amygdala_orbitofrontal_areas.txt")$V1
library(psych)
library(stringr)
network_regions<-ls(pattern="_regions")
# create df to display Tucker-Lewis index per network
display_Tucker<-data.frame(matrix(ncol=2))
colnames(display_Tucker)<-c("network","Tucker_index")
for(i in network_regions){
regions<-get(i)
#print(i)
# decomposition pheno
pheno<-principal(cor_matrix[regions,regions])
# decomposition geno
geno<-principal(genetic_whole_brain_matrix[regions,regions])
# calculate Tucker-Lewis index
tucker_lewis_index<-factor.congruence(x=pheno,y=geno)
# save index
network_name<-str_remove(i,pattern="_regions")
#assign(network_name,tucker_lewis_index)
# save index in table
display_Tucker[which(network_regions ==i),"network"]<-network_name
display_Tucker[which(network_regions ==i),"Tucker_index"]<-tucker_lewis_index
}
kable(display_Tucker, label="Tucker-Lewis Index indicating factor congruence between phenotypic and genetic networks", col.names = c("Networks","Tucker congruence coefficient"))
Networks | Tucker congruence coefficient |
---|---|
central_exec | 1.00 |
cingulo | 0.99 |
default | 0.99 |
hippocampal | 0.99 |
multiple_demand | 1.00 |
p_fit | 0.99 |
salience | 0.99 |
sensori | 0.99 |
temporo | 0.99 |
We hypothesise a negative correlation between genetic brain-wide PC loadings and phenotypically derived age sensitivities. We do not make a prediction about the size of the correlation as no previous study known to us has investigated this question on a genetic level. A significant association would indicate that the same general dimensions of variation in global brain aging underlie the dimension of genetic sharing across brain regions.
# load phenotypic loadings
names(age_cor)[2]<-"Regions"
phenotest<-merge(stand_loadings,age_cor,by="Regions")
plot_age_pheno<-ggplot(phenotest, aes(x=pheno_stand_loadings,y=age_cor))+#
geom_point(size=4,alpha=0.4,color=phenotest$central_col)+
geom_smooth(method=lm, se=TRUE)#,color="blue") #+
#ggtitle("Phenotypic age correlations vs. phenotypic PC loadings")
plot_age_pheno<-
plot_age_pheno+
xlab("Phenotypic PC loadings")+
ylab("Age correlations")+
theme(plot.title = element_text(size=20, hjust = 0.5),
panel.background = element_blank(),
axis.line = element_line(color="black"),
axis.line.x = element_line(color="black"))+
theme_bw()+
stat_cor(method="pearson",
cor.coef.name="r",
size=4.5,
color="black",
label.x.npc=0.65,
label.y.npc = 0)+
theme(legend.position = "none")+
theme(text = element_text(size=12),
axis.text.x = element_text(size=15),#angle=45
axis.text.y = element_text(size=15),
axis.title.y = element_text(face="bold", colour='black', size=17),
axis.title.x = element_text(face="bold", colour='black', size=17))
plot_age_pheno
PC loadings marked in red belong to regions that are part of the central executive network.
# merge age correlations with standardised loadings
names(age_cor)[2]<-"Regions"
data<-merge(stand_loadings,age_cor,by="Regions")
# correlate genetic stand loadings with age cor
age_correlation_result<-cor.test(data$gen_stand_loadings,data$age_cor)
age_cor_estimate<-round(age_correlation_result$estimate,digits = 2)
age_cor_pval<-round(age_correlation_result$p.value,digits = 3)
Genetic loadings are significantly correlated with age sensitivity (r = -0.27, p-value = 0.012).
PC loadings marked in red belong to regions that are part of the central executive network.
library(cowplot)
tiff("corr.tiff", width = 15, height = 10, units = 'in', res=1000)
plot_grid(plot_corr,plot_loadings,plot_age_pheno,plot_age_geno,nrow = 2,ncol = 2,labels=c("A","B","C","D"),width=1,height=1, label_size = 18,hjust = 0.07,scale = 0.85)
dev.off()
By Anna Elisabeth Fürtjes
anna.furtjes@kcl.ac.uk