LBC1936: Phenotypic data preparation

Author

Anna Elisabeth Furtjes

Published

June 6, 2025

Doi

10.1101/2024.11.06.622274

Data prepared here was used as input into analyses presented here. The file containing all phenotypic variables was named LBC1936_allPheno.txt.

Load packages

Code

library(data.table)

Function for longitudinal plots

Code

# write function to plot longitudinal  data
plot_long = function(dat = data, id.var = "lbc36no", var = "matreas"){

    # make sure dat is data.frame
    dat = as.data.frame(dat)
  # select data for the chosen cognitive test
  dat = dat[,c(which(names(dat) == id.var), grep(var, names(dat)))]

  # transform wide to long format
  long <- reshape2::melt(dat, id.vars = id.var, value.name = var)
  names(long)[which(names(long) == "variable")] = "Wave"
  names(long)[which(names(long) == var)] = "var"
  names(long)[which(names(long) == id.var)] = "id.var"
  # remove redundant naming from waves
  long$Wave = as.numeric(sub(".*_w", "", long$Wave))

  plot = ggplot(data = long, aes(x = Wave, y = var, group = id.var))+
  geom_point(color = "#82A0D8", size = .5)+
  geom_line(aes(group=as.factor(id.var)),method="lm", se=F, color = "#8DDFCB", size = 0.2, alpha = .2, stat =  "smooth") +
    theme(legend.position = "none")+
    theme_bw()+
    theme(text = element_text(size=20),
          plot.margin=unit(c(1, 1, 1, 1), "cm"),
          axis.text.y = element_text(size =20),
          axis.text.x = element_text(size =20),
          panel.border = element_blank())

  return(plot)
}

Cognitive tests (factor scores)

This script was kindly provided by Joanna Moodie who had modelled a general factor of cognitive ability in the LBC1936 for one of her projects.

Code

#######################################
## Read in and format data
######################################

# read data
data = foreign::read.spss(paste0(target, "/LBC1936_BrainAtrophy_AF_07NOV2023.sav"), to.data.frame=T)

##########################################################
### Format data
##########################################################

data=data[,c("lbc36no",
              "matreas_w1","matreas_w2","matreas_w3","matreas_w4","matreas_w5",
              "blkdes_w1","blkdes_w2","blkdes_w3","blkdes_w4","blkdes_w5",
              "spantot_w1","spantot_w2","spantot_w3","spantot_w4","spantot_w5",
              "vpatotal_w1","vpatotal_w2","vpatotal_w3","vpatotal_w4","vpa_total_w5",
              "lmtotal_w1","lmtotal_w2","lmtotal_w3","lmtotal_w4","lmtotal_w5",
              "digback_w1","digback_w2","digback_w3","digback_w4","digback_w5",
              "nart_w1","nart_w2","nart_total_w3","nart_total_w4","nart_total_w5",
              "wtar_w1","wtar_w2","wtar_total_w3","wtar_total_w4","wtar_total_w5",
              "vftot_w1","vftot_w2","vftot_w3" ,"vftot_w4","vftot_w5",
              "digsym_w1","digsym_w2","digsym_w3","digsym_w4","digsym_w5",
              "symsear_w1","symsear_w2","symsear_w3","symsear_w4","symsear_w5",
              "crtmean_w1","crtmean_w2","crtmean_w3","crtmean_w4","crtmean_w5",
              "ittotal_w1","ittotal_w2","ittotal_w3","ittotal_w4","ittotal_w5")]


# recode missing values
data[data == -999] <- NA
data[data == -777] <- NA
data[data == 999] <- NA
data[data == 888] <- NA
data[data == -888] <- NA

# assign correct variable classes
# some numeric columns are coded as factors when some should be integers and some should be numeric
# the code below transforms variables first into characters, then numeric because some of the values otherwise get corrupted
IntNames = names(data)[-grep("crtmean", names(data))]
IntNames = IntNames[-which(IntNames == "lbc36no")]
data[IntNames] = lapply(data[IntNames], as.character)
data[IntNames] = lapply(data[IntNames], as.integer)

NumNames = names(data)[grep("crtmean", names(data))]
data[NumNames] = lapply(data[NumNames], as.character)
data[NumNames] = lapply(data[NumNames], as.numeric)

# the symbol search variable has some impossible values (< 0).. these should be removed
for (i in names(data)[grep("symsear", names(data))]) {
  data[which(data[,i] < 0),i] <- NA
}

#rescaled some of the cognitive test variables so that variances are within a similar
#range see http://www.statmodel.com/discussion/messages/11/1615.html?1335376547

dset_mod <- mutate(data,
                   blkdes_w1 = blkdes_w1/2,
                   blkdes_w2 = blkdes_w2/2,
                   blkdes_w3 = blkdes_w3/2,
                   blkdes_w4 = blkdes_w4/2,
                   blkdes_w5 = blkdes_w5/2,
                   vftot_w1 = vftot_w1/2,
                   vftot_w2 = vftot_w2/2,
                   vftot_w3 = vftot_w3/2,
                   vftot_w4 = vftot_w4/2,
                   vftot_w5 = vftot_w5/2,
                   lmtotal_w1 = lmtotal_w1/3,
                   lmtotal_w2 = lmtotal_w2/3,
                   lmtotal_w3 = lmtotal_w3/3,
                   lmtotal_w4 = lmtotal_w4/3,
                   lmtotal_w5 = lmtotal_w5/3,
                   digback_w1 = 3*digback_w1,
                   digback_w2 = 3*digback_w2,
                   digback_w3 = 3*digback_w3,
                   digback_w4 = 3*digback_w4,
                   digback_w5 = 3*digback_w5,
                   digsym_w1 = digsym_w1/2,
                   digsym_w2 = digsym_w2/2,
                   digsym_w3 = digsym_w3/2,
                   digsym_w4 = digsym_w4/2,
                   digsym_w5 = digsym_w5/2,
                   ittotal_w1 = ittotal_w1/2,
                   ittotal_w2 = ittotal_w2/2,
                   ittotal_w3 = ittotal_w3/2,
                   ittotal_w4 = ittotal_w4/2,
                   ittotal_w5 = ittotal_w5/2,
                   crtmean_w1 = -50 * crtmean_w1,
                   crtmean_w2 = -50 * crtmean_w2,
                   crtmean_w3 = -50 * crtmean_w3,
                   crtmean_w4 = -50 * crtmean_w4,
                   crtmean_w5 = -50 * crtmean_w5)

#==================================================================
# LBC1936 factor of curves model
#==================================================================
model <- '
# test growth curves
Imatreas =~ 1*matreas_w1 + 1*matreas_w2 + 1*matreas_w3 + 1*matreas_w4 + 1*matreas_w5
Smatreas =~ 0*matreas_w1 + 2.98*matreas_w2 + 6.75*matreas_w3 + 9.82*matreas_w4 + 12.54*matreas_w5

Iblkdes =~ 1*blkdes_w1 + 1*blkdes_w2 + 1*blkdes_w3 + 1*blkdes_w4 + 1*blkdes_w5
Sblkdes=~ 0*blkdes_w1 + 2.98*blkdes_w2 + 6.75*blkdes_w3 + 9.82*blkdes_w4 + 12.54*blkdes_w5

Ispantot =~ 1*spantot_w1 + 1*spantot_w2 + 1*spantot_w3 + 1*spantot_w4 + 1*spantot_w5
Sspantot=~ 0*spantot_w1 + 2.98*spantot_w2 + 6.75*spantot_w3 + 9.82*spantot_w4 + 12.54*spantot_w5

Inart =~ 1*nart_w1 + 1*nart_w2 + 1*nart_total_w3 + 1*nart_total_w4 + 1*nart_total_w5
Snart =~ 0*nart_w1 + 2.98*nart_w2 + 6.75*nart_total_w3 + 9.82*nart_total_w4 + 12.54*nart_total_w5

Iwtar =~ 1*wtar_w1 + 1*wtar_w2 + 1*wtar_total_w3 + 1*wtar_total_w4 + 1*wtar_total_w5
Swtar =~ 0*wtar_w1 + 2.98*wtar_w2 + 6.75*wtar_total_w3 + 9.82*wtar_total_w4 + 12.54*wtar_total_w5

Ivftot =~ 1*vftot_w1 + 1*vftot_w2 + 1*vftot_w3 + 1*vftot_w4 + 1*vftot_w5
Svftot =~ 0*vftot_w1 + 2.98*vftot_w2 + 6.75*vftot_w3 + 9.82*vftot_w4 + 12.54*vftot_w5

Ivpatotal =~ 1*vpatotal_w1 + 1*vpatotal_w2 + 1*vpatotal_w3 + 1*vpatotal_w4 + 1*vpa_total_w5
Svpatotal =~ 0*vpatotal_w1 + 2.98*vpatotal_w2 + 6.75*vpatotal_w3 + 9.82*vpatotal_w4 + 12.54*vpa_total_w5

Ilmtotal =~ 1*lmtotal_w1 + 1*lmtotal_w2 + 1*lmtotal_w3 + 1*lmtotal_w4 + 1*lmtotal_w5
Slmtotal =~ 0*lmtotal_w1 + 2.98*lmtotal_w2 + 6.75*lmtotal_w3 + 9.82*lmtotal_w4 + 12.54*lmtotal_w5

Idigback =~ 1*digback_w1 + 1*digback_w2 + 1*digback_w3 + 1*digback_w4 + 1*digback_w5
Sdigback =~ 0*digback_w1 + 2.98*digback_w2 + 6.75*digback_w3 + 9.82*digback_w4 + 12.54*digback_w5

Isymsear =~ 1*symsear_w1 + 1*symsear_w2 + 1*symsear_w3 + 1*symsear_w4 + 1*symsear_w5
Ssymsear =~ 0*symsear_w1 + 2.98*symsear_w2 + 6.75*symsear_w3 + 9.82*symsear_w4 + 12.54*symsear_w5

Idigsym =~ 1*digsym_w1 + 1*digsym_w2 + 1*digsym_w3 + 1*digsym_w4 + 1*digsym_w5
Sdigsym =~ 0*digsym_w1 + 2.98*digsym_w2 + 6.75*digsym_w3 + 9.82*digsym_w4 + 12.54*digsym_w5

Iittotal =~ 1*ittotal_w1 + 1*ittotal_w2 + 1*ittotal_w3 + 1*ittotal_w4 + 1*ittotal_w5
Sittotal =~ 0*ittotal_w1 + 2.98*ittotal_w2 + 6.75*ittotal_w3 + 9.82*ittotal_w4 + 12.54*ittotal_w5

Icrtmean =~ 1*crtmean_w1 + 1*crtmean_w2 + 1*crtmean_w3 + 1*crtmean_w4 + 1*crtmean_w5
Scrtmean =~ 0*crtmean_w1 + 2.98*crtmean_w2 + 6.75*crtmean_w3 + 9.82*crtmean_w4 + 12.54*crtmean_w5

# latent g intercept and slope 
Ig =~  Iblkdes + Imatreas  + Ispantot + Ivftot + Ivpatotal + Ilmtotal +
  Idigback + Isymsear + Idigsym + Icrtmean + Iittotal + Inart + Iwtar 
# 
Sg =~ Sblkdes + Smatreas + Sspantot + Svftot + Svpatotal + Slmtotal +
  Sdigback + Ssymsear + Sdigsym + Scrtmean + Sittotal + Snart + Swtar 

#indicator as scaling reference: loading=1, int=0
Iblkdes ~ 0*1
Sblkdes ~ 0*1 

# within-wave covariances between nart and wtar
nart_w1 ~~ wtar_w1
nart_w2 ~~ wtar_w2
nart_total_w3 ~~ wtar_total_w3
nart_total_w4 ~~ wtar_total_w4
nart_total_w5 ~~ wtar_total_w5

# within-test intercept-slope covariances
Imatreas ~~ Smatreas
Iblkdes ~~ Sblkdes
#Ispantot ~~Sspantot
Inart ~~ Snart
Iwtar ~~ Swtar
Ivftot ~~ Svftot
Ivpatotal ~~ Svpatotal
Ilmtotal ~~ Slmtotal
Idigback ~~ Sdigback
Isymsear ~~ Ssymsear
Idigsym ~~ Sdigsym
Iittotal ~~ Sittotal
Icrtmean ~~ Scrtmean


# within-domain intercept-intercept and slope-slope covariances
Iblkdes ~~ Imatreas # Visuospatial domain
Iblkdes ~~ Ispantot
Imatreas ~~ Ispantot
Sblkdes ~~ Smatreas 
#Sblkdes ~~ Sspantot
#Smatreas ~~ Sspantot

Inart ~~ Ivftot #Crystalized domain
Iwtar ~~ Ivftot
Iwtar ~~ Inart
Snart ~~ Svftot
Swtar ~~ Svftot
Swtar ~~ Snart

Ilmtotal ~~ Ivpatotal # Verbal memory domain
Ilmtotal ~~ Idigback
Ivpatotal ~~ Idigback
Slmtotal ~~ Svpatotal
Slmtotal ~~ Sdigback
Svpatotal ~~ Sdigback

Iittotal ~~ Idigsym #Processing speed domain
Iittotal ~~ Isymsear
Iittotal ~~ Icrtmean
Idigsym ~~ Isymsear
Idigsym ~~ Icrtmean
Isymsear ~~ Icrtmean
Sittotal ~~ Sdigsym 
Sittotal ~~ Ssymsear
Sittotal ~~ Scrtmean
Sdigsym ~~ Ssymsear
Sdigsym ~~ Scrtmean
Ssymsear ~~ Scrtmean

#fixed negative residual variance to 0 
Sspantot ~~ 0*Sspantot
'

# fit model in lavaan 
fit <- growth(model = model, dset_mod,  missing = "ml.x")
#save=standardizedsolution(fit, output="data.frame")
summary(fit, fit.measures = T, standardized = T)

#==================================================================
# extract LBC1936 g intercepts and slopes
#==================================================================

cogscores <- dset_mod %>% select(contains("matreas") | contains("blkdes") | contains("spantot") | contains("nart") | contains("vftot") | contains("vpa") | contains("lmtotal") | contains("digback") | contains("symsear") | contains("digsym") | contains("ittotal") | contains("crtmean") )
wavesindex <- rep(c(1,2,3,4,5), 12) # index the waves 

# find people that were only tested at one wave only to exclude them from prediction
# and find people that were not tested at wave 1
w1 <- matrix(NA, nrow(cogscores), 1)
w2 <- matrix(NA, nrow(cogscores), 1)
w3 <- matrix(NA, nrow(cogscores), 1)
w4 <- matrix(NA, nrow(cogscores), 1)
w5 <- matrix(NA, nrow(cogscores), 1)
for (i in 1:nrow(cogscores)) {
  if (!all(is.na(cogscores[i, which(wavesindex == 1)]))) {
    w1[i,1] <- 1 }
  if (!all(is.na(cogscores[i, which(wavesindex == 2)]))) {
    w2[i,1] <- 1 }
  if (!all(is.na(cogscores[i, which(wavesindex == 3)]))) {
    w3[i,1] <- 1 }
  if (!all(is.na(cogscores[i, which(wavesindex == 4)]))) {
    w4[i,1] <- 1 }
  if (!all(is.na(cogscores[i, which(wavesindex == 5)]))) {
    w5[i,1] <- 1 }
}

wavesample <- cbind(w1, w2, w3, w4, w5)
rowSums(wavesample, na.rm = T)
onewave <- which(rowSums(wavesample, na.rm = T) < 2) # these people were only tested at one wave


# delete participant with all missing data
#dset_mod1 = dset_mod[-which(rowSums(is.na(dset_mod)) >= (ncol(dset_mod)-1)),]

# extract factor scores
factorScores <- data.frame(lavPredict(fit, dset_mod, 
                 type ="lv", 
                 method = "regression", 
                 label = TRUE))

# set slopes for participants who were only tested at one wave to NA
factorScores$Sg[onewave] <- NA 
# set intercepts for participants who were not tested at w1 to NA 
factorScores$Ig[which(is.na(w1))] <- NA 
# add participant ID back to factor scores
factorScores <- data.frame(lbc36no = dset_mod$lbc36no, Ig = factorScores$Ig, Sg = factorScores$Sg)

# report Ns
cbind(c("N intercepts included", "N intercepts excluded", "N slopes included", "N slopes excluded"), as.numeric(c(length(which(is.na(factorScores$Ig) == F)), length(which(is.na(factorScores$Ig) == T)), length(which(is.na(factorScores$Sg) == F)), length(which(is.na(factorScores$Sg) == T))))) 

# write.table to dat
write.table(factorScores, file = paste0(wd, "/LBC1936_Cog_FactorScores.txt"), quote = F, col.names = T, row.names = F, sep = "\t")

Plot cognitive scores

Code

# consistent variable names
names(data) <- gsub("vpa_tot", "vpatot", colnames(data)) 

# plot the different cognitive tests
p_matreas <- plot_long(dat = data, id.var = "lbc36no", var = "matreas")+
  xlab("Wave")+
  ylab("Matrix reasoning\n(# correct)\n'matreas'")

p_blkdes <- plot_long(dat = data, id.var = "lbc36no", var = "blkdes")+
  xlab("Wave")+
  ylab("Block design\n(# correct)\n'blkdes'")

p_spantot <- plot_long(dat = data, id.var = "lbc36no", var = "spantot")+
  xlab("Wave")+
  ylab("Spatial span\n(# correct)\n'spantot'")

p_vpatotal <- plot_long(dat = data, id.var = "lbc36no", var = "vpatotal")+
  xlab("Wave")+
  ylab("Verbal paired associations\n(# correct)\n'vpatotal'")

p__lmtotal <- plot_long(dat = data, id.var = "lbc36no", var = "lmtotal")+
  xlab("Wave")+
  ylab("Logical memory\n(# details recalled)\n'lmtotal'")

p_digback <- plot_long(dat = data, id.var = "lbc36no", var = "digback")+
  xlab("Wave")+
  ylab("Digit span backwards\n(max length)\n'digback'")

p_nart <- plot_long(dat = data, id.var = "lbc36no", var = "nart")+
  xlab("Wave")+
  ylab("National Adult\nReading Test\n(# correct) 'nart'")

p_wtar <- plot_long(dat = data, id.var = "lbc36no", var = "wtar")+
  xlab("Wave")+
  ylab("Wechsler Test of\nAdult Reading\n(# correct) 'wtar'")

p_vftot <- plot_long(dat = data, id.var = "lbc36no", var = "vftot")+
  xlab("Wave")+
  ylab("Verbal fluency\n(# correct)\n'vftot'")

p_digsym<- plot_long(dat = data, id.var = "lbc36no", var = "digsym")+
  xlab("Wave")+
  ylab("Digit-symbol substitution\n(# matched pairs)\n'digsym'")

p_symsear <- plot_long(dat = data, id.var = "lbc36no", var = "symsear")+
  xlab("Wave")+
  ylab("Symbol Search\n(# correct)\n'symsear'")

p_crtmean <- plot_long(dat = data, id.var = "lbc36no", var = "crtmean")+
  xlab("Wave")+
  ylab("Four-choice reaction\ntime (ms)\n'crtmean'")

p_ittotal <- plot_long(dat = data, id.var = "lbc36no", var = "ittotal")+
  xlab("Wave")+
  ylab("Inspection time\n(# correct)\n'ittotal'")

# arrange all plots
plots <- ls(pattern="p_")
plot_list <- list()

for(i in plots){
  plot_list[[i]] <- get(i)
}

# arrange all plots
ggarrange(plotlist = plot_list, ncol = 3, nrow = 5)

Clinically-ascertained all-cause dementia

This variable is available through the LBC1936 and was derived as described here.

Code

# read data
file = foreign::read.spss(paste0(target, "/LBC1936_EarlyAccessDementiaAscertainment_AF_07DEC2023.sav"), to.data.frame=T)

# remove redundant spacing 
for(i in names(file)){
  file[,i] = stringr::str_remove_all(file[,i], pattern = " ")
}

# remove NA coding
file[file == -999] <- NA
file[file == -888] <- NA

# no participants with all missing data
# which(is.na(file$dement_w1))

table(file$dementia_code)
#  Dementia NoDementia 
#       118        747 

file$dementia_code[which(file$dementia_code == "Dementia")] <- 1
file$dementia_code[which(file$dementia_code == "NoDementia")] <- 0

# binary variable as factor
file$dementia_code = as.factor(file$dementia_code)

# write.table to dat
write.table(file[,c("lbc36no", "dementia_code")], file = paste0(wd, "/LBC1936_dementia.txt"), quote = F, col.names = T, row.names = F, sep = "\t")

APOE status

Code

# read data
file = foreign::read.spss(paste0(target, "/LBC1936_BrainAtrophy_AF_07NOV2023.sav"), to.data.frame=T)

table(file[,grep("APOE", names(file))])
#APOEgenotype
#APOEe4               ?     e2/e2 e2/e3 e2/e4 e3/e3 e3/e4 e4/e4
#No e4 allele     0     0     5   120     0   597     0     0
#e4 allele        0     0     0     0    23     0   262    21


table(file$APOEe4)
#No e4 allele    e4 allele 
#722          306

# recode
file$APOEe4 = as.numeric(file$APOEe4)-1

# make factor
file$APOEe4 = as.factor(file$APOEe4)

# write.table to dat
write.table(file[,c("lbc36no", "APOEe4")], file = paste0(wd, "/LBC1936_APOEe4.txt"), quote = F, col.names = T, row.names = F, sep = "\t")

Frailty

Code

# names(file)[grep("Frailty", names(file))]
frailty = file[,c(1,grep("Frailty", names(file)))]

# model slopes and intercepts
frailModel <- '
          i =~ 1*FrailtyIndex_W1 + 1*FrailtyIndex_W2 + 1*FrailtyIndex_W3 + 1*FrailtyIndex_W4 + 1*FrailtyIndex_W5
          s =~ 0*FrailtyIndex_W1 + 2.98*FrailtyIndex_W2 + 6.75*FrailtyIndex_W3 + 9.82*FrailtyIndex_W4 + 12.54*FrailtyIndex_W5
          '

fit = growth(frailModel, frailty, missing = "ml.x")
summary(fit, standardized = T)

# estimate individual-level values for slope and intercept
frailtyPred = as.data.frame(lavPredict(fit,
                 type ="lv", 
                 method = "regression", 
                 label = TRUE))

# merge with participant ID
frailtyPred = cbind(frailty$lbc36no,frailtyPred)
names(frailtyPred) = c("lbc36no", "iFrail","sFrail")

# save
write.table(frailtyPred, file = paste0(wd, "/LBC1936_Frailty.txt"), quote = F, col.names = T, row.names = F, sep = "\t")

Plot frailty

Code

##### visualise 
frailty = file[,c(1,grep("Frailty", names(file)))]
# unify naming (otherwise plotting function won't work)
names(frailty) = str_replace(names(frailty), pattern= "_W", replacement = "_w")

# find people who were tested at one wave only to exclude them from slope prediction
onewave = which(rowSums(is.na(frailty[,grep("_W", names(frailty))])) == 4)

# find people who were not tested at wave 1 to exclude from intercept
NotWave1 = which(is.na(frailty$FrailtyIndex_W1))

# set slopes for participants who were only tested at one wave to NA
frailtyPred$s[onewave] <- NA 
# set intercepts for participants who were not tested at w1 to NA 
frailtyPred$i[NotWave1] <- NA 

##### inspect trajectory

p_frailty <- plot_long(dat = frailty, id.var = "lbc36no", var = "FrailtyIndex")+
  xlab("Wave")+
  ylab("Frailty index")

p_frailty

Diabetes

Code

# no participants with all missing data
# which(is.na(file$diab_w1))

# first of all, all No
file$diab_life = 0
file$diab_life[which(file$diab_w1 == "Yes" | 
                         file$diab_w2 == "Yes" |
                         file$diab_w3 == "Yes" | 
                         file$diab_w4 == "Yes" | 
                         file$diab_w5 == "Yes")] = 1

# remove those with all missing
allMissing = which(rowSums(!is.na(file[,grep("diab_w", names(file))])) == 0) 
if(length(allMissing) != 0){file$diab_life[allMissing] = NA}


table(file$diab_life)
# No Yes 
# 942 149 

file$diab_life = as.factor(file$diab_life)

# write.table to dat
write.table(file[,c("lbc36no", "diab_life")], file = paste0(wd, "/LBC1936_diabetes.txt"), quote = F, col.names = T, row.names = F, sep = "\t")

Hypertension

Code

# no participants with all missing data
# which(is.na(file$hibp_w1))

# first of all, all No
file$hypertension_life = 0
file$hypertension_life[which(file$hibp_w1 == "Yes" | 
                       file$hibp_w2 == "Yes" |
                       file$hibp_w3 == "Yes" | 
                       file$hibp_w4 == "Yes" | 
                       file$hibp_w5 == "Yes")] = 1

# remove those with all missing
allMissing = which(rowSums(!is.na(file[,grep("hibp_w", names(file))])) == 0) 
if(length(allMissing) != 0){file$hypertension_life[allMissing] = NA}

file$hypertension_life = as.factor(file$hypertension_life)

table(file$hypertension_life)
# No Yes 
# 448 643 

# write.table to dat
write.table(file[,c("lbc36no", "hypertension_life")], file = paste0(wd, "/LBC1936_hypertension.txt"), quote = F, col.names = T, row.names = F, sep = "\t")

Smoking (packyears)

Number of cigarettes per year x years of smoking / 20 (pack size)

Code

# keep variables of interest
smok = file[,c(1,grep("smo", names(file)))]

# wave 1 has 465 more non-missing values than wave 2 
# all.equal(smok$smokagestop_w1, smok$smokagestop_w2)

# keep only first wave
smok = smok[,c(1,grep("_w1", names(smok)))]

# identify participants who have starting year but not end year as they must still be smokers
smok$smokagestop = ifelse(smok$smokcat_w1 == "current smoker", 70, smok$smokagestop_w1)

# extract years of smoking (so far, regardless if they stopped smoking or not)
smok$yearsSmok = smok$smokagestop - smok$smokagestart_w1

# two entries are smoknumcigs == 0 which would null the equation
# one of those participants have indicates to be a current smoker and the other one to hae never smoked (but still gave starting age)
# as I don't know how that came about, I have deleted these two data points
smok[which(smok$smoknumcigs_w1 == 0),] = NA

# calculate packyears
smok$packyears = (smok$smoknumcigs_w1 * 365 * smok$yearsSmok)/20

# null the never smokers
smok$packyears[which(smok$smokcat_w1 == "never smoked")] = 0

hist(smok$packyears)

# write.table to dat
write.table(smok[,c("lbc36no", "packyears")], file = paste0(wd, "/LBC1936_packYears.txt"), quote = F, col.names = T, row.names = F, sep = "\t")

Body mass index (BMI)

Code

# names(file)[grep("bmi", names(file))]
bmi = file[,c(1,grep("bmi", names(file)))]

# recode missing
bmi[bmi == -999] <- NA

# model slopes and intercepts
BMImodel <- '
          i =~ 1*bmi_w1 + 1*bmi_w2 + 1*bmi_w3 + 1*bmi_w4 + 1*bmi_w5
          s =~ 0*bmi_w1 + 2.98*bmi_w2 + 6.75*bmi_w3 + 9.82*bmi_w4 + 12.54*bmi_w5
          '

fit = growth(BMImodel, bmi, missing = "ml.x")
summary(fit, standardized = T)

# estimate individual-level values for slope and intercept
bmiPred = as.data.frame(lavPredict(fit,
                 type ="lv", 
                 method = "regression", 
                 label = TRUE))

# merge with participant ID
bmiPred = cbind(bmi$lbc36no, bmiPred)
names(bmiPred) = c("lbc36no", "iBMI", "sBMI")

# write.table to dat
write.table(bmiPred[,c("lbc36no", "iBMI", "sBMI")], file = paste0(wd, "/LBC1936_bmi.txt"), quote = F, col.names = T, row.names = F, sep = "\t")

Plot BMI

Code

## visualise
bmi = file[,c(1,grep("bmi", names(file)))]

# find people who were tested at one wave only to exclude them from slope prediction
onewave = which(rowSums(is.na(bmi[,grep("_W", names(bmi))])) == 4)

# find people who were not tested at wave 1 to exclude from intercept
NotWave1 = which(is.na(bmi$bmiIndex_W1))

# set slopes for participants who were only tested at one wave to NA
bmiPred$s[onewave] <- NA 
# set intercepts for participants who were not tested at w1 to NA 
bmiPred$i[NotWave1] <- NA 

summary(bmiPred)

# write.table to dat
write.table(bmiPred, file = paste0(wd, "/LBC1936_bmiFactorScores.txt"), quote = F, col.names = T, row.names = F, sep = "\t")

##### inspect trajectory
plot_long(dat = bmi, id.var = "lbc36no", var = "bmi")+
  xlab("Wave")+
  ylab("bmi index")

Brain age

Code

# read file
file = as.data.frame(foreign::read.spss(paste0(target, "/BrainAgeVia2p1_AF_07DEC2023.sav"), to.file.frame=T))

# remove ID info when waves are indicated in "JC_BrainAge_ID"
file$JC_BrainAge_ID = as.numeric(gsub(".*_w", "", file$JC_BrainAge_ID))

# rename columns more intuitively
names(file)[which(names(file) == "JC_BrainAge_ID")] = "wave"
names(file)[which(names(file) == "JCBA_brain_age_W2")] = "brainAgeEst"

# read in age info 
ageInfo = as.data.frame(foreign::read.spss("LBC1936_BrainAtrophy_AF_07NOV2023.sav"))
# strange name formatting
ageInfo$lbc36no =gsub(" ", "", ageInfo$lbc36no)

# merge info 
file = merge(file, ageInfo[, c("lbc36no", "agedays_w2")], by = "lbc36no")

# get age in days in years to match estimated brain age
file$agedays_w2 = file$agedays_w2/365

# get brain age gap which is the difference between brain age and chronological age
# positive value should mean the particpant has a healthier looking brain tthan expected given their age 
# negative value should mean the participant has an unhealthier looking brain than expected
file$brainAge = file$agedays_w2 - file$brainAgeEst

# summary stats
summary(file$brainAge)
hist(file$brainAge)

# write.table to dat
write.table(file[,c("lbc36no","brainAge")], file = paste0(wd, "/LBC1936_brainAge.txt"), quote = F, col.names = T, row.names = F, sep = "\t")

Stroke

Code

# set path to where data was saved 
target="/BrainAtrophy/data"

# read data
file = foreign::read.spss(paste0(target, "/LBC1936_BrainAtrophy_AF_07NOV2023.sav"), to.data.frame=T)

# names(file)[grep("stroke", names(file))]

# first of all, all No
file$strokemask_life = 0
# identify people who had no scan
file$strokemask_life[which(is.na(file$stroke_mask_w2) & 
                             is.na(file$stroke_mask_w3) &
                             is.na(file$stroke_mask_w4) &
                             is.na(file$stroke_mask_w5))] = NA

file$strokemask_life[which(file$stroke_mask_w2 == "Yes Stroke Mask - Had scan" | 
                               file$stroke_mask_w3 == "Yes Stroke Mask - Had scan" |
                               file$stroke_mask_w4 == "Yes Stroke Mask - Had scan" | 
                               file$stroke_mask_w5 == "Yes Stroke Mask - Had scan")] = 1


file$strokemask_life <- as.factor(file$strokemask_life)

table(file$strokemask_life)
# No Yes 
# 544 156 

# write.table to dat
write.table(file[,c("lbc36no","strokemask_life")], file = paste0(wd, "/LBC1936_strokemask.txt"), quote = F, col.names = T, row.names = F, sep = "\t")

Visual rating scales

Note we did not have access to rating scales at wave 5, only wave 3.

Code

# set path to where data was saved 
target="/BrainAtrophy/data"

# read data
file = foreign::read.spss(paste0(target, "/LBC1936_BrainAtrophy_AtrophyRating_AF_03MAY2024.sav"), to.data.frame=T)

# strange name formatting
file$lbc36no = gsub(" ", "", file$lbc36no)

# atrophy deep and superficial are in great agreement (only 80 participants where they are not identical)
sum(file$atrophy_deep_w3 == file$atrophy_superficial_w3, na.rm=T)

# recode so the scales are numeric
file$atrophy_deep_recoded = NA
file$atrophy_deep_recoded[grepl("(<25th)", file$atrophy_deep_w3)] = 1
file$atrophy_deep_recoded[grepl("(25-50th)", file$atrophy_deep_w3)] = 2
file$atrophy_deep_recoded[grepl("(50-75th)", file$atrophy_deep_w3)] = 3
file$atrophy_deep_recoded[grepl("(75-95th)", file$atrophy_deep_w3)] = 4
file$atrophy_deep_recoded[grepl("(>95th)", file$atrophy_deep_w3)] = 5
file$atrophy_deep_recoded[grepl("(>>5)", file$atrophy_deep_w3)] = 6

# same for atrophy superficial
file$atrophy_superficial_recoded = NA
file$atrophy_superficial_recoded[grepl("(<25th)", file$atrophy_superficial_w3)] = 1
file$atrophy_superficial_recoded[grepl("(25-50th)", file$atrophy_superficial_w3)] = 2
file$atrophy_superficial_recoded[grepl("(50-75th)", file$atrophy_superficial_w3)] = 3
file$atrophy_superficial_recoded[grepl("(75-95th)", file$atrophy_superficial_w3)] = 4
file$atrophy_superficial_recoded[grepl("(>95th)", file$atrophy_superficial_w3)] = 5
file$atrophy_superficial_recoded[grepl("(>>5)", file$atrophy_superficial_w3)] = 6


# write.table to dat
write.table(file[,c("lbc36no","atrophy_deep_recoded","atrophy_superficial_recoded")], file = paste0(target, "/LBC1936_atrophyScales.txt"), quote = F, col.names = T, row.names = F, sep = "\t")

Age

Code

# read data
file = foreign::read.spss(paste0(target, "/LBC1936_BrainAtrophy_AF_07NOV2023.sav"), to.data.frame=T)

# strange name formatting
file$lbc36no = gsub(" ", "", file$lbc36no)

# keep age variable for first neuroimaging visit
file = file[,c("lbc36no", "ageMRI_w2")]

# save file
write.table(file, file = paste0(target, "/LBC1936_age_w2.txt"), quote = F, col.names = T, row.names = F, sep = "\t")

Merge all LBC variables into one file

Code

# it makes it more straightforward to conduct the following analyses if I merge all phenotypes into one file
# Step 1: Read all phenotypes in
# Step 2: Merge them
# Step 3: Save

# cognitive ability
cog = fread(paste0(wd, "/LBC1936_Cog_FactorScores.txt"))
# dementia
dement = fread(paste0(wd, "/LBC1936_dementia.txt"))
# APOE
APOE = fread(paste0(wd, "/LBC1936_APOEe4.txt"))
# Frailty
frail = fread(paste0(wd, "/LBC1936_Frailty.txt"))
# diabetes
diab = fread(paste0(wd, "/LBC1936_diabetes.txt"))
# hyp
hyp = fread(paste0(wd, "/LBC1936_hypertension.txt"))
# packyears
smok = fread(paste0(wd, "/LBC1936_packYears.txt"))
# bmi
bmi = fread(paste0(wd, "/LBC1936_bmi.txt"))
# brain age
BrainAge = fread(paste0(wd, "/LBC1936_brainAge.txt"))
# stroke
stroke = fread(paste0(wd, "/LBC1936_strokemask.txt"))
# atrophy scales
atrophy = fread(paste0(wd, "/LBC1936_atrophyScales.txt"))
# age
age = fread(paste0(wd, "/LBC1936_age_w2.txt"))

# merge data
DatList = list(cog, dement, APOE, frail, diab, hyp, smok, bmi, BrainAge, stroke, atrophy, age)
LBC_merged = Reduce(function(x,y) merge(x, y, by = "lbc36no", all = T), DatList)

# remove empty rows
LBC_merged = LBC_merged[-which(rowSums(!is.na(LBC_merged)) == 0),]

# choose prettier names
names(LBC_merged) = c("lbc36no","iCog","sCog","dementia","APOEe4","iFrailty","sFrailty","diabetes","hypertension","packyears","iBMI","sBMI","BrainAge","Stroke","VisualAtrophyDeep","VisualAtrophySuperficial","ageMRI_w2")

# remove empty rows
LBC_merged = LBC_merged[rowSums(is.na(LBC_merged)) != ncol(LBC_merged),]

# write
fwrite(LBC_merged, file = paste0(wd, "/LBC1936_allPheno.txt"), col.names = T, row.names = F, quote = F, na = NA, sep = "\t")