What’s the difference between plasma cells in the lymphoid regions vs those in the lamina propria?

ie. We expect there to be some difference in cell state of plasma cells depending on where they are.

NB: We’re working with some very broad annotations, our ‘plasma’ cell group is really plasma and B cells!

p1 <- plotSpatialFeature(sfe.sample.HC, "celltype_subset",colGeometryName = "cellSeg") + 
  theme(legend.title=element_blank()) +
  ggtitle(sample)

p2 <- plotSpatialFeature(sfe.sample.HC, "niche",colGeometryName = "cellSeg") + 
  theme(legend.title=element_blank()) +
  ggtitle(sample)

p1 + p2 + plot_layout(nrow = 2) & 
  theme(legend.position = "bottom")

Each niche has a different balance of cell types within it - and we can again see the sizeable proportion of plasma cells present in both n3 and n6.

## Celltype proportions
celltype_summary_table<- colData(sfe) %>% as_tibble() %>%
  group_by(celltype_subset, niche ) %>%
  summarise(n_cells = n())
## `summarise()` has grouped output by 'celltype_subset'. You can override using
## the `.groups` argument.
ggplot(celltype_summary_table, aes(fill=celltype_subset, y=n_cells, x=niche) )+
  geom_bar(position="fill", stat="identity") +
  theme_bw() +
  coord_flip() +
  theme(legend.position = "bottom") +
  scale_y_continuous(expand = c(0,0)) +
  ggtitle( "Niche composition by cell type")

Pool samples into pseudobulk

Once again, lets build (load) a pseudobulk representation to run a differential expression. This time, we will pool cells on the basis of sample, cell type and niche.

# # DO NOT RUN
# sfe$pdb_sample <- paste0(sfe$tissue_sample, '_', sfe$niche,"_", sfe$celltype_subset)
# se.pdb <- aggregateAcrossCells(sfe, ids=sfe$pdb_sample,
#                                BPPARAM = MulticoreParam(workers=4)  )

Load pseudobulk

se.pdb <- readRDS(spe_pseudobulk_by_celltype_in_niche_file)
counts(se.pdb)[1:5,1:5]
##      CD_a_n1_epi CD_a_n1_myeloids CD_a_n1_plasmas CD_a_n1_stroma CD_a_n1_tcells
## AATK           4                2               9              7              0
## ABL1           9                2              14             21              0
## ABL2           4                1               4              7              3
## ACE            1                2               6              3              1
## ACE2           8                5              13              7              0

Filter to a minimum number of cells per pool

DT::datatable(data.frame(colData(se.pdb))[,c('pdb_sample','individual_code', 'niche', 'celltype_subset' ,'tissue_sample','ncells')])

With the extra level of separation - the pools (and thus total counts) are smaller.

We only care about plasma cells, and mostly only in n3 and n6 - so we can set a threshold accordingly. Here we choose 50 cells.

# What is the minimum acceptable number of cells in a pool?
# Using a smaller threshold, we have smaller groups
min_cells_per_pdbsample <- 50

ggplot(colData(se.pdb), aes(x=ncells, col=celltype_subset)) +
  geom_density() +
  geom_vline(xintercept=min_cells_per_pdbsample, lty=3) +
  geom_rug() +
  scale_x_log10() +
  theme_bw() +
  ggtitle("How many cells per pseduobulk sample?") +
  facet_wrap(~niche)
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_density()`).

Apply ncell filter, and also filter down to just plasma cells.

# Filter on minimum number of cells per pdb group
# (ncells column was added by aggregateAcrossCells)
se.pdb <- se.pdb[,se.pdb$ncells >= min_cells_per_pdbsample]

se.pdb <- se.pdb[,se.pdb$celltype_subset == "plasmas"]

Differential expression

Only testing for difference in plasma cells located in the B/T cell rich lymphoid features (n6) vs those in the epithelial/stromal mix of the laminar propria layer (n3).

Here we will ignore the condition (HC/CD) entirely, but we will have a paired analysis because we’ll have pools of differently localised plasma cells from the same individual.

There are plenty of plasma cells within other niches, so we will keep them in our model, even if we’re not testing for them directly.

Note that there are only 4 samples for niche 6 - not every sample had lymphoid regions to begin with.

# Pull out the pseudobulk counts matrix for passed samples
pseudobulk_counts_matrix <- counts(se.pdb)
# And the corresponding annotations
pseudobulk_anno_table    <- as_tibble(colData(se.pdb)[,c('pdb_sample','group', 'tissue_sample', 'celltype_subset','niche', 'ncells')]) # + any other experimental factors

How many (plasma) pools per niche?

table(pseudobulk_anno_table$niche, pseudobulk_anno_table$tissue_sample)
##     
##      CD_a CD_b CD_c HC_a HC_b HC_c
##   n1    1    1    1    1    1    1
##   n2    1    1    1    1    1    1
##   n3    1    1    1    1    1    1
##   n4    1    1    1    1    1    1
##   n5    0    0    0    1    1    1
##   n6    0    0    1    1    1    1
##   n7    0    0    0    0    0    0
##   n8    0    1    0    0    0    0

This code tests a single comparison, using the same approach as before. (As per Differential Expression: By Celltype and Differential Expression: By Niche)

# And pull out the annotation and counts
anno_table   <- as.tibble(colData(se.pdb))
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## i Please use `as_tibble()` instead.
## i The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
counts_matrix <- counts(se.pdb)


# Setup objects for limma
dge <- DGEList(counts_matrix)
dge <- calcNormFactors(dge)

# Build model
# Not considering group
individual      <- anno_table$tissue_sample
niche           <- anno_table$niche

# Model design
design    <- model.matrix( ~0 + niche + individual)

# Run voom
vm  <- voom(dge, design = design, plot = FALSE)
## Coefficients not estimable: nichen7
## Warning: Partial NA coefficients for 999 probe(s)
# Fit model
fit     <- lmFit(vm, design) 
## Coefficients not estimable: nichen7
## Warning: Partial NA coefficients for 999 probe(s)
# Just one test: Are the plasma cells different in the n3 (lyphoid) regions vs dispersed in the epithelial adjacent stroma?
contrasts <- makeContrasts(contrasts="nichen6-nichen3",
                           levels=coef(fit))

fit <- contrasts.fit(fit, contrasts)
fit <- eBayes(fit)


de_result.plasmaloc <- topTable(fit, n = Inf, adjust.method = "BH", coef = 1) %>%
      rownames_to_column("target") %>%
      mutate(contrast="n6Vsn3",
             contrast_group="localisation",
             celltype="plasmas") %>%
      select(celltype,contrast_group, contrast,target,everything()) %>%
      arrange(P.Value)
DT::datatable(de_result.plasmaloc)

Evaluate DE (by plasma cell localisation) genes

JCHAIN

sfe.sample.plasmas <- sfe.sample.HC[, sfe.sample.HC$celltype_subset == "plasmas"]
plotSpatialFeature( sfe.sample.HC, "JCHAIN",colGeometryName = "cellSeg") + 
  theme(legend.title=element_blank()) +
  ggtitle(sample)

plotSpatialFeature( sfe.sample.plasmas , "niche",colGeometryName = "cellSeg") + 
  theme(legend.title=element_blank()) +
  ggtitle(sample)

plotSpatialFeature( sfe.sample.plasmas , "JCHAIN",colGeometryName = "cellSeg") + 
  theme(legend.title=element_blank()) +
  ggtitle(sample)

CCL19

Contrast with CCL19 - which is more associated with the nearby T-cells.

plotSpatialFeature( sfe.sample.HC, "CCL19",colGeometryName = "cellSeg") + 
  theme(legend.title=element_blank()) +
  ggtitle(sample)

plotSpatialFeature( sfe.sample.plasmas , "niche",colGeometryName = "cellSeg") + 
  theme(legend.title=element_blank()) +
  ggtitle(sample)

plotSpatialFeature( sfe.sample.plasmas , "CCL19",colGeometryName = "cellSeg") + 
  theme(legend.title=element_blank()) +
  ggtitle(sample)

CD37

plotSpatialFeature( sfe.sample.HC, "CD37",colGeometryName = "cellSeg") + 
  theme(legend.title=element_blank()) +
  ggtitle(sample)

plotSpatialFeature( sfe.sample.plasmas , "niche",colGeometryName = "cellSeg") + 
  theme(legend.title=element_blank()) +
  ggtitle(sample)

plotSpatialFeature( sfe.sample.plasmas , "CD37",colGeometryName = "cellSeg") + 
  theme(legend.title=element_blank()) +
  ggtitle(sample)