Integration between Multiple spatial datasets and single cell sequencing data¶
Set-Up Giotto Environment¶
library(Giotto)
Set A Working Directory¶
#results_folder = '/path/to/directory/'
results_folder = '/Volumes/Ruben_Seagate/Dropbox (Personal)/Projects/GC_lab/Ruben_Dries/190225_spatial_package/Results/Visium/Brain/201226_results//'
Set A Giotto Python Path¶
# set python path to your preferred python version path
# set python path to NULL if you want to automatically install (only the 1st time) and use the giotto miniconda environment
python_path = NULL
if(is.null(python_path)) {
    installGiottoEnvironment()
}
Dataset Explanation¶
10X genomics <https://www.10xgenomics.com/spatial-transcriptomics> recently launched a new platform to obtain spatial expression data using a Visium Spatial Gene Expression slide.
The Visium Cancer Prostate data to run this tutorial can be found here. The Visium Normal Prostate data to run this tutorial can be found here.
High resolution png from original tissue:
 
 
1. Create Giotto Object And Join¶
dataDir <- 'path/to/data'
## obese upper
N_pros = createGiottoVisiumObject(
    visium_dir = paste0(dataDir,'/Visium_FFPE_Human_Normal_Prostate/'),
    expr_data = 'raw',
    png_name = 'tissue_lowres_image.png',
    gene_column_index = 2,
    instructions = instrs
)
## obese lower
C_pros = createGiottoVisiumObject(
    visium_dir = paste0(dataDir,'/Visium_FFPE_Human_Prostate_Cancer/'),
    expr_data = 'raw',
    png_name = 'tissue_lowres_image.png',
    gene_column_index = 2,
    instructions = instrs
)
# join giotto objects
# joining with x_shift has the advantage that you can join both 2D and 3D data
# x_padding determines how much distance is between each dataset
# if x_shift = NULL, then the total shift will be guessed from the giotto image
testcombo = joinGiottoObjects(gobject_list = list(N_pros, C_pros),
    gobject_names = c('NP', 'CP'),
    join_method = 'shift', x_padding = 1000)
# join info is stored in this slot
# simple list for now
testcombo@join_info
# check joined Giotto object
fDataDT(testcombo)
pDataDT(testcombo)
showGiottoImageNames(testcombo)
showGiottoSpatLocs(testcombo)
showGiottoExpression(testcombo)
# this plots all the images by list_ID
spatPlot2D(gobject = testcombo, cell_color = 'in_tissue',
    show_image = T, image_name = c("NP-image", "CP-image"),
    group_by = 'list_ID', point_alpha = 0.5,
    save_param = list(save_name = "1a_plot"))
 
# this plots one selected image
spatPlot2D(gobject = testcombo, cell_color = 'in_tissue',
    show_image = T, image_name = c("NP-image"), point_alpha = 0.3,
    save_param = list(save_name = "1b_plot"))
 
# this plots two selected images
spatPlot2D(gobject = testcombo, cell_color = 'in_tissue',
    show_image = T, image_name = c( "NP-image", "CP-image"),
    point_alpha = 0.3,
    save_param = list(save_name = "1c_plot"))
 
2. Process Giotto Objects¶
     # subset on in-tissue spots
     metadata = pDataDT(testcombo)
     in_tissue_barcodes = metadata[in_tissue == 1]$cell_ID
     testcombo = subsetGiotto(testcombo, cell_ids = in_tissue_barcodes)
## filter
testcombo <- filterGiotto(gobject = testcombo,
    expression_threshold = 1,
    feat_det_in_min_cells = 50,
    min_det_feats_per_cell = 500,
    expression_values = c('raw'),
    verbose = T)
## normalize
testcombo <- normalizeGiotto(gobject = testcombo, scalefactor = 6000)
## add gene & cell statistics
testcombo <- addStatistics(gobject = testcombo, expression_values = 'raw')
fmeta = fDataDT(testcombo)
testfeats = fmeta[perc_cells > 20 & perc_cells < 50][100:110]$feat_ID
violinPlot(testcombo, feats = testfeats, cluster_column = 'list_ID', save_param = list(save_name = "2a_plot"))
plotMetaDataHeatmap(testcombo, selected_feats = testfeats, metadata_cols = 'list_ID', save_param = list(save_name = "2b_plot"))
 
 
## visualize
#fDataDT(testcombo)
spatPlot2D(gobject = testcombo, group_by = 'list_ID', cell_color = 'nr_feats', color_as_factor = F, point_size = 0.75, save_param = list(save_name = "2c_plot"))
 
3. Dimension Reduction¶
## PCA ##
testcombo <- calculateHVF(gobject = testcombo)
testcombo <- runPCA(gobject = testcombo, center = TRUE, scale_unit = TRUE)
screePlot(testcombo, ncp = 30, save_param = list(save_name = "3a_screeplot"))
 
4. Clustering¶
4.1 Without Integration¶
Integration is usually needed for dataset of different conditions to minimize batch effects. Without integration means without using any integration methods.
## cluster and run UMAP ##
# sNN network (default)
testcombo <- createNearestNetwork(gobject = testcombo,
    dim_reduction_to_use = 'pca', dim_reduction_name = 'pca',
    dimensions_to_use = 1:10, k = 15)
# Leiden clustering
testcombo <- doLeidenCluster(gobject = testcombo, resolution = 0.2, n_iterations = 1000)
# UMAP
testcombo = runUMAP(testcombo)
plotUMAP(gobject = testcombo,
    cell_color = 'leiden_clus', show_NN_network = T, point_size = 1.5,
    save_param = list(save_name = "4.1a_plot"))
 
    spatPlot2D(gobject = testcombo, group_by = 'list_ID',
cell_color = 'leiden_clus',
point_size = 1.5,
save_param = list(save_name = "4.1b_plot"))
 
    spatDimPlot2D(gobject = testcombo,
cell_color = 'leiden_clus',
save_param = list(save_name = "4.1c_plot"))
 
4.2 With Harmony integration¶
Harmony is a integration algorithm developed by Korsunsky, I. et al.. It was designed for integration of single cell data but also work well on spatial datasets.
## data integration, cluster and run UMAP ##
# harmony
#library(devtools)
#install_github("immunogenomics/harmony")
library(harmony)
## run harmony integration
testcombo = runGiottoHarmony(testcombo, vars_use = 'list_ID', do_pca = F)
## sNN network (default)
testcombo <- createNearestNetwork(gobject = testcombo,
    dim_reduction_to_use = 'harmony', dim_reduction_name = 'harmony', name = 'NN.harmony',
    dimensions_to_use = 1:10, k = 15)
## Leiden clustering
testcombo <- doLeidenCluster(gobject = testcombo,
    network_name = 'NN.harmony', resolution = 0.2, n_iterations = 1000, name = 'leiden_harmony')
# UMAP dimension reduction
testcombo = runUMAP(testcombo, dim_reduction_name = 'harmony', dim_reduction_to_use = 'harmony', name = 'umap_harmony')
plotUMAP(gobject = testcombo,
    dim_reduction_name = 'umap_harmony',
    cell_color = 'leiden_harmony',
    show_NN_network = F,
    point_size = 1.5,
    save_param = list(save_name = "4.2a_plot"))
# If you want to show NN network information, you will need to specify these arguments in the plotUMAP function
# show_NN_network = T, nn_network_to_use = 'sNN' , network_name = 'NN.harmony'
 
    spatPlot2D(gobject = testcombo, group_by = 'list_ID',
cell_color = 'leiden_harmony',
point_size = 1.5,
save_param = list(save_name = "4.2b_plot"))
 
    spatDimPlot2D(gobject = testcombo,
dim_reduction_to_use = 'umap', dim_reduction_name = 'umap_harmony',
cell_color = 'leiden_harmony',
save_param = list(save_name = "4.2c_plot"))
 
# compare to previous results
spatPlot2D(gobject = testcombo,
    cell_color = 'leiden_clus',
    save_param = list(save_name = "4_w_o_integration_plot"))
spatPlot2D(gobject = testcombo,
    cell_color = 'leiden_harmony',
    save_param = list(save_name = "4_w_integration_plot"))
 
5. Cell-Type Annotation¶
- Visium spatial transcriptomics does not provide single-cell resolution, making cell type annotation a harder problem. Giotto provides several ways to calculate enrichment of specific cell-type signature gene list:
- PAGE 
- hypergeometric test 
- Rank 
 
This is also the easiest way to integrate Visium datasets with single cell data. Example shown here is from Ma et al. from two prostate cancer patients. The raw dataset can be found here Giotto_SC is processed variable in the single cell RNAseq tutorial. You can also get access to the processed files of this dataset using getSpatialDataset
# download data to results directory ####
# if wget is installed, set method = 'wget'
# if you run into authentication issues with wget, then add " extra = '--no-check-certificate' "
getSpatialDataset(dataset = 'Human_PCa_scRNAseq', directory = results_folder)
sc_expression = paste0(results_folder, "/prostate_sc_expression_matrix.csv.gz")
sc_metadata = paste(results_folder,"/prostate_sc_metadata.csv")
giotto_SC <- createGiottoObject(
expression = sc_expression,
instructions = instrs
)
giotto_SC <- addCellMetadata(giotto_SC,
                            new_metadata = data.table::fread(sc_metadata))
giotto_SC<- normalizeGiotto(giotto_SC)
5.1 PAGE Enrichment¶
# Create PAGE matrix
# PAGE matrix should be a binary matrix with each row represent a gene marker and each column represent a cell type
# markers_scran is generated from single cell analysis ()
markers_scran = findMarkers_one_vs_all(gobject=giotto_SC, method="scran",
                                    expression_values="normalized", cluster_column='prostate_labels', min_feats=3)
top_markers <- markers_scran[, head(.SD, 10), by="cluster"]
celltypes<-levels(factor(markers_scran$cluster))
sign_list<-list()
for (i in 1:length(celltypes)){
sign_list[[i]]<-top_markers[which(top_markers$cluster == celltypes[i]),]$feats
}
PAGE_matrix = makeSignMatrixPAGE(sign_names = celltypes,
                                sign_list = sign_list)
testcombo = runPAGEEnrich(gobject = testcombo,
          sign_matrix = PAGE_matrix,
          min_overlap_genes = 2)
cell_types_subset = colnames(PAGE_matrix)
# Plot PAGE enrichment result
spatCellPlot(gobject = testcombo,
            spat_enr_names = 'PAGE',
            cell_annotation_values = cell_types_subset[1:4],
            cow_n_col = 2,coord_fix_ratio = NULL, point_size = 1.25,
            save_param = list(save_name = "5a_PAGE_plot"))
 
5.2 Hypergeometric test¶
testcombo = runHyperGeometricEnrich(gobject = testcombo,
                expression_values = "normalized",
                sign_matrix = PAGE_matrix)
cell_types_subset = colnames(PAGE_matrix)
spatCellPlot(gobject = testcombo,
            spat_enr_names = 'hypergeometric',
            cell_annotation_values = cell_types_subset[1:4],
            cow_n_col = 2,coord_fix_ratio = NULL, point_size = 1.75,
            save_param = list(save_name = "5b_HyperGeometric_plot"))
 
5.3 Rank Enrichment¶
# Create rank matrix, not that rank matrix is different from PAGE
# A count matrix and a vector for all cell labels will be needed
rank_matrix = makeSignMatrixRank(sc_matrix = get_expression_values(giotto_SC,values = "normalized"),
                                sc_cluster_ids = pDataDT(giotto_SC)$prostate_label)
colnames(rank_matrix)<-levels(factor(pDataDT(giotto_SC)$prostate_label))
testcombo = runRankEnrich(gobject = testcombo, sign_matrix = rank_matrix,expression_values = "normalized")
# Plot Rank enrichment result
spatCellPlot2D(gobject = testcombo,
            spat_enr_names = 'rank',
            cell_annotation_values = cell_types_subset[1:4],
            cow_n_col = 2,coord_fix_ratio = NULL, point_size = 1,
            save_param = list(save_name = "5c_Rank_plot"))
 
5.4 DWLS Deconvolution¶
# Create DWLS matrix, not that DWLS matrix is different from PAGE and rank
# A count matrix a vector for a list of gene signatures and a vector for all cell labels will be needed
DWLS_matrix<-makeSignMatrixDWLSfromMatrix(matrix = as.matrix(get_expression_values(giotto_SC,values = "normalized")),
                                        cell_type = pDataDT(giotto_SC)$prostate_label,
                                        sign_gene = top_markers$feats)
testcombo = runDWLSDeconv(gobject = testcombo, sign_matrix = DWLS_matrix)
# Plot DWLS deconvolution result
spatCellPlot2D(gobject = testcombo,
            spat_enr_names = 'DWLS',
            cell_annotation_values = levels(factor(pDataDT(giotto_SC)$prostate_label))[1:4],
            cow_n_col = 2,coord_fix_ratio = NULL, point_size = 1,
            save_param = list(save_name = "5d_DWLS_plot"))
 
