-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathvisium_config_flow.yaml
150 lines (150 loc) · 10 KB
/
visium_config_flow.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# Run metadata. The values below are placeholders describing the analysis;
# replace them with your own.
user.id: "user@example.com"  # for internal metadata handling
run.name: "Short name for the analysis"
run.description: "Description of the analysis"
run.summary: "Short description of the analysis"
# Experiments to analyze.
# Each name must match a folder name in the 'prep' folder.
# All listed experiments are analyzed concurrently using the same
# configuration params.
experiment.ids:
  - "CytAssist_FFPE_Human_Lung_Squamous_Cell_Carcinoma"  # Experiment 1
  - "CytAssist_11mm_FFPE_Human_Lung_Cancer"  # Experiment 2
# Pipelines to execute.
# There may be dependencies between pipelines: if a basic pipeline such as
# celldeconv or imgseg is set to false, the succeeding pipelines will only
# execute if previous celldeconv or imgseg results are available in the
# results folder.
pipelines.enabled:
  imgseg: false  # cell segmentation
  cell2spot: false  # matching cells to Visium spots
  celldeconv: false  # cell deconvolution
  cluster: false  # morphological clustering
  # cell assignment; integrates cell deconvolution with cell segmentation
  assign: false
  qc: false  # QC metrics generation
  datamerge: false  # set to true to visualize the results in TissUUmaps
  spatialanalysis: false  # spatial analysis reporting
# Image segmentation: select the method to be used and define its parameters.
imgseg:
  # The two image.* values are tracked as metadata only; they are not used
  # in the analysis.
  image.resolution: "400dpi"
  image.magnification: "20x"
  model:  # select between cellpose and hovernet
    name: "cellpose"
    version: "2.1.1"  # version tracking for backward-compatibility purposes
    params:
      # See https://cellpose.readthedocs.io/en/latest/settings.html
      patch_size: 512  # patch size for the neural network
      overlap: 496  # pixel at which patches will overlap
      # Reduces the size of the image. Set this to 2 if your image is 40x.
      downsample_factor: 1
      n_channels: 3  # number of channels the image has
      channels: [1, 0]  # 0=grayscale, 1=red, 2=green, 3=blue
      model_type: "nuclei"  # cellpose model; "nuclei" is recommended
      batch_size: 100  # number of tiles processed simultaneously
      diameter: 16  # expected cell diameter in pixels; 0 tries to auto-estimate
      # Low values increase confidence and reduce recall; high values
      # increase recall and reduce precision.
      flow_threshold: 0.8
    ### To use Hovernet instead, comment out the cellpose block above
    ### (name/version/params) and uncomment this one.
    # name: "hovernet"
    # version: "0.0.1"  # version tracking for backward-compatibility purposes
    # params:
    #   gpu: '0'  # ID of the GPU to be used
    #   nr_types: 5  # number of cell types the Hovernet weights were trained with
    #   batch_size: 16  # number of tiles processed simultaneously
    #   model_mode: "original"  # Hovernet model to use (fast or original)
    #   nr_inference_workers: 1  # parallelization
    #   nr_post_proc_workers: 1  # parallelization
    #   wsi: true  # analyze the whole slide
    #   tile: false  # analyze the tiles independently
    #   help: false  # must be set to false
    #   # can be replaced with a custom model
    #   model_name: "hovernet_original_consep_type_tf2pytorch.tar"
    #   type_filename: "type_info.json"  # can be replaced with custom labels
# Assigns segmented cells to their corresponding spots.
# No need to configure it.
cell2spot:
  model:
    name: "default"
    version: "X.X.X"  # version tracking for backward-compatibility purposes
# Cell deconvolution: select the method to be used (CARD or cell2location).
celldeconv:
  model:
    ### To use CARD, uncomment this block and comment out the cell2location
    ### block (name/version/params) below, so `model` holds a single method.
    # name: "card"
    # version: "X.X.X"  # version tracking for backward-compatibility purposes
    # params:
    #   # params for the dataset under analysis
    #   # numeric; include spatial locations where at least this number of
    #   # counts is detected. Default is 100.
    #   min_count_gene: 100
    #   # numeric; include genes where at least this number of spatial
    #   # locations have non-zero expression. Default is 5.
    #   min_count_spot: 5
    #   # character; name of the column in sc_meta that specifies the cell
    #   # type assignment.
    #   ct_varname: 'cellType'
    #   # vector of cell type names to deconvolute; default is NULL. If NULL,
    #   # all cell types provided by the single-cell dataset are used.
    #   ct_select: 'NULL'
    #   # reference sc dataset to be used; the name must match the
    #   # *_cell_atlas.h5ad objects in the reference folder.
    #   atlas_type: 'kidney'
    #   # the .obs data frame column name defining the cell type annotation
    #   sc_label_key: 'cell_type'
    #   sc_sample_key: 'batch'  # this is the 'batch' parameter for CARD
    name: "cell2location"
    version: "0.1.3"  # version tracking for backward-compatibility purposes
    params:
      seed: 2023
      # --- params for cleaning the sc datasets ---
      # int, gene filter: a gene should be detected in more than this minimum
      # number of cells (e.g. in over 20 cells).
      sc_cell_count_cutoff: 20
      # float in (0, 1), gene filter: a gene should be detected in more than
      # this minimum fraction of cells.
      sc_cell_percentage_cutoff2: 0.05
      # float greater than 1, gene filter: a gene should have a mean
      # expression across non-zero cells slightly larger than 1.
      sc_nonz_mean_cutoff: 1.12
      # --- params for training on sc data to get the st signatures ---
      # column in the reference dataset that determines the experiment
      # category grouping, e.g. 10X reaction / sample / batch
      sc_batch_key: "batch"
      # column in the reference dataset used for constructing signatures
      sc_label_key: "cell_type"
      # multiplicative technical effects (platform, 3' vs 5', donor effect);
      # can be an empty list
      sc_categorical_covariate_keys: []
      # maximum number of epochs used when building the sc signature file
      sc_max_epoches: 50
      sc_lr: 0.02  # learning rate used when building the sc signature file
      sc_use_gpu: true  # use the GPU for building the sc signature file
      # --- params for training on st data to get the cell abundance ---
      st_N_cells_per_location: 20  # expected number of cells per spot
      # hyperparameter controlling normalisation of within-experiment
      # variation in RNA detection
      st_detection_alpha: 200
      # maximum number of epochs for the st training step. Increase this value
      # if the cell distribution does not seem to follow random patterns.
      # NOTE(review): the previous comment said "building the sc signature
      # file", which looks copied from sc_max_epoches - confirm.
      st_max_epoches: 25000
      # cells whose abundance in a spot is lower than this threshold are
      # ignored
      cell_aboundance_threshold: 0.1
      # reference sc dataset to be used; the name must match the
      # *_cell_atlas.h5ad objects in the reference folder.
      atlas_type: 'luca'
      # NOTE(review): `retrain_cell_sig` is nested under `mode` because `mode`
      # carries no value of its own; confirm against the code that reads this
      # config (it may instead be a sibling of `mode`).
      mode:  ### model
        # If false, the pre-trained {atlas_name}_cell_sig.h5 file is loaded
        # from the reference folder. If true, the cell signature is trained
        # again. Set to true the first time the atlas is used.
        retrain_cell_sig: true
# Parameters to set up morphological clustering.
cluster:
  model:
    name: "gaussian"  # "gaussian" or "kmeans"
    version: "X.X.X"  # version tracking for backward-compatibility purposes
    params:
      n_clusters: 20  # number of clusters
      spot_clustering: true
      # partition data in groups for efficiency; only used in kmeans
      # clustering_batch_size: 1024
# Cell assignment: select between the local, naive or global assigning
# methods. Keep a single `model` block uncommented; a second active `model`
# key would be a duplicate key.
assign:
  model:
    name: "local"
    version: "X.X.X"  # version tracking for backward-compatibility purposes
  # model:
  #   name: "naive"
  #   version: "X.X.X"  # version tracking for backward-compatibility purposes
  #   params:
  #     random_state: 0
  # model:
  #   name: "global"  # may have very slow performance
  #   version: "X.X.X"  # version tracking for backward-compatibility purposes
  #   params:
  #     # number of clusters to consider; make it consistent with the
  #     # expected cell types
  #     n_clusters: 20
  #     # partition data in groups for efficiency; only used in kmeans
  #     clustering_batch_size: 1024
  #     random_state: 0
# QC metrics module; no need to set it up.
qc:
  model:
    name: "default"
    version: "X.X.X"  # version tracking for backward-compatibility purposes
# Data merge settings.
datamerge:
  run_data_merge_only: true
  model:
    name: "default"
    version: "X.X.X"  # version tracking for backward-compatibility purposes
    params:
      # name of the file containing annotations
      annotation_file: "annotations.geojson"
      # column containing the barcode index (default: "barcode")
      spot_index: "barcode"
      # column containing the cell ids (default: "cell_ids")
      cell_index: "cell_ids"
      # Genes defined in the following parameters will be used for the
      # spatial structure analysis and visualization.
      target_genes: []  # list of genes to include, e.g. [gene1, gene2, gene3]
      # forces the reporting to include this many of the most expressed genes
      n_top_expressed_genes: 750
      # forces the reporting to include this many of the genes with the
      # highest variability
      n_top_variability_genes: 750
# Spatial structure analysis params; no need to set them up.
spatialanalysis:
  model:
    name: "default"
    version: "X.X.X"  # version tracking for backward-compatibility purposes
# Run identifier, for internal metadata handling. The value is quoted, so it
# is a string rather than the integer 714.
run_id: "714"