#umi_tools whitelist --stdin data/12-22-3_L2_1.fq.gz \
# --bc-pattern="(?P<discard_1>CTCTTTCCCT)(?P<cell_1>.{10})(?P<discard_2>ACGACGCTCTTCGAGTGATTGCTTGTGACGCCTT)(?P<cell_2>.{8})(?P<umi_1>.{12})T{4}.*" \
# --set-cell-number=2000 \
# --extract-method=regex \
# --log2stderr > whitelist.txt;
#
## Step 3: Extract barcodes and UMIs and add to read names
#umi_tools extract --bc-pattern="(?P<discard_1>CTCTTTCCCT)(?P<cell_1>.{10})(?P<discard_2>ACGACGCTCTTCGAGTGATTGCTTGTGACGCCTT)(?P<cell_2>.{8})(?P<umi_1>.{12})T{4}.*" \
# --extract-method=regex \
# --stdin data/12-22-3_L2_1.fq.gz \
# --stdout data/12-22-3_L2_1_extracted.fastq.gz \
# --read2-in data/12-22-3_L2_2.fq.gz \
# --read2-out=data/12-22-3_L2_2_extracted.fastq.gz \
# --whitelist=whitelist.txt;
# Steps 4-6: map the extracted reads, assign them to genes, and count UMIs per
# gene per cell. The whitelist/extract steps are commented out, so
# data/12-22-3_L2_2_extracted.fastq.gz must already exist before this runs.

# Abort at the first failing step so that later steps never run on missing or
# stale intermediate files.
set -euo pipefail

# Reference bundle (STAR index + GTF annotation) and worker-thread count shared
# by the steps below.
readonly REF_DIR=/share/nas5/huangls/test/scRNA-seq.1/ref/refdata-gex-mm10-2020-A
readonly THREADS=4

# Step 4: Map reads
# Only read 2 is aligned. --outFilterMultimapNmax 1 drops multi-mapping reads.
# No --outFileNamePrefix is given, so the sorted BAM is expected in the current
# directory as Aligned.sortedByCoord.out.bam (consumed by step 5).
STAR --runThreadN "${THREADS}" \
  --genomeDir "${REF_DIR}/star/" \
  --readFilesIn data/12-22-3_L2_2_extracted.fastq.gz \
  --readFilesCommand zcat \
  --outFilterMultimapNmax 1 \
  --outSAMtype BAM SortedByCoordinate

# Step 5: Assign reads to genes
# -R BAM makes featureCounts emit a per-read annotated BAM next to the input
# (Aligned.sortedByCoord.out.bam.featureCounts.bam), which is used below.
/share/work/biosoft/subread/latest/bin/featureCounts \
  -a "${REF_DIR}/genes/genes.gtf" \
  -o gene_assigned \
  -R BAM Aligned.sortedByCoord.out.bam \
  -T "${THREADS}"

# Re-sort and index the annotated BAM before handing it to umi_tools count.
samtools sort Aligned.sortedByCoord.out.bam.featureCounts.bam -o assigned_sorted.bam
samtools index assigned_sorted.bam

# Step 6: Count UMIs per gene per cell
# Gene identity is read from the XT tag and assignment status from the XS tag;
# the result is written as a wide (gene x cell) table to counts.tsv.gz.
umi_tools count --per-gene --gene-tag=XT \
  --assigned-status-tag=XS --per-cell \
  --wide-format-cell-counts \
  -I assigned_sorted.bam -S counts.tsv.gz

# Author's note carried over from the original post (kept as a comment so the
# shell does not try to execute it): if you found this article useful, feel
# free to leave a tip; your support encourages further writing.