forked from philippinespire/pire_fq_gz_processing
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrunFASTP_2.sbatch
executable file
·81 lines (68 loc) · 2.16 KB
/
runFASTP_2.sbatch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/bin/bash -l
# Second fastp pass over paired-end reads (r1/r2 files).
#
# What the fastp call in this script does:
#   - fixed-length trimming at the front of both reads (--trim_front1/2)
#   - quality trimming from the front (--cut_front) and a sliding window
#     from the right (--cut_right)
#   - a minimum-length filter (--length_required)
#   - no merging of overlapping reads
# Original author's note: this is the third step in prepping reads for
# de novo assembly.
#
# Positional arguments (read further down in the script):
#   $1  input directory containing the *r1.fq.gz / *r2.fq.gz files
#   $2  output directory
#   $3  minimum read length (optional, default 140)
#   $4  number of bases to trim from the front of each read (optional, default 0)
#
# NOTE(review): Slurm documents a time limit of zero as "no time limit";
# confirm --time=00:00:00 is intentional and allowed on this cluster.
#SBATCH --job-name=fastp
#SBATCH -o fastp_2nd_-%j.out
#SBATCH --time=00:00:00
#SBATCH --cpus-per-task=40
# enable_lmod is a site-provided command; presumably it makes `module`
# available in this shell -- confirm against the cluster documentation.
enable_lmod
module load container_env pire_genome_assembly/2021.07.01
#module load multiqc
#module load parallel
# Exported for the container runtime used by `crun` below; presumably
# bind-mounts this path inside the container -- confirm.
export SINGULARITY_BIND=/home/e1garcia
# ---- positional arguments ----
# $1  INDIR   directory with the input read files
#             e.g. /home/e1garcia/shotgun_PIRE/fq_4denovo_fp1_clmp
# $2  OUTDIR  directory that receives the trimmed reads and reports
#             e.g. /home/e1garcia/shotgun_PIRE/fq_4denovo_fp1_clmp_fp2
# $3  minimum read length before front trimming (optional, default 140)
# $4  number of bases to trim from the front of each read (optional, default 0)
INDIR=$1
OUTDIR=$2

#######################################
# Resolve the fastp length filter and front-trim settings.
# The length filter is lowered by the number of front-trimmed bases so that
# reads are not rejected merely for having been trimmed.
# Globals:   MINLEN (written), nBP_FRONT_TRIM (written)
# Arguments: $1 - requested minimum length; empty means 140
#            $2 - bases to trim from the read front; empty means 0
# Outputs:   error message on stderr for non-integer input
# Returns:   0 on success, 1 if either value is not a non-negative integer
#######################################
resolve_trim_settings() {
  local requested_len=${1:-140}
  local front_trim=${2:-0}

  if [[ ! $requested_len =~ ^[0-9]+$ || ! $front_trim =~ ^[0-9]+$ ]]; then
    printf 'ERROR: minimum length (%s) and front trim (%s) must be non-negative integers\n' \
      "$requested_len" "$front_trim" >&2
    return 1
  fi

  # 10# forces base 10 so values with leading zeros are not read as octal.
  # The trim must be assigned BEFORE it is subtracted from the length.
  nBP_FRONT_TRIM=$(( 10#$front_trim ))
  MINLEN=$(( 10#$requested_len - nBP_FRONT_TRIM ))

  # A negative length requirement is meaningless; treat it as "no minimum".
  if (( MINLEN < 0 )); then
    MINLEN=0
  fi
}

resolve_trim_settings "$3" "$4" || exit 1
# ---- file-name conventions and parallelism ----
# Glob selecting the forward-read files; each match identifies one sample to trim.
FQPATTERN='*r1.fq.gz'
# sed regex matching the read-file extension. Single-quoted so the shell does
# not strip the backslashes: the dots must reach sed escaped to match literally.
EXTPATTERN='r[12]\.fq\.gz'
FWDEXT=r1.fq.gz   # suffix appended to a sample prefix to get the forward-read file
REVEXT=r2.fq.gz   # suffix appended to a sample prefix to get the reverse-read file
# Number of fastp jobs run concurrently: half of the 40 CPUs requested
# with --cpus-per-task.
THREADS=20
# MINLEN is set from the positional arguments earlier in the script and is
# not recomputed here.
# Fail fast when the required directories were not supplied; otherwise the
# paths below would silently resolve relative to "/".
if [[ -z "${INDIR:-}" || -z "${OUTDIR:-}" ]]; then
  echo "ERROR: usage: sbatch runFASTP_2.sbatch <indir> <outdir> [minlen] [front_trim]" >&2
  exit 1
fi

# -p: re-running into an existing output directory is not an error.
mkdir -p -- "$OUTDIR" "$OUTDIR/failed" || exit 1

# Collect the forward-read files with a glob instead of parsing `ls`.
# FQPATTERN is deliberately left unquoted so that it expands as a glob.
shopt -s nullglob
fwd_reads=( "$INDIR"/$FQPATTERN )
shopt -u nullglob
if (( ${#fwd_reads[@]} == 0 )); then
  echo "ERROR: no files matching $INDIR/$FQPATTERN" >&2
  exit 1
fi

# trim r1 & r2 files
# Each forward-read path is reduced to its sample prefix: the directory part
# is stripped first, then the read extension is removed from the END of the
# file name only. GNU parallel substitutes that prefix for {} and runs up to
# $THREADS fastp jobs at once.
#
# fastp settings used below:
#   --trim_front1/2       drop $nBP_FRONT_TRIM bases from the front of each read
#   --length_required     discard reads shorter than $MINLEN
#   --cut_front / --cut_right with their window sizes and mean quality 20
#   --unpaired1/2         both point at the same file
#   --failed_out          reads that fail the filters
# NOTE(review): --unqualified_percent_limit is passed together with
# --disable_quality_filtering; confirm the limit has any effect.
#
# parallel joins the command words and hands them to a shell, so a multi-word
# argument needs an inner layer of quotes (see --report_title); for the same
# reason INDIR/OUTDIR must not contain whitespace.
printf '%s\n' "${fwd_reads[@]}" \
  | sed -e 's/.*\///' -e "s/${EXTPATTERN}\$//" \
  | sort -u \
  | crun parallel --no-notice -j "$THREADS" \
      fastp \
        --in1 "$INDIR/{}$FWDEXT" \
        --in2 "$INDIR/{}$REVEXT" \
        --out1 "$OUTDIR/{}fp2_r1.fq.gz" \
        --out2 "$OUTDIR/{}fp2_r2.fq.gz" \
        --unpaired1 "$OUTDIR/failed/{}fp2_unprd.fq.gz" \
        --unpaired2 "$OUTDIR/failed/{}fp2_unprd.fq.gz" \
        --failed_out "$OUTDIR/failed/{}fp2_fail.fq.gz" \
        -h "$OUTDIR/{}r1r2_fastp.html" \
        -j "$OUTDIR/{}r1r2_fastp.json" \
        --detect_adapter_for_pe \
        --trim_front1 "$nBP_FRONT_TRIM" \
        --trim_front2 "$nBP_FRONT_TRIM" \
        --length_required "$MINLEN" \
        --cut_front \
        --cut_front_window_size 1 \
        --cut_front_mean_quality 20 \
        --cut_right \
        --cut_right_window_size 10 \
        --cut_right_mean_quality 20 \
        --disable_trim_poly_g \
        --correction \
        --disable_quality_filtering \
        --unqualified_percent_limit 40 \
        --report_title "'Second Trim R1R2'"
if (( $? != 0 )); then
  echo "WARNING: one or more fastp jobs exited non-zero; check the log above" >&2
fi
# run multiqc on the fp2 results
# Scans $OUTDIR and writes the report, named 2nd_fastp_report, back into
# $OUTDIR. Both uses of $OUTDIR are quoted so the path is passed as a
# single argument each time.
module load container_env multiqc
srun crun multiqc -v -p -ip -f --data-dir --data-format tsv \
  --cl-config "max_table_rows: 3000" \
  --filename 2nd_fastp_report \
  --outdir "$OUTDIR" \
  "$OUTDIR"