##########################
# PAS-seq Pipeline using Tophat
##########################


1. QC
module load fastqc/0.11.2
ls [all the input.fastq] | xargs fastqc
# here I am using the combination of ls and xargs, which pump listed inputs one at a time into fastqc command tool.


2. Trim low-quality reads.
module load java/1.8.0.51
module load trimmomatic/0.35
java -jar /data/apps/trimmomatic/0.35/trimmomatic-0.35.jar SE -threads 12 -phred33 [input.fastq] [output.name] TRAILING:20 SLIDINGWINDOW:4:20 
MINLEN:20


3. Tophat mapping.
module load bowtie2/2.2.7
module load tophat/2.1.0
tophat -p 32 -o [output.name] -g 1 --library-type fr-secondstrand -G [annotation_file] [indexed_reference_genome] [input.fastq]


3.5 Remove internal priming reads
# using "PTM_filter_TophatAdapted.py" in myPythonTool folder


4. Prepare BAMs
module load samtools/1.1
samtools sort -o [output.sorted.bam] -@ 32 [input.bam]
samtools view -h [input.sorted.bam] > [output.sorted.sam]


5. Generate tracks
module load bedtools/2.23.0

# combined tracks
bedtools genomecov -ibam [input.sorted.bam] -split -bg -g [mm9_chromSize] > [output.bedGraph]

# separated tracks
bedtools genomecov -ibam [input.sorted.bam] -split -bg -strand + -g [mm9_chromSize] > [output.bedGraph]
bedtools genomecov -ibam [input.sorted.bam] -split -bg -strand - -g [mm9_chromSize] > [output.bedGraph]

# and then:
./bedGraphToBigWig [input.bedGraph] [mm9_chromSize] [output.bw]


6. Generate read counts table 
# using masterlist — Tian PAS data.txt, as reference, available at:
https://cbcl.ics.uci.edu/public_data/shilab/PASseq_szang_20160302/Tian%20PAS%20data.txt
# using "PAS_incrementer_onTophat.py" in myPythonTool folder to increment the read counts for each ApAid
# using "mergeCounts_separatedAPAgenes.py" in myPythonTool folder to combine results from each sample