######################################################### # # # Useful info (AprendeBioinformáticaEnCasa: session 11) # # By: Juan Carlos Oliveros (BioinfoGP, CNB-CSIC) # # # ######################################################### # # # Tools to be installed: # # Linux sudo apt update sudo apt install samtools sudo apt install bcftools sudo apt install bwa #MAC OSX (homebrew already installed) brew install samtools brew install bcftools brew install bwa # Copy fastq files to the main directory # source: https://www.ncbi.nlm.nih.gov/Traces/study/?acc=SRP261139 cp ena_files/SRR11772640/SRR11772640.fastq.gz . cp ena_files/SRR11772645/SRR11772645.fastq.gz . cp ena_files/SRR11772648/SRR11772648.fastq.gz . cp ena_files/SRR11772651/SRR11772651.fastq.gz . cp ena_files/SRR11772654/SRR11772654.fastq.gz . cp ena_files/SRR11772660/SRR11772660.fastq.gz . cp ena_files/SRR11772663/SRR11772663.fastq.gz . cp ena_files/SRR11772666/SRR11772666.fastq.gz . cp ena_files/SRR11772672/SRR11772672.fastq.gz . cp ena_files/SRR11772675/SRR11772675.fastq.gz . cp ena_files/SRR11772680/SRR11772680.fastq.gz . cp ena_files/SRR11772685/SRR11772685.fastq.gz . # Copy genome files to a different directory (to be created with "mkdir"): mkdir genomes cp genes_and_genomes/genes_and_genomes/* genomes/ # Create directories to store results: mkdir bam mkdir vcf # align with bwa, manipulate results with samtools and detect variants # with bcftools: GENOME=genomes/Sars_cov_2.ASM985889v3.dna.primary_assembly.MN908947.3.fa GENES=genomes/Sars_cov_2.ASM985889v3.100.primary_assembly.MN908947.3.gff3 PREFIX[1]=SRR11772640 PREFIX[2]=SRR11772645 PREFIX[3]=SRR11772648 PREFIX[4]=SRR11772651 PREFIX[5]=SRR11772654 PREFIX[6]=SRR11772660 PREFIX[7]=SRR11772663 PREFIX[8]=SRR11772666 PREFIX[9]=SRR11772672 PREFIX[10]=SRR11772675 PREFIX[11]=SRR11772680 PREFIX[12]=SRR11772685 for n in {1..12} do CURRENT=${PREFIX[n]} bwa aln -t 3 "${GENOME}" "${CURRENT}.fastq.gz" > "${CURRENT}.sai" bwa samse "${GENOME}" "${CURRENT}.sai" "${CURRENT}.fastq.gz" > "bam/${CURRENT}.sam" samtools view -Sb "bam/${CURRENT}.sam" > "bam/${CURRENT}.bam" samtools sort "bam/${CURRENT}.bam" > "bam/${CURRENT}.sorted.bam" samtools index "bam/${CURRENT}.sorted.bam" bcftools mpileup -Ou -f "${GENOME}" "bam/${CURRENT}.sorted.bam"|bcftools call -mv -o "vcf/${CURRENT}.calls.vcf" bcftools csq -c ANN --phase a -f "${GENOME}" -g "${GENES}" "vcf/${CURRENT}.calls.vcf" -o "vcf/${CURRENT}.ann.vcf" done # BoinfoGP's viewer for SNPs: # https://bioinfogp.cnb.csic.es/tools/snper/ # Alternative method for perform variant calling: # Very popular but complex: GATK # https://gatk.broadinstitute.org/hc/en-us # Alternative command-line to annotate variants # (very good but not easy to setup): snpEff # http://snpeff.sourceforge.net/