# Samples in GEO NCBI database:
# GSM3900810
# GSM3900821

# https://bioinfogp.cnb.csic.es/courses/quedateencasa/ oliveros@cnb.csic.es

# Paired-end reads
# FFFF_1.fastq.gz
# FFFF_2.fastq.gz

# Single-end reads
# FFFF.fastq.gz

# .FASTQ (.FQ): short reads (e.g. Illumina)
# .FASTA (.FA, .FNA): biological sequences (e.g. chromosomes)
# .GFF (.GFF3, .GTF): coordinates of genomic elements (e.g. genes)

# Experiment:
# https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE133136

# In R (Rconsole, RStudio...):
# Archivo -> Cambiar dir...
# File -> Change dir...
# Misc -> Change Working Directory...
# Session -> Set Working Directory...
# (browse your hard disk to choose the working directory)

>dir() [ENTER] (To see files and folders in the working directory)
>dir("fastq") (To see files and folders in the indicated path)

>install.packages("R.utils")
# "package ‘R.utils’ successfully unpacked and MD5 sums checked"
>library("R.utils")

>install.packages("BiocManager") # To install Bioconductor packages
>library("BiocManager")

>BiocManager::install("Rbowtie2") # To align FASTQ files against FASTA files
# Update all/some/none? [a/s/n]: # <- The answer is "n"
>library("Rbowtie2")

>BiocManager::install("Rsamtools") # To manipulate aligned results (.BAM files)
# Update all/some/none? [a/s/n]: # <- The answer is "n"
>library("Rsamtools")

##### HASTA LAS 12:30 (break until 12:30) #####
##### CONTINUAMOS (resuming) #####

>library("R.utils")
>library("BiocManager")
>library("Rbowtie2") # must be loaded again: bowtie2_build() and bowtie2() are used below
>library("Rsamtools")

>gunzip("chromosomes/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa.gz", remove=FALSE)
>gunzip("fastq/1M_SRR9336468_1.fastq.gz", remove=FALSE)
>gunzip("fastq/1M_SRR9336468_2.fastq.gz", remove=FALSE)

>bowtie2_build("chromosomes/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa", bt2Index = "chromosomes/yeast", overwrite = TRUE)

>bowtie2(bt2Index="chromosomes/yeast",
         samOutput = "1M_SRR9336468.sam",
         seq1 = "fastq/1M_SRR9336468_1.fastq",
         seq2 = "fastq/1M_SRR9336468_2.fastq",
         "--threads=3") # <- use 3 CPUs... faster!!
>asBam("1M_SRR9336468.sam") # converting SAM file into BAM file (more compact)

#### IGV ####
Genomes -> Load Genome from file... (Choose FASTA file in "chromosomes" folder)
File -> Load File... (Choose GFF3 file in "genes" folder)
Select one chromosome
Right-click on the left side of the genes track: choose "Expanded" to view all genes properly
File -> Load File... (Choose BAM file in working directory)
### IMPORTANT: the .bam.bai index file must be present in the same directory as the .bam file !!!!
### (asBam() creates the .bam.bai next to the .bam by default, indexDestination=TRUE)