Initial RNA-seq DESeq2 pipeline manifests
This commit is contained in:
+13
@@ -0,0 +1,13 @@
|
||||
# 01-pvc.yaml
# Claim for the workspace volume. The pipeline Jobs reference it by
# claimName "rnaseq-workspace" and mount it at /data.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: rnaseq
  name: rnaseq-workspace
spec:
  storageClassName: longhorn
  # ReadWriteOnce: the volume can be mounted read-write by a single node.
  accessModes: ["ReadWriteOnce"]
  resources:
    requests:
      storage: 30Gi
|
||||
@@ -0,0 +1,59 @@
|
||||
# 02-job-sra-download.yaml
# Downloads runs ERR458493 and ERR458500 into /data/raw on the shared PVC and
# gzips them. The download runs as an init container; the main container only
# lists the resulting files.
apiVersion: batch/v1
kind: Job
metadata:
  name: sra-download
  namespace: rnaseq
spec:
  # Failed pods are replaced (restartPolicy: Never) up to this many times, so
  # the download script below must be safe to re-run on a half-filled volume.
  backoffLimit: 2
  template:
    spec:
      restartPolicy: Never

      initContainers:
        - name: download-sra
          image: ncbi/sra-tools:3.0.0
          command: ["/bin/sh", "-c"]
          args:
            - |
              set -e
              mkdir -p /data/raw

              # --force: overwrite a .fastq left behind by a failed earlier
              # attempt instead of aborting because the output already exists.
              echo "=== Downloading ERR458493 (WT, single-end) ==="
              fasterq-dump ERR458493 --outdir /data/raw --temp /tmp --threads 4 --force

              echo "=== Downloading ERR458500 (snf2 mutant, single-end) ==="
              fasterq-dump ERR458500 --outdir /data/raw --temp /tmp --threads 4 --force

              # Compress only the files this Job produced (a *.fastq glob would
              # also grab files another Job is still writing to the same
              # volume). -f replaces a stale .gz from a previous attempt.
              echo "=== Compressing ==="
              gzip -f /data/raw/ERR458493.fastq /data/raw/ERR458500.fastq

              echo "=== Done ==="
              ls -lh /data/raw/
          resources:
            requests:
              memory: "1Gi"
              cpu: "2"
            limits:
              memory: "2Gi"
              cpu: "4"
          volumeMounts:
            - name: workspace
              mountPath: /data

      containers:
        # Runs after the init container succeeds; prints what landed on disk.
        - name: verify
          image: busybox:1.36
          command: ["/bin/sh", "-c"]
          args:
            - |
              echo "=== Raw files ==="
              ls -lh /data/raw/
          volumeMounts:
            - name: workspace
              mountPath: /data

      volumes:
        - name: workspace
          persistentVolumeClaim:
            claimName: rnaseq-workspace
|
||||
@@ -0,0 +1,42 @@
|
||||
# 02b-job-sra-download-extra.yaml
# Downloads runs ERR458494, ERR458495, ERR458501 and ERR458502 into /data/raw
# on the shared PVC and gzips them. The download runs as an init container;
# the main container only lists the resulting files.
apiVersion: batch/v1
kind: Job
metadata:
  name: sra-download-extra
  namespace: rnaseq
spec:
  # Failed pods are replaced (restartPolicy: Never) up to this many times, so
  # the download script below must be safe to re-run on a half-filled volume.
  backoffLimit: 2
  template:
    spec:
      restartPolicy: Never
      initContainers:
        - name: download-sra
          image: ncbi/sra-tools:3.0.0
          command: ["/bin/sh", "-c"]
          args:
            - |
              set -e
              mkdir -p /data/raw
              for acc in ERR458494 ERR458495 ERR458501 ERR458502; do
                echo "=== Downloading $acc ==="
                # --force: overwrite a .fastq left behind by a failed attempt.
                fasterq-dump "$acc" --outdir /data/raw --temp /tmp --threads 4 --force
                # Compress just this accession (not a *.fastq glob, which would
                # also touch files owned by another Job); -f replaces a stale
                # .gz from a previous attempt.
                gzip -f "/data/raw/$acc.fastq"
              done
              ls -lh /data/raw/
          resources:
            requests: { memory: "1Gi", cpu: "2" }
            limits: { memory: "2Gi", cpu: "4" }
          volumeMounts:
            - name: workspace
              mountPath: /data
      containers:
        # Runs after the init container succeeds; prints what landed on disk.
        - name: verify
          image: busybox:1.36
          command: ["/bin/sh", "-c"]
          args: ["ls -lh /data/raw/"]
          volumeMounts:
            - name: workspace
              mountPath: /data
      volumes:
        - name: workspace
          persistentVolumeClaim:
            claimName: rnaseq-workspace
|
||||
@@ -0,0 +1,43 @@
|
||||
# 03-job-fastqc.yaml
# Runs FastQC over the gzipped FASTQ files in /data/raw and writes the reports
# to /data/fastqc on the shared PVC.
apiVersion: batch/v1
kind: Job
metadata:
  name: fastqc
  namespace: rnaseq
spec:
  backoffLimit: 2
  template:
    spec:
      restartPolicy: Never

      containers:
        - name: fastqc
          image: biocontainers/fastqc:v0.11.9_cv8
          command: ["/bin/sh", "-c"]
          args:
            - |
              set -e

              # FastQC does not create its output directory and aborts if it is
              # missing, so make sure it exists first.
              mkdir -p /data/fastqc

              # Glob instead of a fixed two-file list so that every downloaded
              # sample is checked, matching the message below.
              echo "=== Running FastQC on all samples ==="
              fastqc \
                /data/raw/*.fastq.gz \
                --outdir /data/fastqc \
                --threads 4

              echo "=== Done ==="
              ls -lh /data/fastqc/
          resources:
            requests:
              memory: "1Gi"
              cpu: "2"
            limits:
              memory: "2Gi"
              cpu: "4"
          volumeMounts:
            - name: workspace
              mountPath: /data

      volumes:
        - name: workspace
          persistentVolumeClaim:
            claimName: rnaseq-workspace
|
||||
@@ -0,0 +1,62 @@
|
||||
# 04-job-star.yaml
# Aligns ERR458493 and ERR458500 with STAR against the index in
# /data/reference/star_index; each sample's output goes to /data/aligned/<run>/.
apiVersion: batch/v1
kind: Job
metadata:
  namespace: rnaseq
  name: star-align
spec:
  backoffLimit: 1
  template:
    spec:
      restartPolicy: Never

      volumes:
        - name: workspace
          persistentVolumeClaim:
            claimName: rnaseq-workspace

      containers:
        - name: star
          image: quay.io/biocontainers/star:2.7.10a--h9ee0642_0
          command: ["/bin/sh", "-c"]
          args:
            - |
              set -e

              # align <run> <label>: map one gzipped FASTQ and write a
              # coordinate-sorted BAM under /data/aligned/<run>/.
              align() {
                run="$1"
                label="$2"
                out_dir="/data/aligned/${run}"

                echo "=== Aligning ${run} (${label}) ==="
                mkdir -p "${out_dir}"
                STAR \
                  --runMode alignReads \
                  --genomeDir /data/reference/star_index \
                  --readFilesIn "/data/raw/${run}.fastq.gz" \
                  --readFilesCommand zcat \
                  --outSAMtype BAM SortedByCoordinate \
                  --outSAMattributes NH HI AS NM \
                  --outFileNamePrefix "${out_dir}/" \
                  --runThreadN 4
              }

              align ERR458493 "WT"
              align ERR458500 "snf2 mutant"

              echo "=== Done ==="
              for run in ERR458493 ERR458500; do
                ls -lh "/data/aligned/${run}/"
              done
          volumeMounts:
            - name: workspace
              mountPath: /data
          resources:
            requests: { memory: "6Gi", cpu: "4" }
            limits: { memory: "10Gi", cpu: "4" }
|
||||
@@ -0,0 +1,52 @@
|
||||
# 04b-job-star-extra.yaml
# Aligns ERR458494, ERR458495, ERR458501 and ERR458502 with STAR against the
# index in /data/reference/star_index; output goes to /data/aligned/<run>/.
apiVersion: batch/v1
kind: Job
metadata:
  namespace: rnaseq
  name: star-align-extra
spec:
  backoffLimit: 1
  template:
    spec:
      restartPolicy: Never

      volumes:
        - name: workspace
          persistentVolumeClaim:
            claimName: rnaseq-workspace

      containers:
        - name: star
          image: quay.io/biocontainers/star:2.7.10a--h9ee0642_0
          command: ["/bin/sh", "-c"]
          args:
            - |
              set -e

              # Runs handled by this Job, in processing order.
              RUNS="ERR458494 ERR458495 ERR458501 ERR458502"

              for run in $RUNS; do
                out_dir="/data/aligned/${run}"

                echo "=== Aligning ${run} ==="
                mkdir -p "${out_dir}"
                STAR \
                  --runMode alignReads \
                  --genomeDir /data/reference/star_index \
                  --readFilesIn "/data/raw/${run}.fastq.gz" \
                  --readFilesCommand zcat \
                  --outSAMtype BAM SortedByCoordinate \
                  --outSAMattributes NH HI AS NM \
                  --outFileNamePrefix "${out_dir}/" \
                  --runThreadN 4
              done

              echo "=== Done ==="
              for run in $RUNS; do
                ls -lh "/data/aligned/${run}/"
              done
          volumeMounts:
            - name: workspace
              mountPath: /data
          resources:
            requests: { memory: "6Gi", cpu: "4" }
            limits: { memory: "10Gi", cpu: "4" }
|
||||
@@ -0,0 +1,48 @@
|
||||
# 05-job-featurecounts.yaml
# Counts reads per gene with featureCounts over the six STAR BAMs and writes
# the count table to /data/counts/gene_counts.txt on the shared PVC.
apiVersion: batch/v1
kind: Job
metadata:
  name: featurecounts-v2
  namespace: rnaseq
spec:
  backoffLimit: 2
  template:
    spec:
      restartPolicy: Never

      containers:
        - name: featurecounts
          image: quay.io/biocontainers/subread:2.0.6--he4a0461_0
          command: ["/bin/sh", "-c"]
          args:
            - |
              set -e

              # featureCounts does not create the directory of its -o file;
              # without this the run fails when it tries to open the output.
              mkdir -p /data/counts

              # BAM order fixes the column order of the count table:
              # ERR458493-495 first, then ERR458500-502.
              echo "=== Running featureCounts on 6 samples ==="
              featureCounts \
                -a /data/reference/annotation.gtf \
                -o /data/counts/gene_counts.txt \
                -T 4 \
                /data/aligned/ERR458493/Aligned.sortedByCoord.out.bam \
                /data/aligned/ERR458494/Aligned.sortedByCoord.out.bam \
                /data/aligned/ERR458495/Aligned.sortedByCoord.out.bam \
                /data/aligned/ERR458500/Aligned.sortedByCoord.out.bam \
                /data/aligned/ERR458501/Aligned.sortedByCoord.out.bam \
                /data/aligned/ERR458502/Aligned.sortedByCoord.out.bam

              echo "=== Done ==="
              cat /data/counts/gene_counts.txt.summary
          resources:
            requests:
              memory: "1Gi"
              cpu: "2"
            limits:
              memory: "2Gi"
              cpu: "4"
          volumeMounts:
            - name: workspace
              mountPath: /data

      volumes:
        - name: workspace
          persistentVolumeClaim:
            claimName: rnaseq-workspace
|
||||
@@ -0,0 +1,69 @@
|
||||
# 06-job-deseq2.yaml
# Runs a DESeq2 WT-vs-SNF2 comparison on /data/counts/gene_counts.txt and
# writes the result table to /data/results/deseq2_results.csv on the shared PVC.
apiVersion: batch/v1
kind: Job
metadata:
  name: deseq2-v2
  namespace: rnaseq
spec:
  backoffLimit: 1
  template:
    spec:
      restartPolicy: Never

      containers:
        - name: deseq2
          image: quay.io/biocontainers/bioconductor-deseq2:1.46.0--r44he5774e6_1
          command: ["/bin/sh", "-c"]
          args:
            - |
              set -e

              # write.csv() does not create missing directories. Create the
              # output directory up front so the job cannot fail at the very
              # last step, after the whole analysis has already run.
              mkdir -p /data/results

              # Quoted delimiter: the R script is written out verbatim, with
              # no shell expansion of $ inside it.
              cat <<'EOF' > /tmp/deseq2_analysis.R
              library(DESeq2)

              # skip = 1 drops the first line of the file; the first remaining
              # column becomes the row names.
              counts_raw <- read.table("/data/counts/gene_counts.txt",
                                       header = TRUE, skip = 1, row.names = 1)

              # Columns 6:11 are taken as the six per-sample count columns
              # (assumes the standard featureCounts layout - TODO confirm).
              # The labels assume the column order WT x3 then SNF2 x3.
              counts <- counts_raw[, 6:11]
              colnames(counts) <- c("WT_1", "WT_2", "WT_3",
                                    "SNF2_1", "SNF2_2", "SNF2_3")

              # WT is listed first in levels, making it the reference level.
              coldata <- data.frame(
                condition = factor(c("WT", "WT", "WT",
                                     "SNF2", "SNF2", "SNF2"),
                                   levels = c("WT", "SNF2"))
              )
              rownames(coldata) <- colnames(counts)

              dds <- DESeqDataSetFromMatrix(countData = counts,
                                            colData = coldata,
                                            design = ~ condition)

              dds <- DESeq(dds)
              res <- results(dds)
              # order() places NA adjusted p-values at the end.
              res <- res[order(res$padj), ]

              write.csv(as.data.frame(res), "/data/results/deseq2_results.csv")

              cat("=== Summary ===\n")
              summary(res)

              cat("\n=== Top 10 genes by adjusted p-value ===\n")
              print(head(as.data.frame(res), 10))
              EOF

              Rscript /tmp/deseq2_analysis.R
          resources:
            requests:
              memory: "2Gi"
              cpu: "2"
            limits:
              memory: "4Gi"
              cpu: "4"
          volumeMounts:
            - name: workspace
              mountPath: /data

      volumes:
        - name: workspace
          persistentVolumeClaim:
            claimName: rnaseq-workspace
|
||||
@@ -0,0 +1,43 @@
|
||||
# RNA-seq DESeq2 Pipeline
|
||||
|
||||
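A sequential Kubernetes Job pipeline for differential expression analysis on yeast RNA-seq data. The stages run in order, each as a one-shot Job against a shared PVC. The alignment and counting stages read a STAR index from `/data/reference/star_index` and a GTF annotation from `/data/reference/annotation.gtf` on that PVC; none of the manifests listed below create these, so they must be placed on the volume beforehand.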
|
||||
|
||||
## Dataset
|
||||
|
||||
- Source: Gierliński et al., ENA accession PRJEB5348
|
||||
- Reads: 50bp single-end
|
||||
- Conditions: wild-type (WT: ERR458493–495) vs. snf2 deletion mutant (snf2: ERR458500–502)
|
||||
|
||||
## Pipeline stages
|
||||
|
||||
| Order | File | Stage |
|
||||
|---|---|---|
|
||||
| 1 | `01-pvc.yaml` | Shared PersistentVolumeClaim for pipeline data and intermediate files |
|
||||
| 2 | `02-job-sra-download.yaml` | Downloads raw FASTQ reads from SRA/ENA |
|
||||
| 2b | `02b-job-sra-download-extra.yaml` | Downloads the remaining replicate samples |
|
||||
| 3 | `03-job-fastqc.yaml` | FastQC read quality control |
|
||||
| 4 | `04-job-star.yaml` | STAR alignment to the reference genome |
|
||||
| 4b | `04b-job-star-extra.yaml` | STAR alignment for the remaining replicate samples |
|
||||
| 5 | `05-job-featurecounts.yaml` | Gene-level count matrix from aligned reads |
|
||||
| 6 | `06-job-deseq2.yaml` | DESeq2 differential expression analysis (WT vs. snf2) |
|
||||
|
||||
## Results
|
||||
|
||||
- STAR alignment: ~85–90% mapping rate across samples
|
||||
- DESeq2 output visualized (volcano plot, etc.) in a Jupyter R notebook
|
||||
|
||||
## Running
|
||||
|
||||
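Namespace: `rnaseq` (it must already exist; the manifests listed above do not create it). Jobs are sequential — each depends on the previous stage's output landing on the shared PVC, so apply one manifest at a time and wait for its Job to complete before applying the next: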
|
||||
|
||||
```bash
|
||||
kubectl apply -f 01-pvc.yaml
|
||||
kubectl apply -f 02-job-sra-download.yaml
|
||||
kubectl get jobs -n rnaseq -w   # wait for the Job to complete before continuing; repeat after each Job below
|
||||
kubectl apply -f 02b-job-sra-download-extra.yaml
|
||||
kubectl apply -f 03-job-fastqc.yaml
|
||||
kubectl apply -f 04-job-star.yaml
|
||||
kubectl apply -f 04b-job-star-extra.yaml
|
||||
kubectl apply -f 05-job-featurecounts.yaml
|
||||
kubectl apply -f 06-job-deseq2.yaml
|
||||
```
|
||||
Reference in New Issue
Block a user