# Source: rnaseq-pipeline/06-job-deseq2.yaml (YAML; listed as 70 lines, 2.0 KiB in the repository viewer)

---
# 06-job-deseq2.yaml
#
# Kubernetes Job that generates an R script inside the container and runs it
# with Rscript. The script reads a count table from /data/counts, fits a
# DESeq2 model with a single `condition` factor (WT vs SNF2, three samples
# each), and writes the ordered results table to /data/results.
#
# Everything under /data comes from the `rnaseq-workspace` PVC.
apiVersion: batch/v1
kind: Job
metadata:
  name: deseq2-v2
  namespace: rnaseq
spec:
  # At most one retry before the Job is marked as failed.
  backoffLimit: 1
  template:
    spec:
      # Never restart the container in place; a retry is a fresh pod.
      restartPolicy: Never
      containers:
        - name: deseq2
          image: quay.io/biocontainers/bioconductor-deseq2:1.46.0--r44he5774e6_1
          command: ["/bin/sh", "-c"]
          args:
            - |
              set -e
              # Create the output directory up front so a missing directory
              # fails fast here instead of at write.csv, after the model fit.
              mkdir -p /data/results
              # Quoted heredoc delimiter: the shell expands nothing in the R
              # code (so `$` in res$padj reaches R untouched).
              cat <<'EOF' > /tmp/deseq2_analysis.R
              library(DESeq2)

              # Skip the first line of the file, take the next line as the
              # header, and use the first column as row names.
              # NOTE(review): this looks like a featureCounts-style table
              # (comment line, then header, then annotation columns) - confirm.
              counts_raw <- read.table("/data/counts/gene_counts.txt",
                                       header = TRUE, skip = 1, row.names = 1)

              # Keep columns 6-11 (positions counted after the row-name column
              # has been removed) as the six sample count columns.
              # NOTE(review): assumes these are the samples in the order
              # WT_1..WT_3, SNF2_1..SNF2_3 - verify against the counting step.
              counts <- counts_raw[, 6:11]
              colnames(counts) <- c("WT_1", "WT_2", "WT_3",
                                    "SNF2_1", "SNF2_2", "SNF2_3")

              # Sample table: WT is listed first in `levels`, so it is the
              # reference level of the factor.
              coldata <- data.frame(
                condition = factor(c("WT", "WT", "WT",
                                     "SNF2", "SNF2", "SNF2"),
                                   levels = c("WT", "SNF2"))
              )
              rownames(coldata) <- colnames(counts)

              dds <- DESeqDataSetFromMatrix(countData = counts,
                                            colData = coldata,
                                            design = ~ condition)
              dds <- DESeq(dds)

              # Order rows by adjusted p-value (ascending; NA values last).
              res <- results(dds)
              res <- res[order(res$padj), ]
              write.csv(as.data.frame(res), "/data/results/deseq2_results.csv")

              cat("=== Summary ===\n")
              summary(res)
              cat("\n=== Top 10 genes by adjusted p-value ===\n")
              print(head(as.data.frame(res), 10))
              EOF
              Rscript /tmp/deseq2_analysis.R
          resources:
            requests:
              memory: "2Gi"
              cpu: "2"
            limits:
              memory: "4Gi"
              cpu: "4"
          volumeMounts:
            - name: workspace
              mountPath: /data
      volumes:
        - name: workspace
          persistentVolumeClaim:
            claimName: rnaseq-workspace