library(ggplot2) library(cowplot) library(dplyr) library(scales) library(gridExtra) library(grid) library(ggplotify) library(data.table) palette <- c('GENCODE_protein_coding' = '#009900', 'CLS_TMs' = '#cc9966', 'CLS_FL_TMs' = '#cc00cc') dat<-fread('output/statsFiles/all.min2reads.matureRNALength.stats.tsv.gz', header=T, sep='\t') dat <- subset(dat, seqTech=='pacBioSII-Uci-SmartSeq2') dat <- subset(dat, capDesign=='HpreCap') dat <- subset(dat, sizeFrac=='0+') dat <- subset(dat, sampleRep=='WTC1102Rep1') dat$seqTech <- gsub('-', '\n', dat$seqTech) dat$sampleRep <- gsub('HpreCap_', '', dat$sampleRep) horizCats <- length(unique(dat$capDesign)) * length(unique(dat$sampleRep)) vertCats <- length(unique(dat$seqTech)) wXyPlot = (horizCats * 0.9) +1.7 hXyPlot = (vertCats * 0.6) + 1.7 geom_textSize=1.4 themeSize = (14/5) * geom_textSize # https://stackoverflow.com/questions/25061822/ggplot-geom-text-font-size-control/25062509 lineSize=geom_textSize/8 minorLineSize=lineSize/2 dat <- subset(dat, spliced==0) wXyPlot = wXyPlot * 1.2 dat %>% group_by(seqTech, sizeFrac, capDesign, sampleRep, category) %>% summarise(med=median(mature_RNA_length)) -> datSumm summaryStats = transform(datSumm, LabelM = comma(round(med, digits = 0))) geom_textSize = geom_textSize + 1 plotBase <- "p <- ggplot(dat, aes(x=mature_RNA_length, fill=category)) + geom_histogram(binwidth=100, alpha=0.35, position='identity') + geom_vline(data=summaryStats, aes(xintercept=med, color=category), size=lineSize, show.legend=FALSE) + geom_text(data = summaryStats[summaryStats$category=='CLS_TMs',], aes(label = LabelM, x = Inf, y = Inf, color=category), hjust=1.2, vjust=1, size=geom_textSize, show.legend=FALSE) + geom_text(data = summaryStats[summaryStats$category=='CLS_FL_TMs',], aes(label = LabelM, x = Inf, y = Inf, color=category), hjust=1.2, vjust=2.1, size=geom_textSize, show.legend=FALSE) + coord_cartesian(xlim=c(0, 3000)) + scale_x_continuous(labels=comma)+ scale_color_manual(values=palette, name='Category', labels = c('GENCODE_protein_coding' = 'GENCODE protein-coding', 'CLS_TMs'='TMs', 'CLS_FL_TMs'='FL TMs')) + scale_fill_manual(values=palette, name='Category', labels = c('GENCODE_protein_coding' = 'GENCODE protein-coding', 'CLS_TMs'='TMs', 'CLS_FL_TMs'='FL TMs')) + xlab('Mature RNA length') + theme(axis.text= element_text(size=themeSize*1.8), axis.ticks = element_line(size=lineSize), axis.line = element_line(colour = '#595959', size=lineSize), axis.title=element_text(size = themeSize*2), panel.grid.major = element_line(colour='#d9d9d9', size=lineSize),panel.grid.minor = element_line(colour='#e6e6e6', size=minorLineSize),panel.border = element_blank(),panel.background = element_blank(), strip.background = element_rect(colour='#737373',fill='white'), legend.key.size=unit(0.5,'line'), legend.title=element_text(size=themeSize*1.2), legend.text=element_text(size=themeSize), strip.text = element_text(size = themeSize)) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + " plotFacetXy <- parse(text =paste(plotBase, "facet_grid( seqTech ~ capDesign + sampleRep, scales='free_y')")) plotFacetYx <- parse(text=paste(plotBase, "facet_grid( capDesign + sampleRep ~ seqTech, scales='free_y')")) plotFacetXy <- parse(text =paste(plotFacetXy, " + theme(strip.text.x = element_blank(), strip.text.y = element_blank())")) plotFacetYx <- parse(text=paste(plotFacetYx, " + theme(strip.text.x = element_blank(), strip.text.y = element_blank())")) pXy <- eval(plotFacetXy) pYx <- eval(plotFacetYx) legend <- get_legend(pXy) pXyNoLegend <- pXy + theme(legend.position='none') pYxNoLegend <- pYx + theme(legend.position='none') legendOnly <- grid.arrange(legend) pXyGrob <- as.grob(pXy) pYxGrob <- as.grob(pYx) pXyNoLegendGrob <- as.grob(pXyNoLegend) pYxNoLegendGrob <- as.grob(pYxNoLegend) hLegendOnly <- convertUnit(sum(legend$heights), 'in', valueOnly=TRUE) wLegendOnly <- convertUnit(sum(legend$widths), 'in', valueOnly=TRUE) hYxPlot <- wXyPlot wYxPlot <- hXyPlot hXyNoLegendPlot<- hXyPlot wXyNoLegendPlot<- wXyPlot - wLegendOnly hYxNoLegendPlot<- hYxPlot wYxNoLegendPlot<- wYxPlot - wLegendOnly wYxPlot = wYxPlot * 1.2 wYxNoLegendPlot<- wYxPlot - wLegendOnly save_plot('output/plots/matureRNALength.stats/pacBioSII-Uci-SmartSeq2/HpreCap/pacBioSII-Uci-SmartSeq2_HpreCap_0+_WTC1102Rep1.min2reads.splicing_status-unspliced.matureRNALength.hist.stats.legendOnly.png', legendOnly, base_width=wLegendOnly, base_height=hLegendOnly) save_plot('output/plots/matureRNALength.stats/pacBioSII-Uci-SmartSeq2/HpreCap/pacBioSII-Uci-SmartSeq2_HpreCap_0+_WTC1102Rep1.min2reads.splicing_status-unspliced.matureRNALength.hist.stats.xy.wLegend.png', pXy, base_width=wXyPlot, base_height=hXyPlot) save_plot('output/plots/matureRNALength.stats/pacBioSII-Uci-SmartSeq2/HpreCap/pacBioSII-Uci-SmartSeq2_HpreCap_0+_WTC1102Rep1.min2reads.splicing_status-unspliced.matureRNALength.hist.stats.xy.woLegend.png', pXyNoLegend, base_width=wXyNoLegendPlot, base_height=hXyNoLegendPlot) save_plot('output/plots/matureRNALength.stats/pacBioSII-Uci-SmartSeq2/HpreCap/pacBioSII-Uci-SmartSeq2_HpreCap_0+_WTC1102Rep1.min2reads.splicing_status-unspliced.matureRNALength.hist.stats.yx.wLegend.png', pYx, base_width=wYxPlot, base_height=hYxPlot) save_plot('output/plots/matureRNALength.stats/pacBioSII-Uci-SmartSeq2/HpreCap/pacBioSII-Uci-SmartSeq2_HpreCap_0+_WTC1102Rep1.min2reads.splicing_status-unspliced.matureRNALength.hist.stats.yx.woLegend.png', pYxNoLegend, base_width=wYxNoLegendPlot, base_height=hYxNoLegendPlot)