*****Click on the gear above and choose: “Chunk Output in Console” before proceeding*****

Read in and Melt Raw Data

Read in the individual csv’s for each fluorphore, via searching the input directory for filenames containing the names of the fluors used in this experiment. This generates a melted data frame containing the concatenated data sets from each fluorophore.

fileNames <- list.files(path = fluorInput, pattern = "*.csv", full.names = TRUE)
dataList <- lapply(fluorList, function(x) 
    {read.csv(file = fileNames[grep(x, fileNames)], stringsAsFactors = FALSE)})
names(dataList) <- fluorList
dataList <- lapply(dataList, function(x) {colnames(x)[1] <- "Well";x}) 
allFluors <- melt(dataList, id=colnames(dataList[[1]]))
colnames(allFluors)[length(colnames(allFluors))] <- "Fluor"
names(allFluors)[names(allFluors) == "X.Area"] <- "Area"

rm(dataList)

Calculate AID

Note: Area is divided by Max % Fill per ROI, which has already been calculated for this master/array design as 9 pixels by 17 pixels, and ROI grid is 400 pixels, so the max % Fill per ROI is 38.25%

wellTotal <- 1024  # Total number of wells in a single array
allFluors$Array[allFluors$Well <= wellTotal] <- "A1"
allFluors$Array[allFluors$Well >= wellTotal+1 & allFluors$Well <= 2*wellTotal] <- "A2"
allFluors$Array[allFluors$Well > 2*wellTotal] <- "A3"

allFluors$AID <- allFluors$IntDen/(allFluors$Area/38.25)
allFluors$Fluor <- factor(allFluors$Fluor, levels = fluorList)
allFluors$Array <- factor(allFluors$Array)
allFluors <- allFluors %>% mutate(Template = "OCI-AML3")

Data formatting to generate a “spread” format for further analysis.

Uses the area calculated in the ampFluor channel as the true Area from here on out.

procData <- allFluors[c("Well", "Fluor", "Array", "AID")]
ampArea <- allFluors %>% filter(Fluor == ampFluor) %>% select(Well, Area, Array, Template)

wideForm <- procData %>% group_by(Array) %>% spread(Fluor, AID) 
colnames(wideForm)[which(grepl(ampFluor, colnames(wideForm)))] <- "ampFluor"
colnames(wideForm)[which(grepl(wtFluor, colnames(wideForm)))] <- "wtFluor"
colnames(wideForm)[which(grepl(mutFluor, colnames(wideForm)))] <- "mutFluor"

primeTime <- merge(ampArea, wideForm)
primeTime <- primeTime[order(primeTime$Well),] 
primeTime$ArrayRow <- rep(1:16, each = 64)
primeTime$ArrayCol <- seq(1:64)

rm(procData, ampArea, wideForm)

Fill Thresholding

Plot data as a function of area in order to visualize the area threshold for identifying which wells “filled” and which did not prior to downstream gating.

Visualize the suggested threshold as well as the chosen threshold to confirm.

tArea <- 21
ggplot(data = primeTime %>% filter(Area > 5), aes(x=Area)) +
    geom_density() + facet_grid(. ~ Array) + 
    geom_vline(xintercept = tArea, color = "springgreen4") +
    theme_bw()

Apply one area threshold to the data frame for all arrays and generate a new “ruleThemAll”.

ruleThemAll <- primeTime %>% group_by(Array) %>% mutate(Filled = Area > tArea)

HEX Correction and Thresholding

Set the constant used to correct for FAM bleed-through in the HEX channel. This value was calcuated based on the average of several runs of mutant plasmids.

avgHEXSlope <- 0.46
#snowCone <- mutate(ruleThemAll, runID = "OCI-AML3 Cells")

snowCone <- mutate(ruleThemAll, mutFluorAdj = mutFluor - avgHEXSlope*ampFluor)
#snowCone$mutFluorAdj <- NA

Required for thresholding: Calculate max of density curve

#set approximate range of negative wells to get more accurate denisity curve
AmpMax    <- 1100000
MutMax    <- 2000000
WtMax     <- 1500000
MutAdjMax <- 1000000
WtAdjMax  <- 1500000

#rearrange columns to make gather work
snowCone <- select(snowCone, Well, Array, Area, Template, ampFluor, wtFluor, mutFluor, mutFluorAdj, ArrayRow, ArrayCol, Filled)

#function to find the max density
Peak <- function(X) {
      Y = density(X, na.rm = TRUE); 
      Z = data.frame(Intensity = Y$x, Density = Y$y);
    return(Z$Intensity[which.max(Z$Density)])
}

#make a table with max AID's for each Fluor
snowPeaks <- snowCone %>% filter(ampFluor < AmpMax, mutFluor < MutMax, wtFluor < WtMax) %>% gather(Fluor, AID, ampFluor:mutFluor) %>% 
          filter(Filled == TRUE) %>% 
          group_by(Array, Fluor) %>% 
          do(data.frame(maxAID = Peak(.$AID)))

mutAdjPeaks <- snowCone %>% gather(Fluor, AID, ampFluor:mutFluorAdj) %>% 
          filter(Filled == TRUE, Fluor == "mutFluorAdj", AID < MutAdjMax) %>% 
          group_by(Array, Fluor) %>% 
          do(data.frame(maxAID = Peak(.$AID)))

snowPeaksAdj <- bind_rows(snowPeaks, mutAdjPeaks)

Calculate thresholds using previously determined maxSD of negative controls

xsigamp <- 3
xsigmut <- 6
xsigwt <- 5

PSwapampSD    <- 41753*xsigamp
PSwapwtSD     <- 29937*xsigwt
PSwapmutAdjSD <- 52998*xsigmut

#Add thresholds to snowPeaksAdj
snowDrift <- snowPeaksAdj %>% mutate(Threshold = if_else( Fluor == "ampFluor", maxAID + PSwapampSD,
                                                  if_else( Fluor == "wtFluor", maxAID + PSwapwtSD,
                                                  if_else( Fluor == "mutFluorAdj", maxAID + PSwapmutAdjSD,
                                                          0))))

#Add this threshold to the df with all the AID's
snowDrift1 <- snowDrift %>% select(Array, Fluor, Threshold) %>% spread(Fluor, Threshold) %>% select(Array, ampThresh = ampFluor, mutThresh = mutFluor, mutAdjThresh = mutFluorAdj, wtThresh = wtFluor)

snowCone1 <- left_join(snowCone, snowDrift1, by = c("Array"))

Visualize amplification thresholds to spot errors

ampThreshPlot <- ggplot(snowCone1 %>% filter(Filled == TRUE), aes(x=ampFluor)) + 
                            geom_histogram(aes(x = ampFluor, y = ..density..), bins = 20) +
                            geom_point(stat = "density", alpha = 0.6, stroke = 0) + 
                            geom_vline(aes(xintercept = ampThresh), color = "orchid4") +
                            xlim(500000, 1500000) +
                            xlab("AID") + ylab("Density") + 
                            facet_grid(.~ Array) +
                            ggtitle("amp Thresh") +
                            theme_bw() +
                            theme(strip.text.y = element_blank(), axis.text.x = element_text(angle=90, hjust=1))
ampThreshPlot

## Warning: Removed 134 rows containing non-finite values (stat_bin).

## Warning: Removed 134 rows containing non-finite values (stat_density).

View scatter plot of mutant fluorophore vs wild-type fluorophore intensity

mutAdj_vs_wt_Scatter <- ggplot(snowCone1 %>% filter(Filled == TRUE), 
            aes(x=wtFluor, y=mutFluorAdj)) + 
            geom_point(alpha = 0.6, stroke = 0, color = "orangered2") + 
            geom_vline(aes(xintercept = wtThresh), color = "orchid4") +
            geom_hline(aes(yintercept = mutAdjThresh), color = "orchid4") +
            xlim(0,max(snowCone1$wtFluor, na.rm = TRUE)) +
            ylim(0,max(snowCone1$mutFluorAdj, na.rm = TRUE)) +
            labs(x = "wtFluor", y = "mutFluorAdj") +
            facet_grid(. ~ Array) +
            ggtitle("Well Intensities, with HEX Correction") +
            theme_bw() +
            theme(strip.text.y = element_blank(), axis.text.x = element_text(angle=90, hjust=1))
mutAdj_vs_wt_Scatter

Save a .jpeg of the mut vs. wt scatter plots

ggsave(mutAdj_vs_wt_Scatter, filename = paste0(output,"mutAdj_vs_wt_Scatter.jpeg"))

## Saving 7 x 5 in image

Determine amplification positivity for each well

snowCone2 <- mutate(snowCone1, AmpPos = FALSE)

snowCone2 <- snowCone2 %>% mutate(AmpPos = ampFluor >= ampThresh)

snowCone2$AmpPos <- as.factor(snowCone2$AmpPos)

Determine Zygosities in the individual arrays

snowCone3 <- mutate(snowCone2, Zygosity = "UNCALLED")

snowCone3$wtFluor[is.na(snowCone3$wtFluor)] <- 0
snowCone3$mutFluor[is.na(snowCone3$mutFluor)] <- 0
snowCone3$mutFluorAdj[is.na(snowCone3$mutFluorAdj)] <- 0
snowCone3$ampFluor[is.na(snowCone3$ampFluor)] <- 0


snowCone3 <- snowCone3 %>% mutate(Zygosity = replace(Zygosity, Filled == TRUE & AmpPos == TRUE  & 
                     wtFluor >= wtThresh & mutFluorAdj < mutAdjThresh, "WT"))

snowCone3 <- snowCone3 %>% mutate(Zygosity = replace(Zygosity, Filled == TRUE & AmpPos == TRUE  &
                     wtFluor < wtThresh & mutFluorAdj >= mutAdjThresh, "MUT"))

snowCone3 <- snowCone3 %>% mutate(Zygosity = replace(Zygosity, Filled == TRUE & AmpPos == TRUE  &
                     wtFluor >= wtThresh & mutFluorAdj >= mutAdjThresh, "HET"))

snowCone3$Zygosity <- factor(snowCone3$Zygosity, levels = c("WT", "HET", "MUT","UNCALLED"))

snowCone3 <- mutate(snowCone3, FluorQC = factor("ThisIsFine", levels = c("wtFluor", "mutFluor", "Both", "AmpOnly", "Negative", "ThisIsFine")))


snowCone3 <- snowCone3 %>% mutate(FluorQC = replace(FluorQC, Filled == TRUE & AmpPos == FALSE  &
                     wtFluor >= wtThresh & mutFluorAdj < mutAdjThresh, "wtFluor"))

snowCone3 <- snowCone3 %>% mutate(FluorQC = replace(FluorQC, Filled == TRUE & AmpPos == FALSE  & 
                     wtFluor < wtThresh & mutFluorAdj >= mutAdjThresh, "mutFluor"))

snowCone3 <- snowCone3 %>% mutate(FluorQC = replace(FluorQC, Filled == TRUE & AmpPos == FALSE & 
                     wtFluor >= wtThresh & mutFluorAdj >= mutAdjThresh, "Both"))

snowCone3 <- snowCone3 %>% mutate(FluorQC = replace(FluorQC, Filled == TRUE & AmpPos == TRUE & 
                     wtFluor < wtThresh & mutFluorAdj < mutAdjThresh, "AmpOnly"))

snowCone3 <- snowCone3 %>% mutate(FluorQC = replace(FluorQC, Filled == TRUE & AmpPos == FALSE & 
                     wtFluor < wtThresh & mutFluorAdj < mutAdjThresh, "Negative"))

Plot Zygosity Results

Plot mut vs. wt with Zygosity

FinalZygosity_PSwap <- ggplot(snowCone3 %>% filter(Filled ==TRUE, AmpPos == TRUE | FluorQC == "Negative"), 
            aes(x=wtFluor, y=mutFluorAdj)) + 
            geom_point(aes(color = Zygosity),alpha = 0.4, stroke = 0) + 
            geom_vline(aes(xintercept = wtThresh), color = "springgreen4") +
            geom_hline(aes(yintercept = mutAdjThresh), color = "springgreen4") +
        scale_color_manual(values = c("dodgerblue4","magenta4", "red3", "black"), limits = levels(snowCone3$Zygosity)) +
            xlim(0,max(snowCone3$wtFluor, na.rm = TRUE)) +
            ylim(-200000,max(snowCone3$mutFluorAdj, na.rm = TRUE)) +
            labs(x = "wtFluor", y = "mutFluorAdj") +
            facet_grid(. ~ Array) +
            ggtitle("PSwap Zygosity") +
            theme_bw() +
            theme(axis.text.x = element_text(angle=90, hjust=1), strip.text.y = element_text(size = 7, angle = 0))
FinalZygosity_PSwap

Save a .jpeg of the mut vs. wt Zygosity and FluorQC scatter plots

ggsave(FinalZygosity_PSwap, filename = paste0(output, "FinalZygosity_PSwap.jpeg"))

## Saving 7 x 5 in image

Cell Calculations

Read In Cell Data

filenames <- list.files(path = cellInput, pattern = ".csv", full.names = T)
images <- as.numeric(str_sub(gsub(".csv", "", filenames), -2,-1))
cellData <- lapply(filenames, function(x) read.csv(x, header = T))
names(cellData) <- images
cellMelt <- melt(cellData, id = colnames(cellData[[1]]))
colnames(cellMelt) <- c("imageWell", "cellCount", "imageId")
cellMelt$ROIorder <- seq(1:3072)

lookup <- data.frame(ROIorder = 1:3072)
lookup$ArrayCol <- rep(c(rep(1:11, len = 88), rep(12:22, len = 88), 
                         rep(23:33, len = 88), rep(34:44, len = 88), 
                         rep(45:55, len = 88), rep(56:64, len = 144),
                         rep(45:55, len = 88), rep(34:44, len = 88), 
                         rep(23:33, len = 88), rep(12:22, len = 88), 
                         rep(1:11, len = 88)), 3)
lookup$ArrayRow <- rep(c(rep(1:8, each = 11, len = 440), 
                         rep(1:8, each = 9),rep(9:16, each = 9), 
                         rep(9:16, each = 11, len = 440)),3)
lookup$estImageId <- c(rep(1:5, each = 88), rep(6:7, each = 72), 
                       rep(8:17, each = 88), rep(18:19, each = 72), 
                       rep(20:29, each = 88), rep(30:31, each = 72), 
                       rep(32:36, each = 88))
lookup$Array <- rep(c("A1", "A2", "A3"), each = 1024)
lookup <- arrange(lookup, Array, ArrayRow, ArrayCol)
lookup$Well <- seq(1:3072)
fullMergeData <- join(lookup, cellMelt, by = "ROIorder", type = "left")
cellsReadyToRoll <- select(fullMergeData, Well, cellCount)
snowCone4 <- left_join(snowCone3, cellsReadyToRoll, by = c("Well"))

View the zygosities of the single cells per each array

set1 <- brewer.pal(n = 9, "Set1")
set3 <- brewer.pal(n = 12, "Set3")
zygcolors <- c(set3[5], set3[10], set3[4], "black", "white")

zygBarPlot <- ggplot(data = snowCone4 %>% filter(Zygosity != "UNCALLED" & cellCount == 1),
                     aes(x = Zygosity, fill = Zygosity, group = Array), color = "black") +
                      geom_bar(aes(y = ..prop.., fill = factor(..x..))) + 
                      ylim(0, 1) +
                      scale_fill_manual(values = zygcolors) + theme_bw() +
                      geom_text(aes(label = ..count.., y = ..prop..), stat = "count", vjust = -0.5) +
                      labs(x = "Zygosity", y = "Frequency") + 
                      scale_x_discrete(limits=c("WT", "HET", "MUT")) +
                      theme(legend.position = "none") +
                      facet_grid(. ~ Array) +
                      theme(strip.text.y = element_text(size = 7, angle = 0))
zygBarPlot

Save the zygosity plot

ggsave(paste0(output, "zygBarPlot-singleCells.jpg"),
       zygBarPlot,
       width = 6, height = 6)

Failure rate calculations

failureData <- snowCone4 %>% group_by( Array, Template) %>%
    summarise(FalsePositives = n_distinct(which(Filled == TRUE & AmpPos == TRUE & cellCount == 0)),
              FalseNegatives = n_distinct(which(Filled == TRUE & AmpPos == FALSE & cellCount == 1)),
              TruePositives = n_distinct(which(Filled == TRUE & AmpPos == TRUE & cellCount == 1)),
              TrueNegatives = n_distinct(which(Filled == TRUE & AmpPos == FALSE & cellCount == 0)),
              DoubletPlus = n_distinct(which(Filled == TRUE & cellCount > 1)),
              EmptyWells = n_distinct(which(Filled == FALSE)),
              FDR = FalsePositives/(FalsePositives + TruePositives),
              FPR = FalsePositives/(FalsePositives + TrueNegatives),
              FNR = FalseNegatives/(FalseNegatives + TruePositives)
              )

View stacked bar plot of failure data

failColors <- brewer.pal(6, "Accent")
failurePlotCounts <- melt(failureData %>% select(-c(FDR, FPR, FNR)), variable.name = "Mode", value.name = "WellCount")

## Using Array, Template as id variables

failurePlot <- ggplot(data = failurePlotCounts) + 
  geom_bar(aes(x = Array, y = WellCount, fill = Mode), stat = "identity") + 
  scale_fill_manual(values = failColors) +
  scale_y_continuous(expand = c(0, 10)) +
  theme_bw()
failurePlot

save plot

ggsave(paste0(output, "failurePlot.jpg"),
       failurePlot,
       width = 6, height = 6)

save csv

write.csv(failureData, file = paste0(output,"failureRateResults.csv"), row.names = F)

Poisson Distribution calculations

experimentalLambda <- snowCone4 %>% group_by(Array,Template) %>%
    summarise(TotalCounts =   n_distinct(which(Filled == TRUE)),
              ZeroCells =     n_distinct(which(Filled == TRUE & cellCount == 0)),
              OneCell =       n_distinct(which(Filled == TRUE & cellCount == 1)),
              DoubletPlus =   n_distinct(which(Filled == TRUE & cellCount > 1)),
              ExpLambda =    -log(ZeroCells/TotalCounts),
              PredSingles =  TotalCounts*ExpLambda*exp(-ExpLambda)/1,
              PredDoubPlus = TotalCounts - ZeroCells - PredSingles
              )

View stacked barplot of cell counts

cellNumPlot <- melt(experimentalLambda %>% select(Array, ZeroCells, OneCell, DoubletPlus), variable.name = "Mode", value.name = "CellCount")

## Using Array as id variables

CellCountPlot <- ggplot(data = cellNumPlot) + 
  geom_bar(aes(x = Array, y = CellCount, fill = Mode), stat = "identity") +
  theme_bw()
CellCountPlot

Compare estimated and experimental zero/single/doublet+ counts, per array

PoissonTest <- experimentalLambda %>% gather("Occupancy", "Counts", ZeroCells:DoubletPlus) %>% select(Array:ExpLambda, Occupancy, Counts) %>% mutate(Origin = "Actual")
PoissonTest2 <- experimentalLambda %>% mutate(PredZero = ZeroCells) %>% select(Array:TotalCounts, ExpLambda, "OneCell"= PredSingles, "DoubletPlus" = PredDoubPlus, "ZeroCells" = PredZero) %>% gather("Occupancy", "Counts", OneCell:ZeroCells) %>% mutate(Origin = "Predicted")
PoissonFull <- PoissonTest %>% bind_rows(PoissonTest2)                                                                                                  

PoissonFit <- ggplot(data = PoissonFull, aes(x = Occupancy, y = Counts, fill = Origin)) + 
  geom_bar(stat = "identity", position = position_dodge()) + 
  scale_x_discrete(limits=c("ZeroCells", "OneCell", "DoubletPlus")) +
  facet_grid(.~ Array) +
  ggtitle("Actual and Poisson-Predicted Cell Counts") +
  theme_bw() +
  theme(axis.text.x = element_text(angle=90, hjust=1), strip.text.y = element_text(size = 7, angle = 0))
PoissonFit

Compare estimated and experimental zero/single/doublet+ counts, all data added together

PoissonSummary <- PoissonFull %>% group_by(Origin, Occupancy) %>% summarise(Counts = sum(Counts))

PoissonSummaryPlot <- ggplot(data = PoissonSummary, aes(x = Occupancy, y = Counts, fill = Origin)) + 
  geom_bar(stat = "identity", position = position_dodge()) + 
  scale_x_discrete(limits=c("ZeroCells", "OneCell", "DoubletPlus")) +
  ggtitle("Combined Actual and Poisson-Predicted Cell Counts") +
  theme_bw() +
  theme(axis.text.x = element_text(angle=90, hjust=1))
PoissonSummaryPlot

Save a .jpeg of the Poisson plots

ggsave(PoissonFit, filename = paste0(output,"PoissonFit.jpeg"))

## Saving 7 x 5 in image

ggsave(PoissonSummaryPlot, filename = paste0(output,"PoissonSummaryPlot.jpeg"))

## Saving 7 x 5 in image

ggsave(CellCountPlot, filename = paste0(output,"CellCountPlot.jpeg"))

## Saving 7 x 5 in image

Save csv’s of cell count distribution data

write.csv(experimentalLambda, file = paste0(output,"poissionEstimateResults.csv"), row.names = F)
write.csv(PoissonFull, file = paste0(output,"PoissonFull.csv"), row.names = F)
write.csv(PoissonSummary, file = paste0(output,"PoissonSummary.csv"), row.names = F)

Save the final data frame

write.csv(snowCone4, file = paste0(output,"cellsnowCone.csv"), row.names = F)

SD Single Cell Genotyping Data Reduction

Initialize libraries and Directory setup

Run Metadata