#' AnalysisHandle #' @description Object for handling database operations related to the storage and #' retrieval of Analysis objects. #' @field dbConnection A dbConnection object #' @export AnalysisHandle <- R6Class( classname = "AnalysisHandle", lock_class = TRUE, public = list( dbConnection = NULL, #' @description Create an object for handling database operations related to #' the storage and retrieval of Analysis objects. #' @param dbConnection A dbConnection object initialize = function(dbConnection = NA) { self$dbConnection <- dbConnection }, #' @description Retrieve an Analysis object using its database ID #' @param dbID Database ID of the Analysis object to retrieve #' @return An Analysis object get_by_id = function(dbID) { query <- "SELECT * FROM Analysis WHERE ID = ?" df <- self$dbConnection$get_data(query, list(dbID)) if(nrow(df$ID)>0) { analysis <- analysis$new(dbConnection = self$dbConnection, ID = df$ID, name = df$name, version = df$version, description = df$description, outcome = df$outcome, status = df$status, code = df$code) return(analysis) } else { return(NULL) } }, #' @description Retrieve an analysis using an associated data file #' @param datafile A Datafile object #' @param data_as_input logical (default = FALSE), whether the data is an #' input to the analysis workflow #' @return An Analysis object or a list of Analysis objects if data_as_input = TRUE get_by_data = function(datafile, data_as_input = FALSE) { if(data_as_input) { # Analyses that consumed this data query <- "SELECT DISTINCT A.* FROM Analysis AS A, Analysis_has_Data AS AhD WHERE AhD.Parent_ID = ? AND AhD.Analysis_ID = A.ID" data <- self$dbConnection$get_data(query, list(screen$ID)) if(length(data$ID)>0) { analyses <- list() df.list <- split(data, data$ID) for(df in df.list) { a <- Analysis$new(dbConnection = self$dbConnection, ID = df$ID, name = df$name, version = df$version, description = df$description, outcome = df$outcome, status = df$status, code = df$code) analyses <- c(analyses, a) } return(analyses) } else { return(NULL) } } else { # Analysis that produced this data. # There should be only one. query <- "SELECT DISTINCT A.* FROM Analysis AS A, Analysis_has_Data AS AhD WHERE AhD.Child_ID = ? AND AhD.Analysis_ID = A.ID" df <- self$dbConnection$get_data(query, list(datafile$ID)) if(length(df$ID) == 1) { analysis <- Analysis$new(dbConnection = self$dbConnection, ID = df$ID, name = df$name, version = df$version, description = df$description, outcome = df$outcome, status = df$status, code = df$code) return(analysis) } else if(length(df$ID) == 0) { return(NULL) } else { # Shouldn't happen stop("Datafile produced by more than one analysis") } } }, #' @description Retrieve all analyses associated with a screen #' @param screen A Screen object #' @return List of Analysis objects get_all_by_screen = function(screen) { query <- "SELECT A.* FROM Analysis AS A, Screen_has_Analysis AS ShA WHERE ShA.Screen_ID = ? AND ShA.Analysis_ID = A.ID" data <- self$dbConnection$get_data(query, list(screen$ID)) if(length(data$ID)>0) { analyses <- list() df.list <- split(data, data$ID) for(df in df.list) { a <- Analysis$new(dbConnection = self$dbConnection, ID = df$ID, name = df$name, version = df$version, description = df$description, outcome = df$outcome, status = df$status, code = df$code) analyses <- c(analyses, a) } return(analyses) } else { return(NULL) } }, #' @description Retrieve all analyses associated with a screen plate #' @param plate A Plate object #' @return List of Analysis objects get_all_by_plate = function(plate) { query <- "SELECT A.* FROM Analysis AS A, Analysis_has_Data AS AhD, Plate_has_Data AS PhD WHERE PhD.Plate_ID = ? AND (PhD.Datafile_ID = AhD.Parent_ID OR PhD.Datafile_ID = AhD.Child_ID) AND AhD.Analysis_ID = A.ID" data <- self$dbConnection$get_data(query, list(plate$ID)) if(length(data$ID)>0) { analyses <- list() df.list <- split(data, data$ID) for(df in df.list) { a <- Analysis$new(dbConnection = self$dbConnection, ID = df$ID, name = df$name, version = df$version, description = df$description, outcome = df$outcome, status = df$status, code = df$code) analyses <- c(analyses, a) } return(analyses) } else { return(NULL) } }, #' @description Generate a database ID for an Analysis object #' @return A string starting with the prefix ANA: get_new_ID = function() { query <- "SELECT seq FROM Analysis ORDER BY seq DESC LIMIT 1" df <- self$dbConnection$get_data(query) if(nrow(df)>0) { ID <- paste0("ANA:", sprintf("%012d", df$seq+1)) } else { # The table is empty ID <- paste0("ANA:", sprintf("%012d",1)) } return(ID) }, #' @description Store Analysis objects in the database #' @param analysis.list A list of Analysis objects store = function(analysis.list) { statement <- "INSERT IGNORE INTO Analysis (ID, name, version, description, outcome, status, code) VALUES (?, ?, ?, ?, ?, ?, ?)" for(analysis in analysis.list) { result <- self$dbConnection$execute(statement, list(analysis$ID, analysis$name, analysis$version, analysis$description, analysis$outcome, analysis$status, analysis$code)) if(length(analysis$input_datafiles)>0) { dfh <- DatafileHandle$new(self$dbConnection) result <- dfh$store(analysis$input_datafiles) for(datafile in analysis$input_datafiles) { st <- "INSERT IGNORE INTO Analysis_has_Data (Analysis_ID, Parent_ID) VALUES (?, ?)" r <- self$dbConnection$execute(st, list(plate$ID, datafile$ID)) } } if(length(analysis$output_datafiles)>0) { dfh <- DatafileHandle$new(self$dbConnection) result <- dfh$store(analysis$output_datafiles) for(datafile in analysis$output_datafiles) { st <- "INSERT IGNORE INTO Analysis_has_Data (Analysis_ID, Child_ID) VALUES (?, ?)" r <- self$dbConnection$execute(st, list(plate$ID, datafile$ID)) } } } } ) )