Merge branch 'development' into 'cwl-workflow'

# Conflicts:
#	CWL/tools/01_validate_data.cwl
#	CWL/tools/04_filter_features.cwl
#	CWL/tools/06_normalize_features.cwl
#	CWL/tools/07_add_metadata_as_predictor.cwl
#	CWL/tools/08_split_data.cwl
#	CWL/tools/09_train_models.cwl
#	CWL/tools/10_make_predictions.cwl
#	CWL/tools/11_evaluate_predictions.cwl
#	CWL/tools/12_interprete_model.cwl
#	CWL/workflows/siamcat_workflow.cwl
#	Rscript_flavor/00_setup.r

Merge branch 'development' into 'cwl-workflow'
# Conflicts:
#	CWL/tools/01_validate_data.cwl
#	CWL/tools/04_filter_features.cwl
#	CWL/tools/06_normalize_features.cwl
#	CWL/tools/07_add_metadata_as_predictor.cwl
#	CWL/tools/08_split_data.cwl
#	CWL/tools/09_train_models.cwl
#	CWL/tools/10_make_predictions.cwl
#	CWL/tools/11_evaluate_predictions.cwl
#	CWL/tools/12_interprete_model.cwl
#	CWL/workflows/siamcat_workflow.cwl
#	Rscript_flavor/00_setup.r
96253754 · Kersten Breuer · c6f9095b · f0cf540b · 96253754 · 96253754
Commit 96253754 authored 7 years ago by Kersten Breuer
--- a/CWL/tools/01_validate_data.cwl
+++ b/CWL/tools/01_validate_data.cwl
@@ -31,10 +31,11 @@ arguments:
    prefix: '--label_out'
    valueFrom: $(inputs.label_in.nameroot)_valid.tsv
  - position: 2
+    prefix: '--metadata_out'
    valueFrom: |
      ${
        if (inputs.metadata_in){
-          return [ "--metadata_out", inputs.metadata_in.nameroot + "_valid.tsv" ];
+          return inputs.metadata_in.nameroot + "_valid.tsv";
        } else {
          return null;
        }

--- a/CWL/tools/04_filter_features.cwl
+++ b/CWL/tools/04_filter_features.cwl
@@ -23,17 +23,17 @@ inputs:
      prefix: --cutoff
      position: 2
  rm_unmapped:
-    type: boolean?
+    doc: TRUE or FALSE is allowed
+    type: string?
    inputBinding:
      prefix: --rm_unmapped
      position: 2
-      valueFrom: $(self.toString())
  recomp_prop:
-    type: boolean?
+    doc: TRUE or FALSE is allowed
+    type: string?
    inputBinding:
      prefix: --recomp_prop
      position: 2
-      valueFrom: $(self.toString())

 arguments:
  - prefix: --feat_out

--- a/CWL/tools/06_normalize_features.cwl
+++ b/CWL/tools/06_normalize_features.cwl
@@ -28,28 +28,28 @@ inputs:
      prefix: --sd_min_quantile
      position: 2
  norm_sample:
-    type: boolean?
+    doc: TRUE or FALSE is allowed
+    type: string?
    inputBinding:
      prefix: --norm_sample
      position: 2
-      valueFrom: $(self.toString())
  norm_global:
-    type: boolean?
+    doc: TRUE or FALSE is allowed
+    type: string?
    inputBinding:
      prefix: --norm_global
      position: 2
-      valueFrom: $(self.toString())
  vector_norm:
    type: int?
    inputBinding:
      prefix: --vector_norm
      position: 2
  norm_feature:
-    type: boolean?
+    doc: TRUE or FALSE is allowed
+    type: string?
    inputBinding:
      prefix: --norm_feature
      position: 2
-      valueFrom: $(self.toString())
      
 arguments:
  - position: 3
@@ -64,7 +64,7 @@ outputs:
    type: File
    outputBinding:
      glob: $(inputs.feat_in.nameroot)_norm.tsv
-  normalization_parameters_out:
-    type: File
+  normalization_parameters:
+    type: File?
    outputBinding:
      glob: $(inputs.feat_in.nameroot)_normParam.txt
--- a/CWL/tools/07_add_metadata_as_predictor.cwl
+++ b/CWL/tools/07_add_metadata_as_predictor.cwl
@@ -23,11 +23,11 @@ inputs:
      prefix: --pred_names
      position: 2
  std_meta:
-    type: boolean?
+    doc: TRUE or FALSE is allowed
+    type: string?
    inputBinding:
      prefix: --std_meta
      position: 3
-      valueFrom: $(self.toString())

 arguments:
  - prefix: --feat_out

--- a/CWL/tools/08_split_data.cwl
+++ b/CWL/tools/08_split_data.cwl
@@ -24,15 +24,16 @@ inputs:
      position: 2
  resample:
    type: int?
+    default: 0
    inputBinding:
      prefix: --resample
      position: 2
  stratify:
-    type: boolean?
+    doc: TRUE or FALSE is allowed
+    type: string?
    inputBinding:
      position: 2
      prefix: --stratify
-      valueFrom: $(self.toString())
  inseparable:
    type: string?
    inputBinding:
@@ -42,17 +43,17 @@ inputs:
 arguments:
    - position: 3
      prefix: --train_sets
-      valueFrom: $(inputs.feat_in.nameroot)_trainSets.tsv
+      valueFrom: $(inputs.label_in.nameroot)_trainSets.tsv
    - position: 3
      prefix: --test_sets
-      valueFrom: $(inputs.feat_in.nameroot)_testSets.tsv
+      valueFrom: $(inputs.label_in.nameroot)_testSets.tsv

 outputs:
  train_sets_out:
    type: File
    outputBinding:
-      glob: $(inputs.feat_in.nameroot)_trainSets.tsv
+      glob: $(inputs.label_in.nameroot)_trainSets.tsv
  test_sets_out:
    type: File
    outputBinding:
-      glob: $(inputs.feat_in.nameroot)_testSets.tsv
+      glob: $(inputs.label_in.nameroot)_testSets.tsv
--- a/CWL/tools/09_train_models.cwl
+++ b/CWL/tools/09_train_models.cwl
@@ -28,10 +28,10 @@ inputs:
      prefix: --method
      position: 2
  stratify:
-    type: boolean?
+    doc: TRUE or FALSE is allowed
+    type: string?
    inputBinding:
      prefix: --stratify
-      valueFrom: $(self.toString())
      position: 2
  sel_criterion:
    type: string?
@@ -45,26 +45,12 @@ inputs:
      position: 2

 arguments:
-    - position: 3
-      prefix: --model
-      valueFrom: $(inputs.feat_in.nameroot)_model.tsv
    - position: 3
      prefix: --mlr_models_list
      valueFrom: $(inputs.feat_in.nameroot)_model.RData
-    - position: 3
-      prefix: --model_matrix
-      valueFrom: $(inputs.feat_in.nameroot)_modelMatrix.txt

 outputs:
-  model_tsv:
-    type: File
-    outputBinding:
-      glob: $(inputs.feat_in.nameroot)_model.tsv
-  model_rdata:
+  model:
    type: File
    outputBinding:
      glob: $(inputs.feat_in.nameroot)_model.RData
-  model_matrix:
-    type: File
-    outputBinding:
-      glob: $(inputs.feat_in.nameroot)_modelMatrix.txt
--- a/CWL/tools/10_make_predictions.cwl
+++ b/CWL/tools/10_make_predictions.cwl
@@ -13,7 +13,7 @@ inputs:
      prefix: --feat_in
      position: 1
  label_in:
-    type: File?
+    type: File
    inputBinding:
      prefix: --label_in
      position: 1
@@ -22,16 +22,11 @@ inputs:
    inputBinding:
      prefix: --test_sets
      position: 1
-  model_rdata:
+  model:
    type: File
    inputBinding:
      prefix: --mlr_models_list
      position: 1
-  model_matrix:
-    type: File
-    inputBinding:
-      prefix: --model_matrix
-      position: 1

 arguments:
    - position: 2

--- a/CWL/tools/11_evaluate_predictions.cwl
+++ b/CWL/tools/11_evaluate_predictions.cwl
@@ -10,7 +10,7 @@ inputs:
  label_in:
    type: File
    inputBinding:
-      prefix: --label
+      prefix: --label_in
      position: 1
  predictions:
    type: File
@@ -18,26 +18,26 @@ inputs:
      prefix: --pred
      position: 1
  write_eval_results:
-    type: boolean?
+    doc: TRUE or FALSE is allowed
+    type: string?
    inputBinding:
      prefix: --write_eval_results
-      valueFrom: $(self.toString())
      position: 2

 arguments:
  - position: 3
    prefix: --plot
-    valueFrom:   $(inputs.feat_in.nameroot)_evalPlots.pdf
+    valueFrom:   $(inputs.label_in.nameroot)_evalPlots.pdf
  - position: 3
    prefix: --output_results
-    valueFrom:   $(inputs.feat_in.nameroot)_evalResults.txt
+    valueFrom:   $(inputs.label_in.nameroot)_evalResults.txt

 outputs:
  evaluation_plot:
    type: File
    outputBinding:
-      glob: $(inputs.feat_in.nameroot)_evalPlot.pdf
+      glob: $(inputs.label_in.nameroot)_evalPlots.pdf
  evaluation_results:
    type: File?
    outputBinding:
-      glob: $(inputs.feat_in.nameroot)_evalResults.txt
+      glob: $(inputs.label_in.nameroot)_evalResults.txt
--- a/CWL/tools/12_interprete_model.cwl
+++ b/CWL/tools/12_interprete_model.cwl
@@ -11,51 +11,51 @@ inputs:
    type: File
    inputBinding:
      position: 1
-      prefix: '--feat'
+      prefix: --feat_in
  original_feat:
    type: File
    inputBinding:
      position: 1
-      prefix: '--origin_feat'
+      prefix: --origin_feat
  label_in:
    type: File
    inputBinding:
      position: 1
-      prefix: '--label'
+      prefix: --label_in
  metadata_in:
    type: File?
    inputBinding:
      position: 1
-      prefix: '--meta'
-  model_tsv:
+      prefix: --metadata_in
+  model:
    type: File
    inputBinding:
      position: 1
-      prefix: '--model'
+      prefix: --mlr_models_list
  predictions:
    type: File
    inputBinding:
      position: 1
-      prefix: '--pred'
+      prefix: --pred
  color_scheme:
    type: string?
    inputBinding:
      position: 2
-      prefix: '--col_scheme'
+      prefix: --col_scheme
  consensus_threshold:
    type: float?
    inputBinding:
      position: 2
-      prefix: '--consens_thres'
+      prefix: --consens_thres
  heatmap_type:
    type: string?
    inputBinding:
      position: 2
-      prefix: '--heatmap_type'
+      prefix: --heatmap_type

 arguments:
  - position: 3
-    prefix: '--plot'
+    prefix: --plot
    valueFrom: $(inputs.feat_in.nameroot)_model_plots.pdf

 outputs:

--- a/CWL/workflows/siamcat_workflow.cwl
+++ b/CWL/workflows/siamcat_workflow.cwl
 class: Workflow
 cwlVersion: v1.0
-
-requirements:
-  MultipleInputFeatureRequirement: {}
-
 id: siamcat_workflow
 label: siamcat_workflow
 inputs:
@@ -26,10 +22,10 @@ outputs:
    type: File
    'sbg:x': 1952.6685911746124
    'sbg:y': 725.331343086159
-  - id: normalization_parameters_out
+  - id: normalization_parameters
    outputSource:
-      - normalize_features/normalization_parameters_out
-    type: File
+      - normalize_features/normalization_parameters
+    type: File?
    'sbg:x': 1170.382378133225
    'sbg:y': 588.6885709128314
  - id: evaluation_results
@@ -56,9 +52,9 @@ outputs:
    type: File
    'sbg:x': 1941.2224255879937
    'sbg:y': -322.2702631625083
-  - id: model_tsv
+  - id: model
    outputSource:
-      - train_models/model_tsv
+      - train_models/model
    type: File
    'sbg:x': 1183.3419863384065
    'sbg:y': -202.1716046648809
@@ -111,7 +107,7 @@ steps:
          - filter_features/filtered_feat
    out:
      - id: feat_out
-      - id: normalization_parameters_out
+      - id: normalization_parameters
    run: ../tools/06_normalize_features.cwl
    'sbg:x': 657.0314061482312
    'sbg:y': 465.6464459739973
@@ -137,12 +133,9 @@ steps:
      - id: label_in
        source:
          - validate_data/validated_label
-      - id: model_matrix
+      - id: model
        source:
-          - train_models/model_matrix
-      - id: model_rdata
-        source:
-          - train_models/model_rdata
+          - train_models/model
      - id: test_sets
        source:
          - split_data/test_sets_out
@@ -163,9 +156,7 @@ steps:
        source:
          - split_data/train_sets_out
    out:
-      - id: model_matrix
-      - id: model_rdata
-      - id: model_tsv
+      - id: model
    run: ../tools/09_train_models.cwl
    'sbg:x': 951.9482287088812
    'sbg:y': 176.0787371317328
@@ -177,8 +168,6 @@ steps:
      - id: predictions
        source:
          - make_predictions/predictions
-      - id: write_eval_results
-        default: true
    out:
      - id: evaluation_plot
      - id: evaluation_results
@@ -196,9 +185,9 @@ steps:
      - id: metadata_in
        source:
          - validate_data/validated_metadata
-      - id: model_tsv
+      - id: model
        source:
-          - train_models/model_tsv
+          - train_models/model
      - id: original_feat
        source:
          - validate_data/validated_feat
@@ -223,3 +212,5 @@ steps:
    run: ../tools/03_check_for_confounders.cwl
    'sbg:x': 648.4335802713812
    'sbg:y': -322.994492847952
+requirements:
+  MultipleInputFeatureRequirement: {}
--- a/CWL/workflows/siamcat_workflow.cwl.svg
+++ b/CWL/workflows/siamcat_workflow.cwl.svg
--- a/CWL/workflows/siamcat_workflow_arrange1.cwl
+++ b/CWL/workflows/siamcat_workflow_arrange1.cwl
+class: Workflow
+cwlVersion: v1.0
+id: siamcat_workflow
+label: siamcat_workflow
+inputs:
+  - id: label_in
+    type: File
+    'sbg:x': -211.46906743520267
+    'sbg:y': 215.17076736908456
+  - id: feat_in
+    type: File
+    'sbg:x': -213.1508026123047
+    'sbg:y': 354.48820821737314
+  - id: metadata_in
+    type: File?
+    'sbg:x': -213.43544332479505
+    'sbg:y': 65.19374084472656
+outputs:
+  - id: association_plots_out
+    outputSource:
+      - check_associations/association_plots_out
+    type: File
+    'sbg:x': 1920.3959628117993
+    'sbg:y': -265.89943341310783
+  - id: normalization_parameters_out
+    outputSource:
+      - normalize_features/normalization_parameters_out
+    type: File
+    'sbg:x': 1104.0839124465738
+    'sbg:y': -203.8899465928698
+  - id: evaluation_results
+    outputSource:
+      - evaluate_predictions/evaluation_results
+    type: File?
+    'sbg:x': 1931.515819217699
+    'sbg:y': 423.6074051792846
+  - id: evaluation_plot
+    outputSource:
+      - evaluate_predictions/evaluation_plot
+    type: File
+    'sbg:x': 1936.9710693359375
+    'sbg:y': 560.4991510912977
+  - id: model_plots
+    outputSource:
+      - interprete_model/model_plots
+    type: File
+    'sbg:x': 1924.0077850204946
+    'sbg:y': -32.05451321281125
+  - id: confounders_plot
+    outputSource:
+      - check_for_confounders/confounders_plot
+    type: File
+    'sbg:x': 1939.0229602762402
+    'sbg:y': 771.1966557660979
+  - id: model_tsv
+    outputSource:
+      - train_models/model_tsv
+    type: File
+    'sbg:x': 1105.4731946731363
+    'sbg:y': -87.36692340320536
+steps:
+  - id: validate_data
+    in:
+      - id: feat_in
+        source:
+          - feat_in
+      - id: label_in
+        source:
+          - label_in
+      - id: metadata_in
+        source:
+          - metadata_in
+    out:
+      - id: validated_feat
+      - id: validated_label
+      - id: validated_metadata
+    run: >-
+      /media/sf_Dokumente/MicrobiomeEMBL/siamcat_cwl/CWL/tools/01_validate_data.cwl
+    'sbg:x': 15.152003171656446
+    'sbg:y': 213.78602506789176
+  - id: filter_features
+    in:
+      - id: feat_in
+        source:
+          - select_samples/selected_feat
+          - validate_data/validated_feat
+    out:
+      - id: filtered_feat
+    run: >-
+      /media/sf_Dokumente/MicrobiomeEMBL/siamcat_cwl/CWL/tools/04_filter_features.cwl
+    'sbg:x': 249.00100933052406
+    'sbg:y': 12.314643731955186
+  - id: check_associations
+    in:
+      - id: feat_in
+        source:
+          - filter_features/filtered_feat
+      - id: label_in
+        source:
+          - select_samples/selected_label
+          - validate_data/validated_label
+    out:
+      - id: association_plots_out
+    run: >-
+      /media/sf_Dokumente/MicrobiomeEMBL/siamcat_cwl/CWL/tools/05_check_associations.cwl
+    'sbg:x': 758.5679099044457
+    'sbg:y': -262.3459206756455
+  - id: normalize_features
+    in:
+      - id: feat_in
+        source:
+          - filter_features/filtered_feat
+    out:
+      - id: feat_out
+      - id: normalization_parameters_out
+    run: >-
+      /media/sf_Dokumente/MicrobiomeEMBL/siamcat_cwl/CWL/tools/06_normalize_features.cwl
+    'sbg:x': 546.7723766116638
+    'sbg:y': 2.6954385965526964
+  - id: split_data
+    in:
+      - id: label_in
+        source:
+          - validate_data/validated_label
+      - id: metadata_in
+        source:
+          - validate_data/validated_metadata
+    out:
+      - id: test_sets_out
+      - id: train_sets_out
+    run: /media/sf_Dokumente/MicrobiomeEMBL/siamcat_cwl/CWL/tools/08_split_data.cwl
+    'sbg:x': 617.3671975721693
+    'sbg:y': 497.5342735119465
+  - id: make_predictions
+    in:
+      - id: feat_in
+        source:
+          - normalize_features/feat_out
+      - id: label_in
+        source:
+          - validate_data/validated_label
+      - id: model_matrix
+        source:
+          - train_models/model_matrix
+      - id: model_rdata
+        source:
+          - train_models/model_rdata
+      - id: test_sets
+        source:
+          - split_data/test_sets_out
+    out:
+      - id: predictions
+    run: >-
+      /media/sf_Dokumente/MicrobiomeEMBL/siamcat_cwl/CWL/tools/10_make_predictions.cwl
+    'sbg:x': 1480.8948640695066
+    'sbg:y': 239.193603515625
+  - id: train_models
+    in:
+      - id: feat_in
+        source:
+          - normalize_features/feat_out
+      - id: label_in
+        source:
+          - validate_data/validated_label
+      - id: train_sets
+        source:
+          - split_data/train_sets_out
+    out:
+      - id: model_matrix
+      - id: model_rdata
+      - id: model_tsv
+    run: >-
+      /media/sf_Dokumente/MicrobiomeEMBL/siamcat_cwl/CWL/tools/09_train_models.cwl
+    'sbg:x': 885.5039230004554
+    'sbg:y': 187.50314745710568
+  - id: evaluate_predictions
+    in:
+      - id: label_in
+        source:
+          - validate_data/validated_label
+      - id: predictions
+        source:
+          - make_predictions/predictions
+      - id: write_eval_results
+        default: true
+    out:
+      - id: evaluation_plot
+      - id: evaluation_results
+    run: >-
+      /media/sf_Dokumente/MicrobiomeEMBL/siamcat_cwl/CWL/tools/11_evaluate_predictions.cwl
+    'sbg:x': 1742.0773034613228
+    'sbg:y': 478.7590748329334
+  - id: interprete_model
+    in:
+      - id: feat_in
+        source:
+          - normalize_features/feat_out
+      - id: label_in
+        source:
+          - validate_data/validated_label
+      - id: metadata_in
+        source:
+          - validate_data/validated_metadata
+      - id: model_tsv
+        source:
+          - train_models/model_tsv
+      - id: original_feat
+        source:
+          - validate_data/validated_feat
+      - id: predictions
+        source:
+          - make_predictions/predictions
+    out:
+      - id: model_plots
+    run: >-
+      /media/sf_Dokumente/MicrobiomeEMBL/siamcat_cwl/CWL/tools/12_interprete_model.cwl
+    'sbg:x': 1741.5103701741173
+    'sbg:y': -29.675089878030956
+  - id: check_for_confounders
+    in:
+      - id: label_in
+        source:
+          - validate_data/validated_label
+      - id: metadata_in
+        source:
+          - validate_data/validated_metadata
+    out:
+      - id: confounders_plot
+    run: >-
+      /media/sf_Dokumente/MicrobiomeEMBL/siamcat_cwl/CWL/tools/03_check_for_confounders.cwl
+    'sbg:x': 490.946993293249
+    'sbg:y': 753.4881214090526
--- a/CWL/workflows/siamcat_workflow_arrange1.cwl.svg
+++ b/CWL/workflows/siamcat_workflow_arrange1.cwl.svg
--- a/DESCRIPTION
+++ b/DESCRIPTION
 Package: SIAMCAT
 Type: Package
 Title: Statistical Inference of Associations between Microbial Communities And host phenoTypes
-Version: 0.2.0
+Version: 0.3.0
 Author: Georg Zeller [aut,cre], Nicolai Karcher [aut], Jakob Wirbel[aut], Konrad Zych [aut]
 Authors@R: c(person("Georg", "Zeller", role = c("aut", "cre"), email = "zeller@embl.de"),
             person("Konrad", "Zych", role = "aut", email = "konrad.zych@embl.de", comment = c(ORCID = "0000-0001-7426-0516")),

--- a/Rscript_flavor/00_setup.r
+++ b/Rscript_flavor/00_setup.r
@@ -29,7 +29,7 @@ package.list <- c("RColorBrewer",
 # script can take the location of the siamcat package as argument: Rscript 00_setup.r /path/to/SIAMCAT_0.2.0.tar.gz
 # by default it is assumed to be located in './SIAMCAT_0.2.0.tar.gz'
 args = commandArgs(trailingOnly = TRUE)
-package.path <- if(length(args)==0) "./SIAMCAT_0.2.0.tar.gz" else args[1]
+package.path <- if(length(args)==0) "./SIAMCAT_0.3.0.tar.gz" else args[1]

 notInst      <- which(!package.list%in%installed.packages())
 if(length(notInst)>0) install.packages(package.list[notInst], repos="http://cran.uni-muenster.de")

--- a/Rscript_flavor/10_make_predictions.r
+++ b/Rscript_flavor/10_make_predictions.r
@@ -65,7 +65,7 @@ write(pred.header, file=opt$pred, append=FALSE)
 if (length(unique(names(pred$pred))) < length(pred$pred)) {
  suppressWarnings(write.table(pred$mat, file=opt$pred, quote=FALSE, sep='\t', row.names=TRUE, col.names=NA, append=TRUE))
 } else {
-  write.table(pred$pred, file=opt$pred, quote=FALSE, sep='\t', row.names=TRUE, col.names=FALSE, append=TRUE)
+  suppressWarnings(write.table(pred$pred, file=opt$pred, quote=FALSE, sep='\t', row.names=TRUE, col.names=NA, append=TRUE))
 }
 cat('\nSaved all predictions\n')