Skip to content
Snippets Groups Projects
Verified Commit 9a9bd76a authored by Renato Alves's avatar Renato Alves :seedling:
Browse files

ENH Add xpipe, csvview and tabview

parent 5221b97a
No related branches found
No related tags found
No related merge requests found
......@@ -9,6 +9,9 @@ EMBL utilities
``docker_wrap.sh`` - a convenience script to start a container that shares the user's home and creates a user to ensure consistent permissions.
``tabview`` and ``csvview`` - quick preview of tab-delimited and comma-delimited files. Aligns columns based on content. Works fine with streams and large files. Uses ``xpipe``.
``xpipe`` - like xargs, but splits large or infinite streams into chunks and executes a command on each chunk.
Examples
========
......
#!/usr/bin/env sh
##
# Preview comma-delimited files regardless of their size
#
# Copyright 2017 Renato Alves
# Licensed under MIT (https://en.wikipedia.org/wiki/MIT_License)
##
# Align comma-separated columns chunk by chunk (via xpipe) and page the result
# without wrapping long lines. When no file argument is given, read STDIN so
# the script can also sit at the end of a pipeline instead of failing on an
# empty file name.
xpipe 'column -t -s","' -f "${1:-/dev/stdin}" | less -S
#!/usr/bin/env sh
##
# Preview tab-delimited files regardless of their size
#
# Copyright 2017 Renato Alves
# Licensed under MIT (https://en.wikipedia.org/wiki/MIT_License)
##
# Build the separator explicitly: a literal tab typed inside the command
# string is indistinguishable from a space when reading the script and is
# easily lost by editors or copy/paste.
tab=$(printf '\t')
# Align tab-separated columns chunk by chunk (via xpipe) and page the result
# without wrapping long lines. When no file argument is given, read STDIN so
# the script can also sit at the end of a pipeline.
xpipe "column -t -s'${tab}'" -f "${1:-/dev/stdin}" | less -S
#!/usr/bin/env sh
##
# Split a stream/file and pipe it to a command in chunks.
#
# Copyright 2017 Renato Alves
# Licensed under MIT (https://en.wikipedia.org/wiki/MIT_License)
##
# Prefix put in front of every error message printed by this script.
ERROR='ERROR:'
# Default number of lines per chunk; replaced by the value of -l/--lines.
CHUNK='10000'
# Default input to read; replaced by the value of -f/--file.
FILE='/dev/stdin'
# Print the help text to STDERR.
# Reads the global CHUNK to show the current default chunk size.
usage () {
    {
        echo
        echo "Split a stream/file and pipe it to a command in chunks of X lines."
        echo "Think xargs with piped streams - hence xpipe"
        echo
        echo "Usage:"
        echo " xpipe [options] 'command ...'"
        echo
        echo " Options:"
        echo " -f --file = file/handle to read. Defaults to STDIN"
        echo " -l --lines = number of lines to chunk. Defaults to ${CHUNK}"
        echo
        echo "NOTE: Specified command must accept input via STDIN"
        echo
    } >&2
}
# Show the usage text followed by an error message on STDERR, then terminate
# the script with exit status 1.
#   $1 - message to print after the ERROR prefix
generic_error () {
    usage
    {
        echo "${ERROR} $1"
        echo
    } >&2
    exit 1
}
# Show the usage text and report a missing mandatory argument on STDERR, then
# terminate the script with exit status 1.
#   $1 - name of the argument that was not supplied
required_arg () {
    usage
    {
        echo "${ERROR} '$1' is a required argument."
        echo
    } >&2
    exit 1
}
# Fail early, with the usage text, when a required external tool is missing.
command -v getopt >/dev/null 2>&1 || generic_error "getopt is required but was not found."
# The long-option syntax used below (-o/-l) needs the enhanced util-linux
# getopt, which identifies itself by exiting with status 4 when called with
# -T. A legacy getopt would pass the presence check above and then fail later
# with a confusing parse error.
GETOPT_TEST=0
getopt -T >/dev/null 2>&1 || GETOPT_TEST=$?
[ "$GETOPT_TEST" -eq 4 ] || generic_error "Enhanced (util-linux) getopt is required but was not found."
command -v parallel >/dev/null 2>&1 || generic_error "GNU Parallel is required but was not found."
# getopt invocation used both for the validation pass right below and for the
# real parse later in the script. Long options are comma-separated and a
# trailing ':' marks an option that takes a required argument.
# NOTE: this is a plain string that is expanded unquoted, so a script path
# ($0) containing whitespace would be split into several words.
ARG_PARSE="getopt -o f:l:h -l file:,lines:,help -n $0 --"
# We process arguments twice to handle any argument parsing error:
# this first pass discards the parsed output and keeps only getopt's
# diagnostics, so they can be printed after the usage text.
if ! ARG_ERROR=$($ARG_PARSE "$@" 2>&1 1>/dev/null); then
    generic_error "$ARG_ERROR"
fi
# From here on, any command that fails terminates the script.
set -o errexit
# getopt is used (rather than the getopts builtin) so that positional
# arguments may appear before options.
ARGS=$($ARG_PARSE "$@")
# Replace the positional parameters with getopt's normalized, quoted list.
eval "set -- $ARGS"
# Consume the normalized option list produced by getopt. Every arm has to
# shift past what it handled, otherwise the loop would never advance.
while true; do
    case "$1" in
        -l|--lines)
            # Drop the option, store its value, then drop the value.
            shift
            CHUNK="$1"
            shift
            ;;
        -f|--file)
            # Drop the option, store its value, then drop the value.
            shift
            FILE="$1"
            shift
            ;;
        -h|--help)
            shift
            usage
            exit 1
            ;;
        --)
            # End of options: whatever remains is positional.
            shift
            break
            ;;
        *)
            # A token that matches no arm (or an empty argument list) would
            # never be shifted and the loop would spin forever; abort instead.
            generic_error "Unexpected argument '$1'."
            ;;
    esac
done
# The first remaining positional argument is the command each chunk is piped to.
COMMAND="$1"
if [ -z "$COMMAND" ]; then
    required_arg "command"
fi
# TODO For now parallel does all the heavy lifting. Ideally we would only depend on the shell or unix commands.
# Feed FILE to parallel on STDIN; CHUNK is handed to -N and the command string
# is passed as a single argument.
parallel --pipe -k -N "$CHUNK" "$COMMAND" < "$FILE"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment