Files
shell-scripting-templates/utilities/strings.bash
Nathaniel Landau 8bb89541e8 Squashed commit of the following:
commit 61bf734812cb62ba6e0ec224bc15f7928705a8a2
Author: Nathaniel Landau <nate@natelandau.com>
Date:   Thu Oct 21 15:44:21 2021 -0400

    Major overhaul continued

     - rename templates
     - add checks utilities
     - add new array utilities
     - rename files
     - add assorted utilities
     - improve documentation

commit 546178fff3b526f492eb0eeffc63f79537e75de3
Author: Nathaniel Landau <nate@natelandau.com>
Date:   Wed Oct 20 16:31:14 2021 -0400

    Update conventions

commit f6d0642f85518efda9c5d8472b99d1c14163e381
Author: Nathaniel Landau <nate@natelandau.com>
Date:   Wed Oct 20 09:47:09 2021 -0400

    minor formatting changes

commit 2217612b55e3f9faf803a2d0c937ea2261206505
Author: Nathaniel Landau <nate@natelandau.com>
Date:   Tue Oct 19 17:59:09 2021 -0400

    add new functions

commit 347ba7aa738dcd6a5ad9d70886b38da3a17dc89e
Author: Nathaniel Landau <nate@natelandau.com>
Date:   Tue Oct 19 12:06:44 2021 -0400

    major overhaul

    - Add standaloneTemplate.sh
    - Rework README
    - Refactor inline documentation
    - Enforce coding standards
    - Remove CSV utilities
    - Add new array utilities
    - add _useGNUutils_
    - more ...

commit cd8e0d49aef25eeaf6b3e71a3c9e1f29ab9b06f5
Author: Nathaniel Landau <nate@natelandau.com>
Date:   Sun Oct 17 09:56:08 2021 -0400

    Add debug functions

commit f7c5c0a3d19815dcc6ba80b5f5a2ebb77ef88b07
Author: Nathaniel Landau <nate@natelandau.com>
Date:   Sat Oct 16 21:10:01 2021 -0400

    add new array functions

    _joinArray_, _isEmptyArray_, _sortArray_, _reverseSortArray_, and _mergearrays_

commit d8bc3d8cabdbcee3c479f97b43a45bdfe3bdafe0
Author: Nathaniel Landau <nate@natelandau.com>
Date:   Fri Oct 15 17:27:12 2021 -0400

    add _columnize_

commit 2fd2ae9435f476bc3968c3eb0d793db4bf1d9eaf
Author: Nathaniel Landau <nate@natelandau.com>
Date:   Mon Oct 11 22:17:45 2021 -0400

    _progressBar_: Fix unbound variable

commit e8933d15fc955a1acc665e9a081f131e681855d5
Author: Nathaniel Landau <nate@natelandau.com>
Date:   Sun Oct 10 11:50:42 2021 -0400

    _alert_: header now underlined

commit c9ce894361dec7d3513c038794a155519baf26bc
Author: Nathaniel Landau <nate@natelandau.com>
Date:   Tue Oct 5 09:49:42 2021 -0400

    _alert_: line numbers to gray

commit 4aaddd336ce613f629a7e6a62ef3b27ffc24d22d
Author: Nathaniel Landau <nate@natelandau.com>
Date:   Fri Oct 8 15:05:20 2021 -0400

    _usage_ to stdout

commit e2372fc3122ec1f20acc27f04d29b3785f014e25
Author: Nathaniel Landau <nate@natelandau.com>
Date:   Tue Oct 5 09:38:26 2021 -0400

    _setPATH_: remove unneeded logic

commit e60c75b6c954ac4bd146e2758252168027b9a43d
Author: Nathaniel Landau <nate@natelandau.com>
Date:   Tue Oct 5 09:25:38 2021 -0400

    _findSource_: bugfix

commit 0e84912e1ccd7203e5beff9f8737f8374f4aa5d8
Author: Nathaniel Landau <nate@natelandau.com>
Date:   Thu Sep 30 16:29:25 2021 -0400

    add requirements to documentation

commit 2c24843e3ada591e1868a94416e40b5ac0aa4994
Author: Nathaniel Landau <nate@natelandau.com>
Date:   Thu Sep 30 15:34:10 2021 -0400

    _uniqueFilename_: improve extension handling

commit 08bc2dfdcc8632efee9179e9c960a574fc17cf0c
Author: Nathaniel Landau <nate@natelandau.com>
Date:   Mon Sep 27 15:13:53 2021 -0400

    improve hooks script

commit 641918f1559d3b3aa38a9bbdf418938b2b81c176
Author: Nathaniel Landau <nate@natelandau.com>
Date:   Fri Sep 24 08:16:52 2021 -0400

    _inArry_: case insensitivity

commit eae10f170680540fdb4a1222add7e54f8785ea63
Author: Nathaniel Landau <nate@natelandau.com>
Date:   Mon Sep 20 18:31:44 2021 -0400

    clean up alerting

commit 700acd56f57fd57db84ef0e232ef41cdd7aee43c
Author: Nathaniel Landau <nate@natelandau.com>
Date:   Mon Sep 20 18:22:11 2021 -0400

    refactor _execute_

commit d893f86900a9fed9d91a0c9cc06c13b6b34d9926
Author: Nathaniel Landau <nate@natelandau.com>
Date:   Mon Sep 20 18:19:18 2021 -0400

    'fatal' replaces 'die'

commit 3326857bf127bef36cd9982246aa5b826d796d0a
Author: Nathaniel Landau <nate@natelandau.com>
Date:   Fri Sep 17 08:29:50 2021 -0400

    _execute_: ensure quiet and verbose work together
2021-10-21 16:03:27 -04:00

439 lines
12 KiB
Bash

# Transform text using these functions
# Some were adapted from https://github.com/jmcantrell/bashful
_cleanString_() {
    # DESC:
    #         Cleans a string of text
    # ARGS:
    #         $1 (Required) - String to be cleaned
    #         $2 (Optional) - Comma separated list of characters/patterns to remove. Each item
    #                         is handed to sed as a regex, so escape regex special characters.
    # OPTS:
    #         -l:          Forces all text to lowercase
    #         -u:          Forces all text to uppercase
    #         -a:          Removes every character except ASCII letters, digits, underscores,
    #                      spaces and dashes
    #         -s:          In combination with -a, replaces the unwanted characters with a
    #                      space instead of deleting them
    #         -p FROM,TO:  Replaces FROM with TO (extended regex, escape special characters)
    # OUTS:
    #         0 - Success
    #         1 - Unrecognized option, or option missing its argument
    #         stdout: Prints cleaned string
    # USAGE:
    #         _cleanString_ [OPT] [STRING] [CHARS TO REMOVE]
    #         _cleanString_ -lp " ,-" [STRING] [CHARS TO REMOVE]
    # NOTES:
    #         Always cleaned:
    #           - leading white space
    #           - trailing white space
    #           - multiple spaces become a single space
    #           - repeated dashes/underscores become a single one
    #           - spaces before and after - and _ are removed

    local opt
    local OPTARG
    local OPTIND=1
    local _lc=false
    local _uc=false
    local _alphanumeric=false
    local _replace=false
    local _us=false
    local _c
    local -a _pairs=()
    local -a _arrayToClean=()

    # 'p:' lets getopts consume the replacement pair itself; no manual shifting needed
    while getopts ":lLuUaAsSp:P:" opt; do
        case ${opt} in
            l | L) _lc=true ;;
            u | U) _uc=true ;;
            a | A) _alphanumeric=true ;;
            s | S) _us=true ;;
            p | P)
                IFS=',' read -r -a _pairs <<<"${OPTARG}"
                _replace=true
                ;;
            :)
                error "Option '-${OPTARG}' passed to ${FUNCNAME[0]} requires an argument. Exiting."
                return 1
                ;;
            *)
                error "Unrecognized option '-${OPTARG}' passed to ${FUNCNAME[0]}. Exiting."
                return 1
                ;;
        esac
    done
    shift $((OPTIND - 1))

    [[ $# == 0 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"

    local _string="${1}"
    local _userChars="${2:-}"
    IFS=',' read -r -a _arrayToClean <<<"${_userChars}"

    # Trim leading/trailing white space and collapse runs of spaces/tabs
    _string="$(printf '%s\n' "${_string}" | awk '{$1=$1};1')"

    # Remove every user supplied pattern
    if [[ ${#_arrayToClean[@]} -gt 0 ]]; then
        for _c in "${_arrayToClean[@]}"; do
            # An empty item (e.g. from "a,,b") would make sed fail on an empty regex
            [[ -n ${_c} ]] || continue
            debug "cleaning: ${_c}"
            _string="$(printf '%s\n' "${_string}" | sed "s/${_c}//g")"
        done
    fi

    if "${_lc}"; then
        _string="$(printf '%s\n' "${_string}" | tr '[:upper:]' '[:lower:]')"
    fi
    if "${_uc}"; then
        _string="$(printf '%s\n' "${_string}" | tr '[:lower:]' '[:upper:]')"
    fi

    if "${_alphanumeric}"; then
        if "${_us}"; then
            # No trailing newline is sent, otherwise tr -c would turn it into a space too
            _string="$(printf '%s' "${_string}" | tr -c '[:alnum:]_ -' ' ')"
        else
            # The dash goes last in the set so it is literal and no backslash sneaks in
            _string="$(printf '%s\n' "${_string}" | sed 's/[^a-zA-Z0-9_ -]//g')"
        fi
    fi

    if "${_replace}" && [[ -n ${_pairs[0]:-} ]]; then
        _string="$(printf '%s\n' "${_string}" | sed -E "s/${_pairs[0]}/${_pairs[1]:-}/g")"
    fi

    # Squeeze duplicate dashes/underscores, drop spaces around them, then trim again
    _string="$(printf '%s\n' "${_string}" | tr -s '-' | tr -s '_')"
    _string="$(printf '%s\n' "${_string}" | sed -E -e 's/([_-]) /\1/g' -e 's/ ([_-])/\1/g')"
    _string="$(printf '%s\n' "${_string}" | awk '{$1=$1};1')"

    printf '%s\n' "${_string}"
}
_decodeHTML_() {
    # DESC:
    #         Decodes HTML entities in a string by running it through a sed script file
    # ARGS:
    #         $1 (Required) - String to be decoded
    # OUTS:
    #         0 - Success
    #         1 - Sed file not found, or sed failed
    #         stdout: Prints decoded output
    # USAGE:
    #         _decodeHTML_ <string>
    # NOTE:
    #         The replacements are read from ${HOME}/.sed/htmlDecode.sed
    #         See: ../sedfiles/htmlDecode.sed

    if [[ $# == 0 ]]; then
        fatal "Missing required argument to ${FUNCNAME[0]}"
    fi

    local _sedFile="${HOME}/.sed/htmlDecode.sed"

    # Nothing to decode with when the replacement file is absent
    if [ ! -f "${_sedFile}" ]; then
        return 1
    fi

    printf "%s\n" "${1}" | sed -f "${_sedFile}" || return 1
}
_decodeURL_() {
    # DESC:
    #         Decode a URL encoded string
    # ARGS:
    #         $1 (Required) - String to be decoded
    # OUTS:
    #         stdout: Prints decoded string (no trailing newline is added)
    # USAGE:
    #         _decodeURL_ <string>
    # NOTE:
    #         '+' is decoded to a space and every '%' is treated as the start of a
    #         %XX hex escape. Backslashes already present in the input are kept literal.

    [[ $# == 0 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"

    local _url_encoded="${1//+/ }"

    # Protect literal backslashes first: printf '%b' would otherwise interpret
    # sequences such as \t or \n that merely happen to be part of the input.
    _url_encoded=${_url_encoded//\\/\\\\}

    # Turn each %XX into \xXX and let printf '%b' emit the corresponding byte
    printf '%b' "${_url_encoded//%/\\x}"
}
_encodeHTML_() {
    # DESC:
    #         Encode HTML characters by running the string through a sed script file
    # ARGS:
    #         $1 (Required) - String to be encoded
    # OUTS:
    #         0 - Success
    #         1 - Sed file not found, or sed failed
    #         stdout: Prints encoded output
    # USAGE:
    #         _encodeHTML_ <string>
    # NOTE:
    #         The replacements are read from ${HOME}/.sed/htmlEncode.sed
    #         See: ../sedfiles/htmlEncode.sed

    [[ $# == 0 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"

    local _sedFile
    _sedFile="${HOME}/.sed/htmlEncode.sed"

    if [[ ! -f ${_sedFile} ]]; then
        return 1
    fi

    # printf (not echo) so that strings such as "-n" or "-e" are passed through verbatim
    printf "%s\n" "${1}" | sed -f "${_sedFile}" || return 1
}
_encodeURL_() {
    # DESC:
    #         URL encode a string
    # ARGS:
    #         $1 (Required) - String to be encoded
    # OUTS:
    #         stdout: Prints encoded string (no trailing newline is added)
    # USAGE:
    #         _encodeURL_ <string>
    # NOTE:
    #         ASCII letters, digits and . ~ _ - are passed through unchanged. Every other
    #         character is written as %XX.
    # CREDIT:
    #         https://gist.github.com/cdown/1163649

    [[ $# == 0 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"

    # LC_ALL outranks LANG, so set it to make sure the C locale is really in
    # effect (ASCII-only ranges below) even when the caller exported LC_ALL.
    local LC_ALL=C
    local i
    local _char

    for ((i = 0; i < ${#1}; i++)); do
        _char="${1:i:1}"
        case "${_char}" in
            [a-zA-Z0-9.~_-])
                # Never use data as the printf format string
                printf '%s' "${_char}"
                ;;
            *)
                # A leading quote makes printf use the numeric value of the character
                printf '%%%02X' "'${_char}"
                ;;
        esac
    done
}
_escapeString_() {
    # DESC:
    #         Puts a backslash in front of special characters
    #         ( ] \ . | $ [ space ( ) { } ? + * ^ )
    # ARGS:
    #         $@ (Required) - String(s) to be escaped
    # OUTS:
    #         stdout: Prints escaped output, one line per argument
    # USAGE:
    #         _escapeString_ "Some text here"

    if [[ $# == 0 ]]; then
        fatal "Missing required argument to ${FUNCNAME[0]}"
    fi

    local _arg
    for _arg in "$@"; do
        printf "%s\n" "${_arg}"
    done | sed -e 's/[]\.|$[ (){}?+*^]/\\&/g'
}
_lower_() {
    # DESC:
    #         Lowercases text. Reads stdin, so use it at the end of a pipe or with a
    #         here string.
    # ARGS:
    #         None
    # OUTS:
    #         stdout: The input with uppercase characters converted to lowercase
    # USAGE:
    #         text=$(_lower_ <<<"$1")
    #         echo "STRING" | _lower_

    tr "[:upper:]" "[:lower:]"
}
_ltrim_() {
    # DESC:
    #         Strips leading characters (from the left) of every input line. Reads stdin,
    #         so use it at the end of a pipe or with a here string.
    # ARGS:
    #         $1 (Optional) - Character(s) to trim; placed inside a sed bracket expression.
    #                         Defaults to [:space:]
    # OUTS:
    #         stdout: The trimmed text
    # USAGE:
    #         text=$(_ltrim_ <<<"$1")
    #         echo "STRING" | _ltrim_

    local _set="${1:-[:space:]}"

    # '%' is the sed delimiter below, so any '%' in the set has to be escaped
    _set="${_set//%/\\%}"

    sed -e "s%^[${_set}]*%%"
}
_regexCapture_() {
    # DESC:
    #         Matches a string against a regex and prints the first capture group
    # ARGS:
    #         $1 (Required) - Input String
    #         $2 (Required) - Regex pattern
    # OUTS:
    #         0 - Regex matched
    #         1 - Regex did not match
    #         stdout: Prints the text captured by the first group
    # USAGE:
    #         _regexCapture_ "#FFFFFF" '^(#?([a-fA-F0-9]{6}|[a-fA-F0-9]{3}))$'
    # NOTE:
    #         Only the first capture group is printed. Patterns with several capture
    #         groups need a modified version of this function.
    # CREDIT:
    #         https://github.com/dylanaraps/pure-bash-bible

    if [[ $# -lt 2 ]]; then
        fatal "Missing required argument to ${FUNCNAME[0]}"
    fi

    # Bail out early when there is no match
    [[ $1 =~ $2 ]] || return 1

    printf '%s\n' "${BASH_REMATCH[1]}"
    return 0
}
_rtrim_() {
    # DESC:
    #         Strips trailing characters (from the right) of every input line. Reads stdin,
    #         so use it at the end of a pipe or with a here string.
    # ARGS:
    #         $1 (Optional) - Character(s) to trim; placed inside a sed bracket expression.
    #                         Defaults to [:space:]
    # OUTS:
    #         stdout: The trimmed text
    # USAGE:
    #         text=$(_rtrim_ <<<"$1")
    #         echo "STRING" | _rtrim_

    local _set="${1:-[:space:]}"

    # '%' is the sed delimiter below, so any '%' in the set has to be escaped
    _set="${_set//%/\\%}"

    sed -e "s%[${_set}]*\$%%"
}
_splitString_() {
    # DESC:
    #         Split a string into pieces based on a given delimiter
    # ARGS:
    #         $1 (Required) - String to be split
    #         $2 (Required) - Delimiter (taken literally, glob characters are not special)
    # OUTS:
    #         0 - Success
    #         1 - Failure
    #         stdout: Values split by delimiter separated by newline
    # USAGE:
    #         ARRAY=( $(_splitString_ "string1,string2,string3" ",") )
    # NOTE:
    #         Empty fields (two delimiters in a row) are dropped.
    # CREDIT:
    #         https://github.com/labbots/bash-utility/blob/master/src/misc.sh

    [[ $# -lt 2 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"

    local -a _arr=()
    local _joined

    # Quote the delimiter so characters such as '*' or '?' match themselves
    # instead of acting as a glob pattern.
    _joined=${1//"$2"/$'\n'}

    # read -d "" hits end-of-input without finding a NUL and therefore always
    # returns non-zero; ignore that status so callers running 'set -e' survive.
    IFS=$'\n' read -d "" -ra _arr <<<"${_joined}" || true

    printf '%s\n' "${_arr[@]}"
}
_stringContains_() {
    # DESC:
    #         Tests whether a string contains a substring
    # ARGS:
    #         $1 (Required) - String to be tested
    #         $2 (Required) - Substring to be tested for (compared literally)
    # OUTS:
    #         0 - Substring found
    #         1 - Substring not found
    # USAGE:
    #         _stringContains_ "Hello World!" "lo"

    [[ $# -lt 2 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"

    # The needle is quoted so that *, ? and [ ] in it are matched literally
    # rather than being interpreted as glob pattern characters.
    if [[ ${1} == *"${2}"* ]]; then
        return 0
    fi
    return 1
}
_stringRegex_() {
    # DESC:
    #         Checks a string against a regular expression
    # ARGS:
    #         $1 (Required) - String to be tested
    #         $2 (Required) - Regex pattern to be tested for
    # OUTS:
    #         0 - String matches the pattern
    #         1 - String does not match the pattern
    # USAGE:
    #         _stringRegex_ "HELLO" "^[A-Z]*$"

    if [[ $# -lt 2 ]]; then
        fatal "Missing required argument to ${FUNCNAME[0]}"
    fi

    [[ ${1} =~ ${2} ]] && return 0
    return 1
}
_stripStopwords_() {
    # DESC:
    #         Removes common stopwords from a string using a list of sed replacements located
    #         in an external file. Additional stopwords can be added in arg2
    # ARGS:
    #         $1 (Required) - String to parse
    #         $2 (Optional) - Additional stopwords (comma separated)
    # OUTS:
    #         0 - Success
    #         1 - Error
    #         stdout: Prints string cleaned of stopwords
    # USAGE:
    #         CLEAN_WORD="$(_stripStopwords_ "[STRING]" "[MORE,STOP,WORDS]")"
    # NOTE:
    #         Requires GNU sed.
    #         The replacements are read from ${HOME}/.sed/stopwords.sed
    #         See: ../sedfiles/stopwords.sed

    [[ $# == 0 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"

    # A sed without --version support complains on stderr; silence that noise,
    # the fatal message below already explains the problem.
    if ! sed --version 2>/dev/null | grep GNU &>/dev/null; then
        fatal "_stripStopwords_: Required GNU sed not found. Exiting."
    fi

    local _string="${1}"
    local _sedFile="${HOME}/.sed/stopwords.sed"
    local _w
    local -a _localStopWords=()

    # printf is used instead of echo throughout so input such as "-n" survives
    if [ -f "${_sedFile}" ]; then
        _string="$(printf '%s\n' "${_string}" | sed -f "${_sedFile}")"
    else
        fatal "_stripStopwords_: Missing sedfile expected at: ${_sedFile}"
    fi

    IFS=',' read -r -a _localStopWords <<<"${2:-}"

    if [[ ${#_localStopWords[@]} -gt 0 ]]; then
        for _w in "${_localStopWords[@]}"; do
            # Whole-word, case-insensitive removal of each extra stopword
            _string="$(printf '%s\n' "${_string}" | sed -E "s/\b${_w}\b//gI")"
        done
    fi

    # Remove double spaces and trim left/right
    _string="$(printf '%s\n' "${_string}" | sed -E 's/[ ]{2,}/ /g' | _trim_)"

    printf "%s\n" "${_string}"
}
_stripANSI_() {
    # DESC:
    #         Strips ANSI escape sequences from a string
    # ARGS:
    #         $1 (Required) - String to be cleaned
    # OUTS:
    #         0 - Success
    #         1 - Failure
    #         stdout: Prints string with ANSI escape sequences removed (no trailing newline)
    # USAGE:
    #         _stripANSI_ "\e[1m\e[91mThis is bold red text\e(B\e[m.\e[92mThis is green text.\e(B\e[m"
    # NOTE:
    #         Sequences are matched on the real ESC byte (0x1b), followed by '[' or '(',
    #         optional numeric parameters separated by ';', and a final m, K or B.

    [[ $# == 0 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"

    local _tmp="${1}"
    local _esc=$'\x1b'
    local _re

    # [0-9;]* accepts any number of ';'-separated parameters (e.g. ESC[1;31m or
    # ESC[38;5;196m), not just a single number followed by semicolons.
    _re="(.*)${_esc}[\[(][0-9;]*[mKB](.*)"

    # Each pass removes one sequence; repeat until none are left
    while [[ ${_tmp} =~ ${_re} ]]; do
        _tmp="${BASH_REMATCH[1]}${BASH_REMATCH[2]}"
    done

    printf "%s" "${_tmp}"
}
_trim_() {
    # DESC:
    #         Strips leading and trailing whitespace from every input line. Because awk
    #         rebuilds each line from its fields, runs of whitespace inside the line are
    #         reduced to a single space as well. Reads stdin, so use it at the end of a
    #         pipe or with a here string.
    # ARGS:
    #         None
    # OUTS:
    #         stdout: The trimmed text
    # USAGE:
    #         text=$(_trim_ <<<"$1")
    #         echo "STRING" | _trim_

    # Assigning a field forces awk to rebuild the record; the bare '1' prints it
    awk '{ $1 = $1 } 1'
}
_upper_() {
    # DESC:
    #         Uppercases text. Reads stdin, so use it at the end of a pipe or with a
    #         here string.
    # ARGS:
    #         None
    # OUTS:
    #         stdout: The input with lowercase characters converted to uppercase
    # USAGE:
    #         text=$(_upper_ <<<"$1")
    #         echo "STRING" | _upper_

    tr "[:lower:]" "[:upper:]"
}