Files
shell-scripting-templates/utilities/strings.bash
2021-10-28 17:59:34 -04:00

494 lines
14 KiB
Bash

# Transform text using these functions
# Some were adapted from https://github.com/jmcantrell/bashful
_cleanString_() {
    # DESC:
    #         Cleans a string of text
    # ARGS:
    #         $1 (Required) - String to be cleaned
    #         $2 (Optional) - Specific characters to be removed (separated by commas,
    #                         escape regex special chars). Each entry is used as a sed
    #                         pattern, so it must not contain an unescaped '/'.
    # OPTS:
    #         -l:  Forces all text to lowercase
    #         -u:  Forces all text to uppercase
    #         -a:  Removes every character except letters, digits, underscores,
    #              spaces and dashes
    #         -s:  In combination with -a, replaces those characters with a space
    #              instead of deleting them
    #         -p:  Replace one pattern with another. Takes one argument of the form
    #              "PATTERN,REPLACEMENT" (extended regex, escape regex characters)
    # OUTS:
    #         0 - Success
    #         1 - Unrecognized option, or -p given without an argument
    #         stdout: Prints cleaned string
    # USAGE:
    #         _cleanString_ [OPT] [STRING] [CHARS TO REMOVE]
    #         _cleanString_ -lp " ,-" [STRING] [CHARS TO REMOVE]
    # NOTES:
    #         Always cleaned:
    #           - leading white space
    #           - trailing white space
    #           - multiple spaces become a single space
    #           - remove spaces before and after -_
    #         Order of operations: user characters, case change, -a/-s, -p, final tidy.

    local opt
    local _lc=false
    local _uc=false
    local _alphanumeric=false
    local _replace=false
    local _us=false
    local -a _pairs=()

    local OPTIND=1
    # -p receives its value through OPTARG so the positional parameters are never
    # shifted while getopts is still walking them.
    while getopts ":lLuUaAsSp:P:" opt; do
        case ${opt} in
            l | L) _lc=true ;;
            u | U) _uc=true ;;
            a | A) _alphanumeric=true ;;
            s | S) _us=true ;;
            p | P)
                IFS=',' read -r -a _pairs <<<"${OPTARG}"
                _replace=true
                ;;
            :)
                error "Option '-${OPTARG}' passed to ${FUNCNAME[0]} requires an argument. Exiting."
                return 1
                ;;
            *)
                error "Unrecognized option '-${OPTARG}' passed to ${FUNCNAME[0]}. Exiting."
                return 1
                ;;
        esac
    done
    shift $((OPTIND - 1))

    [[ $# == 0 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"

    local _string="${1}"
    local _userChars="${2:-}"
    local -a _arrayToClean=()
    IFS=',' read -r -a _arrayToClean <<<"${_userChars}"

    # trim trailing/leading white space and duplicate spaces/tabs
    _string="$(printf "%s" "${_string}" | awk '{$1=$1};1')"

    local i
    for i in "${_arrayToClean[@]}"; do
        debug "cleaning: $i"
        _string="$(printf "%s" "${_string}" | sed "s/$i//g")"
    done

    if "${_lc}"; then
        _string="$(printf "%s" "${_string}" | tr '[:upper:]' '[:lower:]')"
    fi
    if "${_uc}"; then
        _string="$(printf "%s" "${_string}" | tr '[:lower:]' '[:upper:]')"
    fi

    if "${_alphanumeric}" && "${_us}"; then
        _string="$(printf "%s" "${_string}" | tr -c '[:alnum:]_ -' ' ')"
    elif "${_alphanumeric}"; then
        # The dash sits last in the bracket expression so it is literal; a backslash
        # here would itself become part of the set and survive the cleaning.
        _string="$(printf "%s" "${_string}" | sed "s/[^a-zA-Z0-9_ -]//g")"
    fi

    # Skip the substitution when no pattern was supplied (an empty sed pattern is an error)
    if "${_replace}" && [[ -n ${_pairs[0]:-} ]]; then
        _string="$(printf "%s" "${_string}" | sed -E "s/${_pairs[0]}/${_pairs[1]:-}/g")"
    fi

    # trim trailing/leading white space and duplicate dashes & spaces
    _string="$(printf "%s" "${_string}" | tr -s '-' | tr -s '_')"
    _string="$(printf "%s" "${_string}" | sed -E 's/([_-]) /\1/g' | sed -E 's/ ([_-])/\1/g')"
    _string="$(printf "%s" "${_string}" | awk '{$1=$1};1')"

    printf "%s\n" "${_string}"
}
_decodeHTML_() {
    # DESC:
    #         Decodes HTML entities in a string by running it through a sed script.
    # ARGS:
    #         $1 (Required) - String to be decoded
    # OUTS:
    #         0 - Success
    #         1 - The sed file is missing or sed failed
    #         stdout: Prints decoded output
    # USAGE:
    #         _decodeHTML_ <string>
    # NOTE:
    #         The replacements are read from ${HOME}/.sed/htmlDecode.sed.
    #         See: ../sedfiles/htmlDecode.sed

    [[ $# == 0 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"

    local _sedFile="${HOME}/.sed/htmlDecode.sed"

    # Nothing can be decoded without the replacement table
    if [[ ! -f ${_sedFile} ]]; then
        return 1
    fi

    printf "%s\n" "${1}" | sed -f "${_sedFile}" || return 1
}
_decodeURL_() {
    # DESC:
    #         Decodes a URL (percent) encoded string
    # ARGS:
    #         $1 (Required) - String to be decoded
    # OUTS:
    #         stdout: Prints the decoded string (no trailing newline is added)
    # USAGE:
    #         _decodeURL_ <string>

    if [[ $# == 0 ]]; then
        fatal "Missing required argument to ${FUNCNAME[0]}"
    fi

    local _decoded

    # '+' stands for a space in encoded query strings
    _decoded="${1//+/ }"
    # Turn every %XX into \xXX so that printf's %b expands it to the raw byte
    _decoded="${_decoded//%/\\x}"

    printf '%b' "${_decoded}"
}
_encodeHTML_() {
    # DESC:
    #         Encodes HTML characters in a string by running it through a sed script.
    # ARGS:
    #         $1 (Required) - String to be encoded
    # OUTS:
    #         0 - Success
    #         1 - The sed file is missing or sed failed
    #         stdout: Prints encoded output
    # USAGE:
    #         _encodeHTML_ <string>
    # NOTE:
    #         The replacements are read from ${HOME}/.sed/htmlEncode.sed.
    #         See: ../sedfiles/htmlEncode.sed

    [[ $# == 0 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"

    local _sedFile="${HOME}/.sed/htmlEncode.sed"

    if [[ -f ${_sedFile} ]]; then
        printf "%s" "${1}" | sed -f "${_sedFile}" || return 1
    else
        # Nothing can be encoded without the replacement table
        return 1
    fi
}
_encodeURL_() {
    # DESC:
    #         URL (percent) encode a string. Letters, digits and the characters . ~ _ -
    #         are passed through unchanged; every other byte is written as %XX.
    # ARGS:
    #         $1 (Required) - String to be encoded
    # OUTS:
    #         stdout: Prints the encoded string (no trailing newline is added)
    # USAGE:
    #         _encodeURL_ <string>
    # CREDIT:
    #         https://gist.github.com/cdown/1163649

    [[ $# == 0 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"

    # LC_ALL (unlike LANG) overrides every other locale variable, so the string is
    # walked byte by byte and a multibyte character is emitted as its individual
    # bytes (e.g. %C3%A9) rather than as a single code point.
    local LC_ALL=C
    local i
    local _byte

    for ((i = 0; i < ${#1}; i++)); do
        _byte="${1:i:1}"
        case "${_byte}" in
            [a-zA-Z0-9.~_-]) printf '%s' "${_byte}" ;;
            # A leading quote makes printf output the numeric value of the character
            *) printf '%%%02X' "'${_byte}" ;;
        esac
    done
}
_escapeString_() {
    # DESC:
    #         Escapes a string by placing a backslash in front of special characters
    # ARGS:
    #         $@ (Required) - String(s) to be escaped. Each argument is printed on
    #                         its own line.
    # OUTS:
    #         stdout: Prints escaped output
    # USAGE:
    #         _escapeString_ "Some text here"

    if [[ $# == 0 ]]; then
        fatal "Missing required argument to ${FUNCNAME[0]}"
    fi

    # '&' in the replacement is the matched character, so each match gains a leading '\'
    local _sedExpr='s/[]\.|$[ (){}?+*^]/\\&/g'

    printf "%s\n" "${@}" | sed -e "${_sedExpr}"
}
_lower_() {
    # DESC:
    #         Converts text read from stdin to lowercase. Meant to be used at the
    #         end of a pipe or with a here string.
    # ARGS:
    #         None
    # OUTS:
    #         stdout: The input with uppercase characters converted to lowercase
    # USAGE:
    #         text=$(_lower_ <<<"$1")
    #         printf "STRING" | _lower_

    local -r _from='[:upper:]'
    local -r _to='[:lower:]'

    tr "${_from}" "${_to}"
}
_ltrim_() {
    # DESC:
    #         Removes leading characters (whitespace by default) from each line read
    #         from stdin. Meant to be used at the end of a pipe or with a here string.
    # ARGS:
    #         $1 (Optional) - Character(s) to trim. Defaults to [:space:]
    # OUTS:
    #         stdout: The input with the leading characters removed
    # USAGE:
    #         text=$(_ltrim_ <<<"$1")
    #         printf "STRING" | _ltrim_

    local _class="${1:-[:space:]}"

    # '%' is the sed delimiter below, so any '%' supplied by the caller is escaped
    local _escaped="${_class//%/\\%}"

    sed "s%^[${_escaped}]*%%"
}
_regexCapture_() {
    # DESC:
    #         Use regex to capture a group of text from a string
    # ARGS:
    #         $1 (Required) - Input String
    #         $2 (Required) - Regex pattern (must contain at least one capture group)
    # OPTIONS:
    #         -i (Optional) - Ignore case
    # OUTS:
    #         0 - Regex matched
    #         1 - Regex did not match
    #         stdout: Prints the text matched by the first capture group
    # USAGE:
    #         HEXCODE=$(_regexCapture_ "background-color: #FFFFFF;" '(#[a-fA-F0-9]{6})')
    #         $ printf "%s\n" "${HEXCODE}"
    #         $ #FFFFFF
    # NOTE:
    #         Only the first capture group is printed. When using multiple capture
    #         groups some modification is needed.
    # CREDIT:
    #         https://github.com/dylanaraps/pure-bash-bible

    local opt
    local _resetNocase=false
    local _status=1

    local OPTIND=1
    while getopts ":iI" opt; do
        case ${opt} in
            i | I)
                # Record whether this call is the one that enabled nocasematch so the
                # caller's setting can be put back without installing a RETURN trap
                # (a trap set here would outlive the function and replace the caller's).
                if ! shopt -q nocasematch; then
                    _resetNocase=true
                    shopt -s nocasematch
                fi
                ;;
            *) fatal "Unrecognized option '-${OPTARG}' passed to ${FUNCNAME[0]}. Exiting." ;;
        esac
    done
    shift $((OPTIND - 1))

    [[ $# -lt 2 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"

    if [[ $1 =~ $2 ]]; then
        # ':-' keeps 'set -u' quiet when the pattern has no capture group
        printf '%s\n' "${BASH_REMATCH[1]:-}"
        _status=0
    fi

    if "${_resetNocase}"; then
        shopt -u nocasematch
    fi

    return "${_status}"
}
_rtrim_() {
    # DESC:
    #         Removes trailing characters (whitespace by default) from each line read
    #         from stdin. Meant to be used at the end of a pipe or with a here string.
    # ARGS:
    #         $1 (Optional) - Character(s) to trim. Defaults to [:space:]
    # OUTS:
    #         stdout: The input with the trailing characters removed
    # USAGE:
    #         text=$(_rtrim_ <<<"$1")
    #         printf "STRING" | _rtrim_

    local _class="${1:-[:space:]}"

    # '%' is the sed delimiter below, so any '%' supplied by the caller is escaped
    local _escaped="${_class//%/\\%}"

    sed "s%[${_escaped}]*\$%%"
}
_splitString_() {
    # DESC:
    #         Split a string into pieces based on a given delimiter
    # ARGS:
    #         $1 (Required) - String to be split
    #         $2 (Required) - Delimiter (matched literally, may be more than one character)
    # OUTS:
    #         0 - Success
    #         1 - Failure
    #         stdout: Values split by delimiter separated by newline
    # USAGE:
    #         ARRAY=( $(_splitString_ "string1,string2,string3" ",") )
    # NOTE:
    #         Empty fields (two delimiters in a row) are dropped.
    # CREDIT:
    #         https://github.com/labbots/bash-utility/blob/master/src/misc.sh

    [[ $# -lt 2 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"

    local _newline=$'\n'
    local -a _arr=()

    # Quoting the delimiter stops characters such as '*' or '?' from being treated
    # as glob patterns and swallowing the whole string.
    local _lines="${1//"$2"/${_newline}}"

    # 'read -d ""' reads to end of input and therefore always reports failure;
    # '|| true' keeps that from aborting callers running under 'set -e'.
    IFS=$'\n' read -d "" -ra _arr <<<"${_lines}" || true

    printf '%s\n' "${_arr[@]}"
}
_stringContains_() {
    # DESC:
    #         Tests whether a string contains a substring
    # ARGS:
    #         $1 (Required) - String to be tested
    #         $2 (Required) - Substring to be tested for (matched literally)
    # OPTIONS:
    #         -i (Optional) - Ignore case
    # OUTS:
    #         0 - Search pattern found
    #         1 - Pattern not found
    # USAGE:
    #         _stringContains_ "Hello World!" "lo"
    #         _stringContains_ -i "Hello World!" "WORLD"

    local opt
    local _resetNocase=false
    local _status=1

    local OPTIND=1
    while getopts ":iI" opt; do
        case ${opt} in
            i | I)
                # Record whether this call is the one that enabled nocasematch so the
                # caller's setting can be put back without installing a RETURN trap
                # (a trap set here would outlive the function and replace the caller's).
                if ! shopt -q nocasematch; then
                    _resetNocase=true
                    shopt -s nocasematch
                fi
                ;;
            *) fatal "Unrecognized option '-${OPTARG}' passed to ${FUNCNAME[0]}. Exiting." ;;
        esac
    done
    shift $((OPTIND - 1))

    [[ $# -lt 2 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"

    # The needle is quoted so characters such as '*', '?' and '[' are compared
    # literally instead of acting as glob wildcards.
    if [[ ${1} == *"${2}"* ]]; then
        _status=0
    fi

    if "${_resetNocase}"; then
        shopt -u nocasematch
    fi

    return "${_status}"
}
_stringRegex_() {
    # DESC:
    #         Tests whether a string matches a regex pattern
    # ARGS:
    #         $1 (Required) - String to be tested
    #         $2 (Required) - Regex pattern to be tested for
    # OPTIONS:
    #         -i (Optional) - Ignore case
    # OUTS:
    #         0 - Search pattern found
    #         1 - Pattern not found
    # USAGE:
    #         _stringRegex_ "HELLO" "^[A-Z]*$"
    #         _stringRegex_ -i "HELLO" "^[a-z]*$"

    local opt
    local _resetNocase=false
    local _status=1

    local OPTIND=1
    while getopts ":iI" opt; do
        case ${opt} in
            i | I)
                # Record whether this call is the one that enabled nocasematch so the
                # caller's setting can be put back without installing a RETURN trap
                # (a trap set here would outlive the function and replace the caller's).
                if ! shopt -q nocasematch; then
                    _resetNocase=true
                    shopt -s nocasematch
                fi
                ;;
            *) fatal "Unrecognized option '-${OPTARG}' passed to ${FUNCNAME[0]}. Exiting." ;;
        esac
    done
    shift $((OPTIND - 1))

    [[ $# -lt 2 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"

    # The pattern stays unquoted so that it is interpreted as a regex
    if [[ ${1} =~ ${2} ]]; then
        _status=0
    fi

    if "${_resetNocase}"; then
        shopt -u nocasematch
    fi

    return "${_status}"
}
_stripStopwords_() {
    # DESC:
    #         Strips common stopwords from a string. The base list comes from a sed
    #         script kept in an external file; extra stopwords may be passed in $2.
    # ARGS:
    #         $1 (Required) - String to parse
    #         $2 (Optional) - Additional stopwords (comma separated)
    # OUTS:
    #         0 - Success
    #         1 - Error
    #         stdout: Prints string cleaned of stopwords
    # USAGE:
    #         CLEAN_WORD="$(_stripStopwords_ "[STRING]" "[MORE,STOP,WORDS]")"
    # NOTE:
    #         Requires GNU sed and the sed file ${HOME}/.sed/stopwords.sed.
    #         See: ../sedfiles/stopwords.sed

    [[ $# == 0 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"

    # The \b word boundaries and the I flag used below are GNU sed extensions
    if ! sed --version | grep GNU &>/dev/null; then
        fatal "_stripStopwords_: Required GNU sed not found. Exiting."
    fi

    local _cleaned="${1}"
    local _sedFile="${HOME}/.sed/stopwords.sed"
    local _word
    local -a _extraWords=()

    if [[ ! -f ${_sedFile} ]]; then
        fatal "_stripStopwords_: Missing sedfile expected at: ${_sedFile}"
    else
        _cleaned="$(printf "%s" "${_cleaned}" | sed -f "${_sedFile}")"
    fi

    # Caller supplied stopwords are removed as whole words, ignoring case
    IFS=',' read -r -a _extraWords <<<"${2:-}"
    if ((${#_extraWords[@]} > 0)); then
        for _word in "${_extraWords[@]}"; do
            _cleaned="$(printf "%s" "${_cleaned}" | sed -E "s/\b${_word}\b//gI")"
        done
    fi

    # Remove double spaces and trim left/right
    _cleaned="$(printf "%s" "${_cleaned}" | sed -E 's/[ ]{2,}/ /g' | _trim_)"

    printf "%s\n" "${_cleaned}"
}
_stripANSI_() {
    # DESC:
    #         Strips ANSI escape sequences from a string. Handles sequences that start
    #         with ESC[ or ESC( followed by any number of semicolon separated numeric
    #         parameters and ending in m, K or B (e.g. ESC[1m, ESC[1;31m, ESC[38;5;196m,
    #         ESC[K, ESC(B).
    # ARGS:
    #         $1 (Required) - String to be cleaned. Must contain real escape bytes,
    #                         not the two characters '\e'.
    # OUTS:
    #         0 - Success
    #         1 - Failure
    #         stdout: Prints string with ANSI escape sequences removed (no trailing
    #                 newline is added)
    # USAGE:
    #         _stripANSI_ $'\e[1m\e[91mThis is bold red text\e(B\e[m.\e[92mThis is green text.\e(B\e[m'

    [[ $# == 0 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"

    local _tmp="${1}"
    local _esc=$'\e'

    # [0-9;]* accepts any mix of digits and semicolons so multi-parameter
    # sequences such as ESC[1;31m are matched in full.
    local _re="(.*)${_esc}[[(][0-9;]*[mKB](.*)"

    # Each pass removes one sequence; loop until none are left
    while [[ ${_tmp} =~ ${_re} ]]; do
        _tmp="${BASH_REMATCH[1]}${BASH_REMATCH[2]}"
    done

    printf "%s" "${_tmp}"
}
_trim_() {
    # DESC:
    #         Removes leading/trailing whitespace from each line read from stdin and
    #         reduces internal runs of whitespace to a single space.
    # ARGS:
    #         None
    # OUTS:
    #         stdout: Prints the trimmed text
    # USAGE:
    #         text=$(_trim_ <<<"$1")
    #         printf "%s" "STRING" | _trim_
    # NOTE:
    #         Used through a pipe or here string.

    # Assigning a field makes awk rebuild the record with single-space separators
    awk '{ $1 = $1 } 1'
}
_upper_() {
    # DESC:
    #         Converts text read from stdin to uppercase. Meant to be used at the
    #         end of a pipe or with a here string.
    # ARGS:
    #         None
    # OUTS:
    #         stdout: The input with lowercase characters converted to uppercase
    # USAGE:
    #         text=$(_upper_ <<<"$1")
    #         printf "%s" "STRING" | _upper_

    local -r _from='[:lower:]'
    local -r _to='[:upper:]'

    tr "${_from}" "${_to}"
}