mirror of
https://github.com/natelandau/shell-scripting-templates.git
synced 2025-11-08 13:13:47 -05:00
494 lines
14 KiB
Bash
494 lines
14 KiB
Bash
# Transform text using these functions
|
|
# Some were adapted from https://github.com/jmcantrell/bashful
|
|
|
|
_cleanString_() {
|
|
# DESC:
|
|
# Cleans a string of text
|
|
# ARGS:
|
|
# $1 (Required) - String to be cleaned
|
|
# $2 (optional) - Specific characters to be removed (separated by commas,
|
|
# escape regex special chars)
|
|
# OPTS:
|
|
# -l: Forces all text to lowercase
|
|
# -u: Forces all text to uppercase
|
|
# -a: Removes all non-alphanumeric characters except for spaces and dashes
|
|
# -p: Replace one character with another (separated by commas) (escape regex characters)
|
|
# -s: In combination with -a, replaces characters with a space
|
|
# OUTS:
|
|
# stdout: Prints cleaned string
|
|
# USAGE:
|
|
# _cleanString_ [OPT] [STRING] [CHARS TO REMOVE]
|
|
# _cleanString_ -lp " ,-" [STRING] [CHARS TO REMOVE]
|
|
# NOTES:
|
|
# Always cleaned:
|
|
# - leading white space
|
|
# - trailing white space
|
|
# - multiple spaces become a single space
|
|
# - remove spaces before and after -_
|
|
|
|
local opt
|
|
local _lc=false
|
|
local _uc=false
|
|
local _alphanumeric=false
|
|
local _replace=false
|
|
local _us=false
|
|
|
|
local OPTIND=1
|
|
while getopts ":lLuUaAsSpP" opt; do
|
|
case ${opt} in
|
|
l | L) _lc=true ;;
|
|
u | U) _uc=true ;;
|
|
a | A) _alphanumeric=true ;;
|
|
s | S) _us=true ;;
|
|
p | P)
|
|
shift
|
|
declare -a _pairs=()
|
|
IFS=',' read -r -a _pairs <<<"$1"
|
|
_replace=true
|
|
;;
|
|
*)
|
|
{
|
|
error "Unrecognized option '$1' passed to _execute. Exiting."
|
|
return 1
|
|
}
|
|
;;
|
|
esac
|
|
done
|
|
shift $((OPTIND - 1))
|
|
|
|
[[ $# == 0 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"
|
|
|
|
local _string="${1}"
|
|
local _userChars="${2:-}"
|
|
|
|
declare -a _arrayToClean=()
|
|
IFS=',' read -r -a _arrayToClean <<<"${_userChars}"
|
|
|
|
# trim trailing/leading white space and duplicate spaces/tabs
|
|
_string="$(printf "%s" "${_string}" | awk '{$1=$1};1')"
|
|
|
|
local i
|
|
for i in "${_arrayToClean[@]}"; do
|
|
debug "cleaning: ${i}"
|
|
_string="$(printf "%s" "${_string}" | sed "s/${i}//g")"
|
|
done
|
|
|
|
("${_lc}") \
|
|
&& _string="$(printf "%s" "${_string}" | tr '[:upper:]' '[:lower:]')"
|
|
|
|
("${_uc}") \
|
|
&& _string="$(printf "%s" "${_string}" | tr '[:lower:]' '[:upper:]')"
|
|
|
|
if "${_alphanumeric}" && "${_us}"; then
|
|
_string="$(printf "%s" "${_string}" | tr -c '[:alnum:]_ -' ' ')"
|
|
elif "${_alphanumeric}"; then
|
|
_string="$(printf "%s" "${_string}" | sed "s/[^a-zA-Z0-9_ \-]//g")"
|
|
fi
|
|
|
|
if "${_replace}"; then
|
|
_string="$(printf "%s" "${_string}" | sed -E "s/${_pairs[0]}/${_pairs[1]}/g")"
|
|
fi
|
|
|
|
# trim trailing/leading white space and duplicate dashes & spaces
|
|
_string="$(printf "%s" "${_string}" | tr -s '-' | tr -s '_')"
|
|
_string="$(printf "%s" "${_string}" | sed -E 's/([_\-]) /\1/g' | sed -E 's/ ([_\-])/\1/g')"
|
|
_string="$(printf "%s" "${_string}" | awk '{$1=$1};1')"
|
|
|
|
printf "%s\n" "${_string}"
|
|
|
|
}
|
|
|
|
_decodeHTML_() {
|
|
# DESC:
|
|
# Decode HTML characters with sed. Utilizes a sed file for speed.
|
|
# ARGS:
|
|
# $1 (Required) - String to be decoded
|
|
# OUTS:
|
|
# 0 - Success
|
|
# 1 - Error
|
|
# stdout: Prints decoded output
|
|
# USAGE:
|
|
# _decodeHTML_ <string>
|
|
# NOTE:
|
|
# Must have a sed file containing replacements. See: ../sedfiles/htmlDecode.sed
|
|
|
|
[[ $# == 0 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"
|
|
|
|
local _sedFile
|
|
_sedFile="${HOME}/.sed/htmlDecode.sed"
|
|
|
|
[ -f "${_sedFile}" ] \
|
|
&& { printf "%s\n" "${1}" | sed -f "${_sedFile}"; } \
|
|
|| return 1
|
|
}
|
|
|
|
_decodeURL_() {
|
|
# DESC:
|
|
# Decode a URL encoded string
|
|
# ARGS:
|
|
# $1 (Required) - String to be decoded
|
|
# OUTS:
|
|
# Prints output to STDOUT
|
|
# USAGE:
|
|
# _decodeURL_ <string>
|
|
|
|
[[ $# == 0 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"
|
|
|
|
local _url_encoded="${1//+/ }"
|
|
printf '%b' "${_url_encoded//%/\\x}"
|
|
}
|
|
|
|
_encodeHTML_() {
|
|
# DESC:
|
|
# Encode HTML characters with sed
|
|
# ARGS:
|
|
# $1 (Required) - String to be encoded
|
|
# OUTS:
|
|
# 0 - Success
|
|
# 1 - Error
|
|
# stdout: Prints encoded output
|
|
# USAGE:
|
|
# _encodeHTML_ <string>
|
|
# NOTE:
|
|
# Must have a sed file containing replacements. See: ../sedfiles/htmlEncode.sed
|
|
|
|
[[ $# == 0 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"
|
|
|
|
local _sedFile
|
|
_sedFile="${HOME}/.sed/htmlEncode.sed"
|
|
|
|
[ -f "${_sedFile}" ] \
|
|
&& { printf "%s" "${1}" | sed -f "${_sedFile}"; } \
|
|
|| return 1
|
|
}
|
|
|
|
_encodeURL_() {
|
|
# DESC:
|
|
# URL encode a string
|
|
# ARGS:
|
|
# $1 (Required) - String to be encoded
|
|
# OUTS:
|
|
# Prints output to STDOUT
|
|
# USAGE:
|
|
# _encodeURL_ <string>
|
|
# CREDIT:
|
|
# https://gist.github.com/cdown/1163649
|
|
|
|
[[ $# == 0 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"
|
|
|
|
local LANG=C
|
|
local i
|
|
|
|
for ((i = 0; i < ${#1}; i++)); do
|
|
if [[ ${1:i:1} =~ ^[a-zA-Z0-9\.\~_-]$ ]]; then
|
|
printf "%s" "${1:i:1}"
|
|
else
|
|
printf '%%%02X' "'${1:i:1}"
|
|
fi
|
|
done
|
|
}
|
|
|
|
_escapeString_() {
|
|
# DESC:
|
|
# Escapes a string by adding \ before special chars
|
|
# ARGS:
|
|
# $@ (Required) - String to be escaped
|
|
# OUTS:
|
|
# stdout: Prints escaped output
|
|
# USAGE:
|
|
# _escapeString_ "Some text here"
|
|
|
|
[[ $# == 0 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"
|
|
|
|
printf "%s\n" "${@}" | sed 's/[]\.|$[ (){}?+*^]/\\&/g'
|
|
}
|
|
|
|
_lower_() {
|
|
# DESC:
|
|
# Convert a string to lowercase. Used through a pipe or here string.
|
|
# ARGS:
|
|
# None
|
|
# OUTS:
|
|
# None
|
|
# USAGE:
|
|
# text=$(_lower_ <<<"$1")
|
|
# printf "STRING" | _lower_
|
|
tr '[:upper:]' '[:lower:]'
|
|
}
|
|
|
|
_ltrim_() {
|
|
# DESC:
|
|
# Removes all leading whitespace (from the left). Used through a pipe or here string.
|
|
# ARGS:
|
|
# $1 (Optional) - Character to trim. Defaults to [:space:]
|
|
# OUTS:
|
|
# None
|
|
# USAGE:
|
|
# text=$(_ltrim_ <<<"$1")
|
|
# printf "STRING" | _ltrim_
|
|
local _char=${1:-[:space:]}
|
|
sed "s%^[${_char//%/\\%}]*%%"
|
|
}
|
|
|
|
_regexCapture_() {
|
|
# DESC:
|
|
# Use regex to capture a group of text from a string
|
|
# ARGS:
|
|
# $1 (Required) - Input String
|
|
# $2 (Required) - Regex pattern
|
|
# OPTIONS:
|
|
# -i (Optional) - Ignore case
|
|
# OUTS:
|
|
# 0 - Regex matched
|
|
# 1 - Regex did not match
|
|
# stdout: Prints string matching regex
|
|
# USAGE:
|
|
# HEXCODE=$(_regex_ "background-color: #FFFFFF;" '^(#?([a-fA-F0-9]{6}|[a-fA-F0-9]{3}))$')
|
|
# $ printf "%s\n" "${HEXCODE}"
|
|
# $ #FFFFFF
|
|
# NOTE:
|
|
# This example only prints the first matching group. When using multiple capture
|
|
# groups some modification is needed.
|
|
# CREDIT:
|
|
# https://github.com/dylanaraps/pure-bash-bible
|
|
|
|
local opt
|
|
local OPTIND=1
|
|
while getopts ":iI" opt; do
|
|
case ${opt} in
|
|
i | I)
|
|
#shellcheck disable=SC2064
|
|
trap '$(shopt -p nocasematch)' RETURN # reset nocasematch when function exits
|
|
shopt -s nocasematch # Use case-insensitive regex
|
|
;;
|
|
*) fatal "Unrecognized option '${1}' passed to ${FUNCNAME[0]}. Exiting." ;;
|
|
esac
|
|
done
|
|
shift $((OPTIND - 1))
|
|
|
|
[[ $# -lt 2 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"
|
|
|
|
if [[ $1 =~ $2 ]]; then
|
|
printf '%s\n' "${BASH_REMATCH[1]}"
|
|
return 0
|
|
else
|
|
return 1
|
|
fi
|
|
}
|
|
|
|
_rtrim_() {
|
|
# DESC:
|
|
# Removes all leading whitespace (from the right). Used through a pipe or here string.
|
|
# ARGS:
|
|
# $1 (Optional) - Character to trim. Defaults to [:space:]
|
|
# OUTS:
|
|
# None
|
|
# USAGE:
|
|
# text=$(_rtrim_ <<<"$1")
|
|
# printf "STRING" | _rtrim_
|
|
local _char=${1:-[:space:]}
|
|
sed "s%[${_char//%/\\%}]*$%%"
|
|
}
|
|
|
|
_splitString_() {
|
|
# DESC:
|
|
# Splat a string into an array based on a given delimiter
|
|
# ARGS:
|
|
# $1 (Required) - String to be split
|
|
# $2 (Required) - Delimiter
|
|
# OUTS:
|
|
# 0 - Success
|
|
# 1 - Failure
|
|
# stdout: Values split by delimiter separated by newline
|
|
# USAGE:
|
|
# ARRAY=( $(_splitString_ "string1,string2,string3" ",") )
|
|
# CREDIT:
|
|
# https://github.com/labbots/bash-utility/blob/master/src/misc.sh
|
|
|
|
[[ $# -lt 2 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"
|
|
|
|
declare -a _arr=()
|
|
IFS=$'\n' read -d "" -ra _arr <<<"${1//$2/$'\n'}"
|
|
printf '%s\n' "${_arr[@]}"
|
|
}
|
|
|
|
_stringContains_() {
|
|
# DESC:
|
|
# Tests whether a string contains a substring
|
|
# ARGS:
|
|
# $1 (Required) - String to be tested
|
|
# $2 (Required) - Substring to be tested for
|
|
# OPTIONS:
|
|
# -i (Optional) - Ignore case
|
|
# OUTS:
|
|
# 0 - Search pattern found
|
|
# 1 - Pattern not found
|
|
# USAGE:
|
|
# _stringContains_ "Hello World!" "lo"
|
|
|
|
local opt
|
|
local OPTIND=1
|
|
while getopts ":iI" opt; do
|
|
case ${opt} in
|
|
i | I)
|
|
#shellcheck disable=SC2064
|
|
trap '$(shopt -p nocasematch)' RETURN # reset nocasematch when function exits
|
|
shopt -s nocasematch # Use case-insensitive searching
|
|
;;
|
|
*) fatal "Unrecognized option '${1}' passed to ${FUNCNAME[0]}. Exiting." ;;
|
|
esac
|
|
done
|
|
shift $((OPTIND - 1))
|
|
|
|
[[ $# -lt 2 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"
|
|
|
|
if [[ ${1} == *${2}* ]]; then
|
|
return 0
|
|
else
|
|
return 1
|
|
fi
|
|
}
|
|
|
|
_stringRegex_() {
|
|
# DESC:
|
|
# Tests whether a string matches a regex pattern
|
|
# ARGS:
|
|
# $1 (Required) - String to be tested
|
|
# $2 (Required) - Regex pattern to be tested for
|
|
# OPTIONS:
|
|
# -i (Optional) - Ignore case
|
|
# OUTS:
|
|
# 0 - Search pattern found
|
|
# 1 - Pattern not found
|
|
# USAGE:
|
|
# _stringContains_ "HELLO" "^[A-Z]*$"
|
|
# _stringContains_ -i "HELLO" "^[a-z]*$"
|
|
|
|
local opt
|
|
local OPTIND=1
|
|
while getopts ":iI" opt; do
|
|
case ${opt} in
|
|
i | I)
|
|
#shellcheck disable=SC2064
|
|
trap '$(shopt -p nocasematch)' RETURN # reset nocasematch when function exits
|
|
shopt -s nocasematch # Use case-insensitive regex
|
|
;;
|
|
*) fatal "Unrecognized option '${1}' passed to ${FUNCNAME[0]}. Exiting." ;;
|
|
esac
|
|
done
|
|
shift $((OPTIND - 1))
|
|
|
|
[[ $# -lt 2 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"
|
|
|
|
if [[ ${1} =~ ${2} ]]; then
|
|
return 0
|
|
else
|
|
return 1
|
|
fi
|
|
}
|
|
|
|
_stripStopwords_() {
|
|
# DESC:
|
|
# Removes common stopwords from a string using a list of sed replacements located
|
|
# in an external file. Additional stopwords can be added in arg2
|
|
# ARGS:
|
|
# $1 (Required) - String to parse
|
|
# $2 (Optional) - Additional stopwords (comma separated)
|
|
# OUTS:
|
|
# 0 - Success
|
|
# 1 - Error
|
|
# stdout: Prints string cleaned of stopwords
|
|
# USAGE:
|
|
# CLEAN_WORD="$(_stripStopwords_ "[STRING]" "[MORE,STOP,WORDS]")"
|
|
# NOTE:
|
|
# Must have a sed file containing replacements. See: ../sedfiles/stopwords.sed
|
|
|
|
[[ $# == 0 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"
|
|
|
|
if ! sed --version | grep GNU &>/dev/null; then
|
|
fatal "_stripStopwords_: Required GNU sed not found. Exiting."
|
|
fi
|
|
|
|
local _string="${1}"
|
|
local _sedFile="${HOME}/.sed/stopwords.sed"
|
|
local _w
|
|
|
|
if [ -f "${_sedFile}" ]; then
|
|
_string="$(printf "%s" "${_string}" | sed -f "${_sedFile}")"
|
|
else
|
|
fatal "_stripStopwords_: Missing sedfile expected at: ${_sedFile}"
|
|
fi
|
|
|
|
declare -a _localStopWords=()
|
|
IFS=',' read -r -a _localStopWords <<<"${2:-}"
|
|
|
|
if [[ ${#_localStopWords[@]} -gt 0 ]]; then
|
|
for _w in "${_localStopWords[@]}"; do
|
|
_string="$(printf "%s" "${_string}" | sed -E "s/\b${_w}\b//gI")"
|
|
done
|
|
fi
|
|
|
|
# Remove double spaces and trim left/right
|
|
_string="$(printf "%s" "${_string}" | sed -E 's/[ ]{2,}/ /g' | _trim_)"
|
|
|
|
printf "%s\n" "${_string}"
|
|
|
|
}
|
|
|
|
_stripANSI_() {
|
|
# DESC:
|
|
# Strips ANSI escape sequences from a string
|
|
# ARGS:
|
|
# $1 (Required) - String to be cleaned
|
|
# OUTS:
|
|
# 0 - Success
|
|
# 1 - Failure
|
|
# stdout: Prints string with ANSI escape sequences removed
|
|
# USAGE:
|
|
# _stripANSI_ "\e[1m\e[91mThis is bold red text\e(B\e[m.\e[92mThis is green text.\e(B\e[m"
|
|
|
|
[[ $# == 0 ]] && fatal "Missing required argument to ${FUNCNAME[0]}"
|
|
local _tmp
|
|
local _esc
|
|
local _tpa
|
|
local _re
|
|
_tmp="${1}"
|
|
_esc=$(printf "\x1b")
|
|
_tpa=$(printf "\x28")
|
|
_re="(.*)${_esc}[\[${_tpa}][0-9]*;*[mKB](.*)"
|
|
while [[ ${_tmp} =~ ${_re} ]]; do
|
|
_tmp="${BASH_REMATCH[1]}${BASH_REMATCH[2]}"
|
|
done
|
|
printf "%s" "${_tmp}"
|
|
}
|
|
|
|
_trim_() {
|
|
# DESC:
|
|
# Removes all leading/trailing whitespace and reduces internal duplicate spaces
|
|
# to a single space.
|
|
# ARGS:
|
|
# $1 (Required) - String to be trimmed
|
|
# OUTS:
|
|
# stdout: Prints string with leading/trailing whitespace removed
|
|
# USAGE:
|
|
# text=$(_trim_ <<<"$1")
|
|
# printf "%s" "STRING" | _trim_
|
|
# NOTE:
|
|
# Used through a pipe or here string.
|
|
|
|
awk '{$1=$1;print}'
|
|
}
|
|
|
|
_upper_() {
|
|
# DESC:
|
|
# Convert a string to uppercase. Used through a pipe or here string.
|
|
# ARGS:
|
|
# None
|
|
# OUTS:
|
|
# None
|
|
# USAGE:
|
|
# text=$(_upper_ <<<"$1")
|
|
# printf "%s" "STRING" | _upper_
|
|
tr '[:lower:]' '[:upper:]'
|
|
}
|