From efacc5bb53b59f23623591a52a69fc1f085521ab Mon Sep 17 00:00:00 2001 From: bellecp <1019692+bellecp@users.noreply.github.com> Date: Sat, 30 Dec 2017 09:51:08 -0500 Subject: [PATCH] Add example: Search for academic PDFs by author, title, journal, institution. From #1183. --- Examples.md | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/Examples.md b/Examples.md index c605a71..70de5e9 100644 --- a/Examples.md +++ b/Examples.md @@ -39,7 +39,7 @@ Table of Contents * [Wrapper](#wrapper) * [LastPass CLI](#lastpass-cli) * [fzf-marker](#fzf-marker) - +* [Search for academic PDFs by author, title, journal, institution](#search-for-academic-pdfs-by-author-title-journal-institution) @@ -1012,4 +1012,55 @@ zle -N _fzf_marker_main_widget zle -N _fzf_marker_placeholder_widget bindkey "${FZF_MARKER_MAIN_KEY:-\C-@}" _fzf_marker_main_widget bindkey "${FZF_MARKER_PLACEHOLDER_KEY:-\C-v}" _fzf_marker_placeholder_widget -``` \ No newline at end of file +``` + + +### Search for academic PDFs by author, title, journal, institution + +Search for PDF files under the current directory. fzf matches the query against any text found on the first page of each PDF, so one can search by author names, article title, journal, institutions, or keywords. The text of the first page is extracted with ``pdftotext``. +The selected file is then opened with the default PDF viewer. + +Requires `ag`, `md5sum`, and the [pdftotext](https://en.wikipedia.org/wiki/Pdftotext) command line tool. Tested on Ubuntu 17.10. 
+ +![](https://user-images.githubusercontent.com/1019692/34446795-12229072-ecac-11e7-856a-ec0df0de60ae.gif) +``` + +p () { + local DIR open + declare -A already + DIR="${HOME}/.cache/pdftotext" + mkdir -p "${DIR}" + if [ "$(uname)" = "Darwin" ]; then + open=open + else + open="gio open" + fi + + { + ag -g ".pdf$"; # fast, without pdftotext + ag -g ".pdf$" \ + | while read -r FILE; do + local EXPIRY HASH CACHE + HASH=$(md5sum "$FILE" | cut -c 1-32) + # Remove duplicates (file that has same hash as already seen file) + [ ${already[$HASH]+abc} ] && continue # see https://stackoverflow.com/a/13221491 + already[$HASH]=$HASH + EXPIRY=$(( 86400 + $RANDOM * 20 )) # 1 day (86400 seconds) plus some random + CMD="pdftotext -f 1 -l 1 '$FILE' - 2>/dev/null | tr \"\n\" \"_\" " + CACHE="$DIR/$HASH" + test -f "${CACHE}" && [ $(expr $(date +%s) - $(date -r "$CACHE" +%s)) -le $EXPIRY ] || {touch ${CACHE}; eval "$CMD" > "${CACHE}"} + echo -e "$FILE\t$(cat ${CACHE})" + done + } | fzf -e -d '\t' \ + --preview-window up:75% \ + --preview ' + v=$(echo {q} | tr " " "|"); + echo {1} | grep -E "^|$v" -i --color=always; + pdftotext -f 1 -l 1 {1} - | grep -E "^|$v" -i --color=always' \ + | awk 'BEGIN {FS="\t"; OFS="\t"}; {print "\""$1"\""}' \ + | xargs $open > /dev/null 2> /dev/null +} + + +``` +