m/fzf
1
0
mirror of https://github.com/junegunn/fzf.git synced 2025-11-18 16:45:38 -05:00

Add example: Search for academic PDFs by author, title, journal, institution. From #1183.

bellecp
2017-12-30 09:51:08 -05:00
parent 1a0de0c9f8
commit efacc5bb53

@@ -39,7 +39,7 @@ Table of Contents
* [Wrapper](#wrapper) * [Wrapper](#wrapper)
* [LastPass CLI](#lastpass-cli) * [LastPass CLI](#lastpass-cli)
* [fzf-marker](#fzf-marker) * [fzf-marker](#fzf-marker)
* [Search for academic PDFs by author, title, journal, institution](#search-for-academic-pdfs-by-author-title-journal-institution)
@@ -1012,4 +1012,55 @@ zle -N _fzf_marker_main_widget
zle -N _fzf_marker_placeholder_widget zle -N _fzf_marker_placeholder_widget
bindkey "${FZF_MARKER_MAIN_KEY:-\C-@}" _fzf_marker_main_widget bindkey "${FZF_MARKER_MAIN_KEY:-\C-@}" _fzf_marker_main_widget
bindkey "${FZF_MARKER_PLACEHOLDER_KEY:-\C-v}" _fzf_marker_placeholder_widget bindkey "${FZF_MARKER_PLACEHOLDER_KEY:-\C-v}" _fzf_marker_placeholder_widget
``` ```
### Search for academic PDFs by author, title, journal, institution
Search all PDF files under the current directory. fzf matches the query against any text found on the first page of each PDF, so one can query for author names, article title, journal, institutions, or keywords. It works by extracting the text of the first page of each PDF using ``pdftotext``.
The selected file is then opened by the default PDF viewer.
Requires the [pdftotext](https://en.wikipedia.org/wiki/Pdftotext) command line tool and [ag](https://github.com/ggreer/the_silver_searcher) (The Silver Searcher). Tested on Ubuntu 17.10.
![](https://user-images.githubusercontent.com/1019692/34446795-12229072-ecac-11e7-856a-ec0df0de60ae.gif)
```
# p - fuzzy-search PDFs by the text of their first page and open the pick.
#
# Lists every PDF that `ag` finds under the current directory, once as a bare
# path (available immediately) and once more as "path<TAB>first-page text".
# The text comes from `pdftotext` and is cached per content hash in
# ~/.cache/pdftotext for about a day. The file chosen in fzf is opened with
# `open` (macOS) or `gio open` (elsewhere).
#
# Requires: ag, fzf, pdftotext, md5sum, and a `date` that accepts `-r FILE`.
# Returns:  fzf's status if nothing was picked, else the opener's status.
p () {
  local cache_dir file hash cache expiry age selection
  local -a opener
  declare -A seen

  cache_dir="${HOME}/.cache/pdftotext"
  mkdir -p "${cache_dir}" || return

  # Keep the opener as an array so "gio open" stays two words without relying
  # on word-splitting of an unquoted string.
  if [ "$(uname)" = "Darwin" ]; then
    opener=(open)
  else
    opener=(gio open)
  fi

  selection=$(
    {
      # Fast pass: bare file names, no pdftotext involved.
      ag -g '\.pdf$'
      # Slow pass: the same files again, followed by their first-page text.
      ag -g '\.pdf$' \
        | while IFS= read -r file; do
            # Hash from stdin: md5sum then never sees the file name, so odd
            # names (leading "-", backslashes) cannot alter its output format.
            hash=$(md5sum < "$file" | cut -c 1-32)
            [ -n "$hash" ] || continue
            # Skip duplicates (same content hash as an already seen file);
            # see https://stackoverflow.com/a/13221491
            [ -n "${seen[$hash]+abc}" ] && continue
            seen[$hash]=$hash
            # 1 day (86400 seconds) plus a random extra, so that cache entries
            # do not all expire at the same moment.
            expiry=$(( 86400 + RANDOM * 20 ))
            cache="${cache_dir}/${hash}"
            if [ -f "$cache" ]; then
              age=$(( $(date +%s) - $(date -r "$cache" +%s) ))
            else
              age=$(( expiry + 1 ))   # no cache yet: force extraction
            fi
            if [ "$age" -gt "$expiry" ]; then
              # Newlines become "_" so each file stays on one fzf line.
              pdftotext -f 1 -l 1 "$file" - 2>/dev/null | tr '\n' '_' > "$cache"
            fi
            # printf, not echo -e: backslashes in the PDF text stay literal.
            printf '%s\t%s\n' "$file" "$(cat "$cache")"
          done
    } | fzf -e -d '\t' \
        --preview-window up:75% \
        --preview '
          v=$(echo {q} | tr " " "|");
          echo {1} | grep -E "^|$v" -i --color=always;
          pdftotext -f 1 -l 1 {1} - | grep -E "^|$v" -i --color=always'
  ) || return

  # The path is everything before the first tab of the selected line.
  file=${selection%%$'\t'*}
  [ -n "$file" ] || return 0

  "${opener[@]}" "$file" > /dev/null 2> /dev/null
}
```