diff options
Diffstat (limited to 'article2pdf')
| -rwxr-xr-x | article2pdf | 21 |
1 files changed, 19 insertions, 2 deletions
diff --git a/article2pdf b/article2pdf
index 4259ee4..dcef441 100755
--- a/article2pdf
+++ b/article2pdf
@@ -11,11 +11,28 @@ html_file=`mktemp`
 user_agent='Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:85.0) Gecko/20100101 Firefox/85.0'
 curl --user-agent "$user_agent" --location --silent "$1" > "$html_file"
-pdf_file=$pdfs_dir/`pup --file "$html_file" 'title text{}' \
+title=`pup --file "$html_file" 'title text{}' \
 | grep -v '^\s*$' \
 | head -n 1 \
- | sed -e 's/^ *//' -e 's/ *$//' -e 's/ /-/g'`.pdf
+ | sed -e 's/^ *//' -e 's/ *$//' -e 's/ /-/g'`
+echo "title based on html: |$title|"
+
+if [ -z "$title" ]; then
+ title=${1##*/}
+ title=${title%.*}
+fi
+
+echo "final title: $title"
+
+#pdf_file=$pdfs_dir/`pup --file "$html_file" 'title text{}' \
+ #| grep -v '^\s*$' \
+ #| head -n 1 \
+ #| sed -e 's/^ *//' -e 's/ *$//' -e 's/ /-/g'`.pdf
+
+pdf_file="$pdfs_dir/$title.pdf"
+
+echo "pdf file: $pdf_file"
 if pandoc --request-header User-Agent:"$user_agent" "$1" --pdf-engine=xelatex -o "$pdf_file"; then
 exit 0
|