diff options
| author | Marcin Chrzanowski <m@m-chrzan.xyz> | 2025-12-12 13:25:04 +0100 |
|---|---|---|
| committer | Marcin Chrzanowski <m@m-chrzan.xyz> | 2025-12-12 13:25:04 +0100 |
| commit | c3586a8cda0db26057288dfe9a1e75d710efbb3c (patch) | |
| tree | be5dce6323d4273031fe272d588f610491bf5672 /article2pdf | |
| parent | 55604140337ae635d20983ae6ea0a1781b61ad8d (diff) | |
Update article2pdf
Diffstat (limited to 'article2pdf')
| -rwxr-xr-x | article2pdf | 21 |
1 files changed, 19 insertions, 2 deletions
```diff
diff --git a/article2pdf b/article2pdf
index 4259ee4..dcef441 100755
--- a/article2pdf
+++ b/article2pdf
@@ -11,11 +11,28 @@ html_file=`mktemp`
 user_agent='Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:85.0) Gecko/20100101 Firefox/85.0'
 curl --user-agent "$user_agent" --location --silent "$1" > "$html_file"
-pdf_file=$pdfs_dir/`pup --file "$html_file" 'title text{}' \
+title=`pup --file "$html_file" 'title text{}' \
     | grep -v '^\s*$' \
     | head -n 1 \
-    | sed -e 's/^ *//' -e 's/ *$//' -e 's/ /-/g'`.pdf
+    | sed -e 's/^ *//' -e 's/ *$//' -e 's/ /-/g'`
+echo "title based on html: |$title|"
+
+if [ -z "$title" ]; then
+    title=${1##*/}
+    title=${title%.*}
+fi
+
+echo "final title: $title"
+
+#pdf_file=$pdfs_dir/`pup --file "$html_file" 'title text{}' \
+    #| grep -v '^\s*$' \
+    #| head -n 1 \
+    #| sed -e 's/^ *//' -e 's/ *$//' -e 's/ /-/g'`.pdf
+
+pdf_file="$pdfs_dir/$title.pdf"
+
+echo "pdf file: $pdf_file"
 if pandoc --request-header User-Agent:"$user_agent" "$1" --pdf-engine=xelatex -o "$pdf_file"; then
     exit 0
```