m-chrzan.xyz
about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x article2pdf 21
1 files changed, 19 insertions, 2 deletions
diff --git a/article2pdf b/article2pdf
index 4259ee4..dcef441 100755
--- a/article2pdf
+++ b/article2pdf
@@ -11,11 +11,28 @@ html_file=`mktemp`
user_agent='Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:85.0) Gecko/20100101 Firefox/85.0'
curl --user-agent "$user_agent" --location --silent "$1" > "$html_file"
-pdf_file=$pdfs_dir/`pup --file "$html_file" 'title text{}' \
+title=`pup --file "$html_file" 'title text{}' \
| grep -v '^\s*$' \
| head -n 1 \
- | sed -e 's/^ *//' -e 's/ *$//' -e 's/ /-/g'`.pdf
+ | sed -e 's/^ *//' -e 's/ *$//' -e 's/ /-/g'`
+echo "title based on html: |$title|"
+
+if [ -z "$title" ]; then
+ title=${1##*/}
+ title=${title%.*}
+fi
+
+echo "final title: $title"
+
+#pdf_file=$pdfs_dir/`pup --file "$html_file" 'title text{}' \
+ #| grep -v '^\s*$' \
+ #| head -n 1 \
+ #| sed -e 's/^ *//' -e 's/ *$//' -e 's/ /-/g'`.pdf
+
+pdf_file="$pdfs_dir/$title.pdf"
+
+echo "pdf file: $pdf_file"
if pandoc --request-header User-Agent:"$user_agent" "$1" --pdf-engine=xelatex -o "$pdf_file"; then
exit 0