gitweb.hhaalo.de Git - scantopdf.git/commitdiff
add scan to pdf scripts
author Bastian Dehn <hhaalo@arcor.de>
Sun, 22 Dec 2024 09:49:51 +0000 (10:49 +0100)
committer Bastian Dehn <hhaalo@arcor.de>
Sun, 22 Dec 2024 09:49:51 +0000 (10:49 +0100)
scantopdf [new file with mode: 0755]
scantopdfbw [new file with mode: 0755]
scantopdfgray [new file with mode: 0755]

diff --git a/scantopdf b/scantopdf
new file mode 100755 (executable)
index 0000000..2cba38f
--- /dev/null
+++ b/scantopdf
@@ -0,0 +1,141 @@
+#!/bin/bash
+#
+optimize()
+{
+       local input=$1
+       local output=$input-out
+
+       qpdf --linearize $input $output
+       mv $output $input
+}
+
+addocr()
+{
+       local input=$1
+       local output=$input-out
+       local error=""
+
+       echo $input
+       error=$(ocrmypdf -l deu $input $output 2>&1)
+
+       if [ -n "$(echo $error | grep offset)" ]; then
+               printf "%s: %s\n" $input $error >> $HOME/addocr-error.log
+       fi
+
+       if [ -f $output ]; then
+               mv $output $input
+               optimize $input
+       fi
+}
+
+# Convert every scanned*.tiff in the current directory to PDF, merge all
+# pages into one document and run addocr on it.
+#
+# Arguments: $1 - file name of the resulting PDF
+# Returns:   1 if there are no scans, a page cannot be converted or the
+#            pages cannot be merged
+createonepdf()
+{
+       local output=$1
+       local i=
+
+       for i in scanned*.tiff; do
+               # An unmatched glob stays literal; stop instead of
+               # handing the pattern itself to tiff2pdf.
+               if [ ! -e "$i" ]; then
+                       echo "ERROR: no scanned pages found" >&2
+                       return 1
+               fi
+
+               # Keep the scan when the conversion fails so the page
+               # is not lost.
+               if ! tiff2pdf "$i" -o "${i%.tiff}.pdf"; then
+                       echo "ERROR: cannot convert $i" >&2
+                       return 1
+               fi
+               rm -- "$i"
+       done
+
+       # Only delete the single pages once they are safely merged.
+       if ! pdftk scanned*.pdf output "$output"; then
+               echo "ERROR: cannot create $output" >&2
+               return 1
+       fi
+       rm -- scanned*.pdf
+       addocr "$output"
+}
+
+# Merge the page files scanned<start>.pdf .. scanned<end>.pdf into
+# out<start>.pdf, run addocr on the result and delete the page files.
+# Page numbers are zero-padded to two digits in all file names.
+#
+# Arguments: $1 - number of the first page
+#            $2 - number of the last page
+# Returns:   0 for an empty range, 1 if pdftk fails
+createpdf()
+{
+       local start=$1
+       local end=$2
+       local target=
+       local i=
+       local pdfs=()
+
+       # An empty range (e.g. two separator sheets in a row) has
+       # nothing to merge; pdftk would only fail without input files.
+       if (( start > end )); then
+               return 0
+       fi
+
+       # Collect the page files in a real array so every name stays
+       # one word when it is passed on.
+       for (( i = start; i <= end; i++ )); do
+               pdfs+=("$(printf "scanned%02d.pdf" "$i")")
+       done
+
+       target=$(printf "out%02d.pdf" "$start")
+
+       # Keep the single pages if they could not be merged.
+       if ! pdftk "${pdfs[@]}" output "$target"; then
+               return 1
+       fi
+       addocr "$target"
+
+       rm -- "${pdfs[@]}"
+}
+
+# Check whether a page is a separator sheet.
+#
+# The page counts as a separator when the output of zbarimg or, failing
+# that, of dmtxread is exactly "Trennblatt".
+#
+# Arguments: $1 - path of the page file to inspect
+# Outputs:   "true" or "false" on stdout
+detectsplit()
+{
+       local pdf=$1
+       local trenn=
+
+       trenn=$(zbarimg --raw --quiet "$pdf")
+
+       # Fall back to the second decoder only when the first one did
+       # not report the separator text.
+       if [ "$trenn" != "Trennblatt" ]; then
+               trenn=$(dmtxread "$pdf")
+       fi
+
+       if [ "$trenn" == "Trennblatt" ]; then
+               echo "true"
+       else
+               echo "false"
+       fi
+}
+
+createmultipdfs()
+{
+       local trenn=$1
+       local startcount=1
+       local endcount=1
+       local pdf=
+
+       for i in scanned*.tiff; do
+               pdf=${i/.tiff/.pdf}
+               tiff2pdf $i -o $pdf
+               trenn=$(detectsplit $pdf)
+               rm $i
+               if [ "$trenn" == "true" ]; then
+                       createpdf $startcount $((endcount - 1))
+                       startcount=$((endcount + 1))
+                       rm $pdf
+               fi
+               ((endcount++))
+       done
+}
+
+main()
+{
+       if [ -z "$1" ]; then
+               echo "ERROR: no page count"
+               exit 1
+       fi
+
+       if [ -z "$2" ]; then
+               echo "ERROR: no pdf file name"
+               exit 1
+       fi
+
+       local pagecount=$1
+       local output=$2
+
+       scanimage --resolution=300 \
+               --skip-blank-pages=yes \
+               --format=tiff \
+               --batch-count=$pagecount \
+               --batch-increment=1 \
+               --batch=scanned%02d.tiff
+
+       if [ $? -gt 0 ]; then
+               echo "ERROR: scan error"
+               exit 1
+       fi
+
+       if [ "$output" == "multi" ]; then
+               createmultipdfs $pagecount
+               exit 0
+       fi
+
+       createonepdf $output
+}
+
+main $*
diff --git a/scantopdfbw b/scantopdfbw
new file mode 100755 (executable)
index 0000000..f149327
--- /dev/null
+++ b/scantopdfbw
@@ -0,0 +1,142 @@
+#!/bin/bash
+#
+optimize()
+{
+       local input=$1
+       local output=$input-out
+
+       qpdf --linearize $input $output
+       mv $output $input
+}
+
+addocr()
+{
+       local input=$1
+       local output=$input-out
+       local error=""
+
+       echo $input
+       error=$(ocrmypdf -l deu $input $output 2>&1)
+
+       if [ -n "$(echo $error | grep offset)" ]; then
+               printf "%s: %s\n" $input $error >> $HOME/addocr-error.log
+       fi
+
+       if [ -f $output ]; then
+               mv $output $input
+               optimize $input
+       fi
+}
+
+# Convert every scanned*.tiff in the current directory to PDF, merge all
+# pages into one document and run addocr on it.
+#
+# Arguments: $1 - file name of the resulting PDF
+# Returns:   1 if there are no scans, a page cannot be converted or the
+#            pages cannot be merged
+createonepdf()
+{
+       local output=$1
+       local i=
+
+       for i in scanned*.tiff; do
+               # An unmatched glob stays literal; stop instead of
+               # handing the pattern itself to tiff2pdf.
+               if [ ! -e "$i" ]; then
+                       echo "ERROR: no scanned pages found" >&2
+                       return 1
+               fi
+
+               # Keep the scan when the conversion fails so the page
+               # is not lost.
+               if ! tiff2pdf "$i" -o "${i%.tiff}.pdf"; then
+                       echo "ERROR: cannot convert $i" >&2
+                       return 1
+               fi
+               rm -- "$i"
+       done
+
+       # Only delete the single pages once they are safely merged.
+       if ! pdftk scanned*.pdf output "$output"; then
+               echo "ERROR: cannot create $output" >&2
+               return 1
+       fi
+       rm -- scanned*.pdf
+       addocr "$output"
+}
+
+# Merge the page files scanned<start>.pdf .. scanned<end>.pdf into
+# out<start>.pdf, run addocr on the result and delete the page files.
+# Page numbers are zero-padded to two digits in all file names.
+#
+# Arguments: $1 - number of the first page
+#            $2 - number of the last page
+# Returns:   0 for an empty range, 1 if pdftk fails
+createpdf()
+{
+       local start=$1
+       local end=$2
+       local target=
+       local i=
+       local pdfs=()
+
+       # An empty range (e.g. two separator sheets in a row) has
+       # nothing to merge; pdftk would only fail without input files.
+       if (( start > end )); then
+               return 0
+       fi
+
+       # Collect the page files in a real array so every name stays
+       # one word when it is passed on.
+       for (( i = start; i <= end; i++ )); do
+               pdfs+=("$(printf "scanned%02d.pdf" "$i")")
+       done
+
+       target=$(printf "out%02d.pdf" "$start")
+
+       # Keep the single pages if they could not be merged.
+       if ! pdftk "${pdfs[@]}" output "$target"; then
+               return 1
+       fi
+       addocr "$target"
+
+       rm -- "${pdfs[@]}"
+}
+
+# Check whether a page is a separator sheet.
+#
+# The page counts as a separator when the output of zbarimg or, failing
+# that, of dmtxread is exactly "Trennblatt".
+#
+# Arguments: $1 - path of the page file to inspect
+# Outputs:   "true" or "false" on stdout
+detectsplit()
+{
+       local pdf=$1
+       local trenn=
+
+       trenn=$(zbarimg --raw --quiet "$pdf")
+
+       # Fall back to the second decoder only when the first one did
+       # not report the separator text.
+       if [ "$trenn" != "Trennblatt" ]; then
+               trenn=$(dmtxread "$pdf")
+       fi
+
+       if [ "$trenn" == "Trennblatt" ]; then
+               echo "true"
+       else
+               echo "false"
+       fi
+}
+
+createmultipdfs()
+{
+       local trenn=$1
+       local startcount=1
+       local endcount=1
+       local pdf=
+
+       for i in scanned*.tiff; do
+               pdf=${i/.tiff/.pdf}
+               tiff2pdf $i -o $pdf
+               trenn=$(detectsplit $pdf)
+               rm $i
+               if [ "$trenn" == "true" ]; then
+                       createpdf $startcount $((endcount - 1))
+                       startcount=$((endcount + 1))
+                       rm $pdf
+               fi
+               ((endcount++))
+       done
+}
+
+main()
+{
+       if [ -z "$1" ]; then
+               echo "ERROR: no page count"
+               exit 1
+       fi
+
+       if [ -z "$2" ]; then
+               echo "ERROR: no pdf file name"
+               exit 1
+       fi
+
+       local pagecount=$1
+       local output=$2
+
+       scanimage --resolution=300 \
+               --mode=BW \
+               --skip-blank-pages=yes \
+               --format=tiff \
+               --batch-count=$1 \
+               --batch-increment=1 \
+               --batch=scanned%02d.tiff
+
+       if [ $? -gt 0 ]; then
+               echo "ERROR: scan error"
+               exit 1
+       fi
+
+       if [ "$output" == "multi" ]; then
+               createmultipdfs $pagecount
+               exit 0
+       fi
+
+       createonepdf $output
+}
+
+main $*
diff --git a/scantopdfgray b/scantopdfgray
new file mode 100755 (executable)
index 0000000..2abc9ab
--- /dev/null
+++ b/scantopdfgray
@@ -0,0 +1,142 @@
+#!/bin/bash
+#
+optimize()
+{
+       local input=$1
+       local output=$input-out
+
+       qpdf --linearize $input $output
+       mv $output $input
+}
+
+addocr()
+{
+       local input=$1
+       local output=$input-out
+       local error=""
+
+       echo $input
+       error=$(ocrmypdf -l deu $input $output 2>&1)
+
+       if [ -n "$(echo $error | grep offset)" ]; then
+               printf "%s: %s\n" $input $error >> $HOME/addocr-error.log
+       fi
+
+       if [ -f $output ]; then
+               mv $output $input
+               optimize $input
+       fi
+}
+
+# Convert every scanned*.tiff in the current directory to PDF, merge all
+# pages into one document and run addocr on it.
+#
+# Arguments: $1 - file name of the resulting PDF
+# Returns:   1 if there are no scans, a page cannot be converted or the
+#            pages cannot be merged
+createonepdf()
+{
+       local output=$1
+       local i=
+
+       for i in scanned*.tiff; do
+               # An unmatched glob stays literal; stop instead of
+               # handing the pattern itself to tiff2pdf.
+               if [ ! -e "$i" ]; then
+                       echo "ERROR: no scanned pages found" >&2
+                       return 1
+               fi
+
+               # Keep the scan when the conversion fails so the page
+               # is not lost.
+               if ! tiff2pdf "$i" -o "${i%.tiff}.pdf"; then
+                       echo "ERROR: cannot convert $i" >&2
+                       return 1
+               fi
+               rm -- "$i"
+       done
+
+       # Only delete the single pages once they are safely merged.
+       if ! pdftk scanned*.pdf output "$output"; then
+               echo "ERROR: cannot create $output" >&2
+               return 1
+       fi
+       rm -- scanned*.pdf
+       addocr "$output"
+}
+
+# Merge the page files scanned<start>.pdf .. scanned<end>.pdf into
+# out<start>.pdf, run addocr on the result and delete the page files.
+# Page numbers are zero-padded to two digits in all file names.
+#
+# Arguments: $1 - number of the first page
+#            $2 - number of the last page
+# Returns:   0 for an empty range, 1 if pdftk fails
+createpdf()
+{
+       local start=$1
+       local end=$2
+       local target=
+       local i=
+       local pdfs=()
+
+       # An empty range (e.g. two separator sheets in a row) has
+       # nothing to merge; pdftk would only fail without input files.
+       if (( start > end )); then
+               return 0
+       fi
+
+       # Collect the page files in a real array so every name stays
+       # one word when it is passed on.
+       for (( i = start; i <= end; i++ )); do
+               pdfs+=("$(printf "scanned%02d.pdf" "$i")")
+       done
+
+       target=$(printf "out%02d.pdf" "$start")
+
+       # Keep the single pages if they could not be merged.
+       if ! pdftk "${pdfs[@]}" output "$target"; then
+               return 1
+       fi
+       addocr "$target"
+
+       rm -- "${pdfs[@]}"
+}
+
+# Check whether a page is a separator sheet.
+#
+# The page counts as a separator when the output of zbarimg or, failing
+# that, of dmtxread is exactly "Trennblatt".
+#
+# Arguments: $1 - path of the page file to inspect
+# Outputs:   "true" or "false" on stdout
+detectsplit()
+{
+       local pdf=$1
+       local trenn=
+
+       trenn=$(zbarimg --raw --quiet "$pdf")
+
+       # Fall back to the second decoder only when the first one did
+       # not report the separator text.
+       if [ "$trenn" != "Trennblatt" ]; then
+               trenn=$(dmtxread "$pdf")
+       fi
+
+       if [ "$trenn" == "Trennblatt" ]; then
+               echo "true"
+       else
+               echo "false"
+       fi
+}
+
+createmultipdfs()
+{
+       local trenn=$1
+       local startcount=1
+       local endcount=1
+       local pdf=
+
+       for i in scanned*.tiff; do
+               pdf=${i/.tiff/.pdf}
+               tiff2pdf $i -o $pdf
+               trenn=$(detectsplit $pdf)
+               rm $i
+               if [ "$trenn" == "true" ]; then
+                       createpdf $startcount $((endcount - 1))
+                       startcount=$((endcount + 1))
+                       rm $pdf
+               fi
+               ((endcount++))
+       done
+}
+
+main()
+{
+       if [ -z "$1" ]; then
+               echo "ERROR: no page count"
+               exit 1
+       fi
+
+       if [ -z "$2" ]; then
+               echo "ERROR: no pdf file name"
+               exit 1
+       fi
+
+       local pagecount=$1
+       local output=$2
+
+       scanimage --resolution=300 \
+               --mode=Gray \
+               --skip-blank-pages=yes \
+               --format=tiff \
+               --batch-count=$1 \
+               --batch-increment=1 \
+               --batch=scanned%02d.tiff
+
+       if [ $? -gt 0 ]; then
+               echo "ERROR: scan error"
+               exit 1
+       fi
+
+       if [ "$output" == "multi" ]; then
+               createmultipdfs $pagecount
+               exit 0
+       fi
+
+       createonepdf $output
+}
+
+main $*