Last active 1 month ago

Revision fd1d9c1c9d2c82941e9f189066f01ecc2b60cfcd

pdfify.sh Raw
#!/usr/bin/env bash
#
# pdfify — Markdown in, styled PDF out, rendered via Docker.
# Supports: images, mermaid diagrams, tables, code blocks, Obsidian callouts
#
# Usage: ./pdfify <file.md> [file2.md ...] [options]

# Strict mode: stop on errors, on unset variables, and on failures anywhere
# inside a pipeline.
set -o errexit -o nounset -o pipefail

# --- Release / Docker image identity ---
VERSION='1.2.0'
IMAGE_NAME='pdfify'

# --- Gist the script is published in ---
GIST_ID='23f4514a1f0da1347d3f89926c23b68f'
GIST_RAW="https://gist.githubusercontent.com/jclement/${GIST_ID}/raw/pdfify.sh"

# Absolute path of this script (directory part resolved with cd + pwd).
SELF="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"

# --- Colors ---
# Kept as literal backslash sequences; they only turn into real escape codes
# when printed through `echo -e`.
CSI='\033['
RESET="${CSI}0m"
BOLD="${CSI}1m"
DIM="${CSI}2m"
RED="${CSI}0;31m"
GREEN="${CSI}0;32m"
YELLOW="${CSI}0;33m"
BLUE="${CSI}0;34m"
MAGENTA="${CSI}0;35m"
CYAN="${CSI}0;36m"
24
25# --- Portable SHA-256 (macOS has shasum, Linux often has sha256sum) ---
26_sha256() { shasum -a 256 "$@" 2>/dev/null || sha256sum "$@"; }
27
# --- Logging helpers ---
# Each helper joins its arguments with spaces and prints them on stdout with
# `echo -e`, so callers can embed the colour variables in the message text.
#
# success/warn/detail carry a visible marker between the colour code and the
# reset (a colour applied to nothing draws nothing); the markers are the same
# ones the embedded conversion script prints.

# info MSG...     bold status line prefixed with a blue "::"
info() { echo -e "${BLUE}::${RESET} ${BOLD}$*${RESET}"; }

# success MSG...  line prefixed with a green check mark
success() { echo -e "${GREEN}✓${RESET} $*"; }

# warn MSG...     line prefixed with a yellow warning sign
warn() { echo -e "${YELLOW}⚠${RESET} $*"; }

# detail MSG...   indented secondary line prefixed with a dim arrow
detail() { echo -e " ${DIM}→${RESET} $*"; }

# header MSG...   magenta banner: blank line, rule, bold title, rule, blank line
header() {
  local rule="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  echo -e "\n${MAGENTA}${rule}${RESET}"
  echo -e "${MAGENTA} ${BOLD}$*${RESET}"
  echo -e "${MAGENTA}${rule}${RESET}\n"
}
33
# --- Self-update ---
# Replace this script ($SELF) with the current gist revision, then exit.
#
# Globals:  GIST_ID, GIST_RAW, SELF (read); colour variables (read)
# Outputs:  progress and error messages on stdout
# Exits:    0 after a successful update; 1 when the download fails or the
#           downloaded file is empty / does not start with a "#!/" line.
#           This function never returns to its caller.
do_update() {
  info "Updating pdfify from gist..."
  local raw_url tmp

  # Ask the gist API for the raw URL of the newest revision. This lookup is
  # best-effort: without `|| true`, a failing curl or a grep with no match
  # makes the pipeline non-zero (pipefail), the assignment fails, and errexit
  # kills the script right here, so the GIST_RAW fallback would never run.
  raw_url=$(curl -fsSL "https://api.github.com/gists/${GIST_ID}" 2>/dev/null \
    | grep '"raw_url"' \
    | head -1 \
    | sed 's/.*"raw_url": *"//;s/".*//') || true
  if [[ -z "$raw_url" ]]; then
    raw_url="$GIST_RAW"
  fi

  tmp=$(mktemp)
  if ! curl -fsSL "$raw_url" -o "$tmp" 2>/dev/null; then
    rm -f "$tmp"
    echo -e "${RED}Error:${RESET} Failed to download update"
    exit 1
  fi

  # Refuse to install an empty file or anything that does not begin with a
  # shebang (for example an HTML error page).
  if [[ ! -s "$tmp" ]] || ! head -1 "$tmp" | grep -q '^#!/'; then
    rm -f "$tmp"
    echo -e "${RED}Error:${RESET} Downloaded file doesn't look like a script"
    exit 1
  fi

  chmod +x "$tmp"
  mv "$tmp" "$SELF"
  success "Updated to latest version"
  detail "${CYAN}${SELF}${RESET}"
  exit 0
}
59
# Best-effort check for a newer gist revision.
#
# Downloads the gist's current raw file (short curl timeouts), compares its
# SHA-256 with that of $SELF, and prints a one-line hint when they differ.
#
# Globals:  GIST_ID, SELF (read); colour variables (read)
# Returns:  always 0. Every lookup, download or hashing failure ends the
#           check quietly instead of failing the caller under `set -e`.
check_for_update() {
  local remote_hash local_hash raw_url
  raw_url=$(curl -fsSL --connect-timeout 2 --max-time 3 "https://api.github.com/gists/${GIST_ID}" 2>/dev/null \
    | grep '"raw_url"' \
    | head -1 \
    | sed 's/.*"raw_url": *"//;s/".*//') || return 0
  [[ -z "$raw_url" ]] && return 0
  remote_hash=$(curl -fsSL --connect-timeout 2 --max-time 5 "$raw_url" 2>/dev/null | _sha256 | cut -d' ' -f1) || return 0
  # Guarded like the steps above: failing to hash the local script must not
  # abort the run either.
  local_hash=$(_sha256 < "$SELF" | cut -d' ' -f1) || return 0
  if [[ -n "$remote_hash" && -n "$local_hash" && "$remote_hash" != "$local_hash" ]]; then
    warn "${DIM}A newer version of pdfify is available. Run ${CYAN}pdfify --update${DIM} to upgrade.${RESET}"
  fi
}
70
# --- Args (CLI overrides frontmatter; "" means "use frontmatter default") ---
REBUILD=0
WATCH=0
OPEN=0
PREVIEW=0
OUT_FILE=""
NEXT_KEY=""   # name (without "--") of an option still waiting for its value
POSITIONAL=() # input files, in command-line order

# CLI overrides — empty string means "not set, defer to frontmatter"
CLI_TOC_LEVEL=""
CLI_NUMBERS=""
CLI_NUMBER_FROM=""
CLI_TITLE=""
CLI_SUBTITLE=""
CLI_AUTHOR=""
CLI_HEADER=""
CLI_FOOTER=""
CLI_DATE=""
CLI_WATERMARK=""

# Store VALUE in the global that belongs to the value-taking option KEY
# (KEY is the option name without the leading "--").
# Usage: _apply_option_value KEY VALUE
_apply_option_value() {
  case "$1" in
    toc-level) CLI_TOC_LEVEL="$2" ;;
    number-from) CLI_NUMBER_FROM="$2" ;;
    out) OUT_FILE="$2" ;;
    title) CLI_TITLE="$2" ;;
    subtitle) CLI_SUBTITLE="$2" ;;
    author) CLI_AUTHOR="$2" ;;
    header) CLI_HEADER="$2" ;;
    footer) CLI_FOOTER="$2" ;;
    date) CLI_DATE="$2" ;;
    watermark) CLI_WATERMARK="$2" ;;
  esac
}

# Print the --help text on stdout.
print_help() {
  echo -e "${BOLD}pdfify${RESET} v${VERSION} — Markdown to PDF"
  echo ""
  echo -e "${BOLD}Usage:${RESET} pdfify ${CYAN}<file.md> [file2.md ...]${RESET} [options]"
  echo ""
  echo -e "${BOLD}Options:${RESET}"
  echo -e " ${DIM}--out FILE${RESET} Output file (single input only)"
  echo -e " ${DIM}--toc-level N${RESET} TOC depth: 0=none, 1=H1, 2=H2, 3=H3 (default: 3)"
  echo -e " ${DIM}--numbers${RESET} Enable numbered headings (default)"
  echo -e " ${DIM}--no-numbers${RESET} Disable numbered headings"
  echo -e " ${DIM}--number-from N${RESET} Start numbering at heading level N (default: 2)"
  echo -e " ${DIM}--open${RESET} Open PDF after generation"
  echo -e " ${DIM}--preview${RESET} Render to /tmp and open (no permanent file)"
  echo -e " ${DIM}--watch${RESET} Watch for changes and regenerate"
  echo -e " ${DIM}--rebuild${RESET} Force rebuild the Docker image"
  echo -e " ${DIM}--clean${RESET} Remove the Docker image"
  echo -e " ${DIM}--update${RESET} Update pdfify to latest version from gist"
  echo -e " ${DIM}--version${RESET} Show version"
  echo ""
  echo -e "${BOLD}Overrides${RESET} (CLI trumps frontmatter):"
  echo -e " ${DIM}--title TEXT${RESET} ${DIM}--subtitle TEXT${RESET}"
  echo -e " ${DIM}--author TEXT${RESET} ${DIM}--header TEXT${RESET}"
  echo -e " ${DIM}--footer TEXT${RESET} ${DIM}--date TEXT${RESET}"
  echo -e " ${DIM}--watermark TEXT${RESET}"
  echo ""
  echo -e "${BOLD}Frontmatter:${RESET}"
  echo -e " title, subtitle, author, header, footer, toc-level, date,"
  echo -e " numbersections (true/false), numberfrom (1-4), watermark"
}

# Parse the command line into the globals declared above.
#
# Value-taking options accept both "--opt VALUE" and "--opt=VALUE". In the
# first form the next argument is taken as the value whatever it looks like.
# Anything that is not a recognised option is collected in POSITIONAL.
#
# --update, --clean, --version and --help act immediately and exit.
# Exits 1 when the last argument is a value-taking option with no value;
# previously such an option was dropped without any message.
#
# Usage: parse_args "$@"
parse_args() {
  local arg opt
  for arg in "$@"; do
    # The previous argument was "--opt": this one is its value.
    if [[ -n "$NEXT_KEY" ]]; then
      _apply_option_value "$NEXT_KEY" "$arg"
      NEXT_KEY=""
      continue
    fi
    case "$arg" in
      --rebuild) REBUILD=1 ;;
      --update) do_update ;;
      --watch) WATCH=1 ;;
      --open) OPEN=1 ;;
      --preview) PREVIEW=1; OPEN=1 ;;
      --no-numbers) CLI_NUMBERS="false" ;;
      --numbers) CLI_NUMBERS="true" ;;
      --clean)
        echo -e "${BLUE}::${RESET} ${BOLD}Removing Docker image ${CYAN}${IMAGE_NAME}${RESET}..."
        if docker rmi "$IMAGE_NAME" >/dev/null 2>&1; then
          success "Image removed"
        else
          echo -e "${DIM}Image not found${RESET}"
        fi
        exit 0 ;;
      --toc-level|--number-from|--out|--title|--subtitle|--author|--header|--footer|--date|--watermark)
        NEXT_KEY="${arg#--}" ;;
      --toc-level=*|--number-from=*|--out=*|--title=*|--subtitle=*|--author=*|--header=*|--footer=*|--date=*|--watermark=*)
        # Split at the first "=" only, so the value may itself contain "=".
        opt="${arg%%=*}"
        _apply_option_value "${opt#--}" "${arg#*=}" ;;
      --version) echo "pdfify v${VERSION}"; exit 0 ;;
      --help|-h) print_help; exit 0 ;;
      *) POSITIONAL+=("$arg") ;;
    esac
  done

  if [[ -n "$NEXT_KEY" ]]; then
    echo -e "${RED}Error:${RESET} --${NEXT_KEY} requires a value"
    exit 1
  fi
}

parse_args "$@"
172
# --- Validate the parsed command line ---
# No input file at all: show the short usage and stop.
if (( ${#POSITIONAL[@]} == 0 )); then
  echo -e "${BOLD}Usage:${RESET} pdfify ${CYAN}<file.md> [file2.md ...]${RESET} [options]"
  echo -e " Run ${CYAN}pdfify --help${RESET} for all options"
  exit 1
fi

# A single --out path cannot name the output of several inputs.
if (( ${#POSITIONAL[@]} > 1 )) && [[ -n "$OUT_FILE" ]]; then
  echo -e "${RED}Error:${RESET} --out cannot be used with multiple input files"
  exit 1
fi
183
# --- Open helper ---
# Hand a PDF to the first available opener command: `open`, then `xdg-open`.
# Usage:   open_pdf FILE
# Returns: the opener's exit status; 0 (doing nothing) when neither exists.
open_pdf() {
  local target="$1" opener
  for opener in open xdg-open; do
    if command -v "$opener" >/dev/null 2>&1; then
      "$opener" "$target"
      return
    fi
  done
}
193
header "pdfify v${VERSION}"

# --- Embedded Dockerfile ---
# Build recipe for the conversion image, kept as a string and fed to
# `docker build -f -`. The heredoc delimiter is quoted, so nothing in it is
# expanded by this script (${style} is expanded by the shell inside the build).
#
# The PUPPETEER_* variables are declared before `npm install`: set afterwards,
# they cannot influence the install step, and Puppeteer's installer is what
# they are meant to stop from downloading its own browser. The image uses the
# distro Chromium at /usr/bin/chromium. PUPPETEER_SKIP_DOWNLOAD is the name
# used by newer Puppeteer releases; the older name is kept alongside it.
#
# The Roboto Mono downloads are best-effort (`|| true`): a failed download
# does not fail the build.
DOCKERFILE=$(cat <<'DOCKERFILE_END'
FROM node:20-slim
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update -qq && \
    apt-get install -y --no-install-recommends \
    pandoc \
    texlive-latex-recommended \
    texlive-latex-extra \
    texlive-fonts-recommended \
    texlive-fonts-extra \
    texlive-xetex \
    lmodern \
    librsvg2-bin \
    chromium \
    ca-certificates \
    fonts-liberation \
    fonts-roboto \
    fonts-roboto-unhinted \
    fonts-noto-color-emoji \
    wget \
    fontconfig \
    && rm -rf /var/lib/apt/lists/*
RUN mkdir -p /usr/share/fonts/truetype/roboto-mono && \
    for style in Regular Bold Italic BoldItalic Medium MediumItalic Light LightItalic; do \
    wget -q "https://github.com/googlefonts/RobotoMono/raw/main/fonts/ttf/RobotoMono-${style}.ttf" \
    -O "/usr/share/fonts/truetype/roboto-mono/RobotoMono-${style}.ttf" 2>/dev/null || true; \
    done && \
    fc-cache -f
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
ENV PUPPETEER_SKIP_DOWNLOAD=true
ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
ENV CHROME_PATH=/usr/bin/chromium
RUN npm install -g @mermaid-js/mermaid-cli
RUN echo '{"maxTextSize": 90000, "flowchart": {"useMaxWidth": true}, "theme": "default"}' > /opt/mermaid-config.json
RUN echo '{"args": ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--disable-gpu"]}' > /opt/puppeteer-config.json
WORKDIR /work
ENTRYPOINT ["/bin/bash"]
DOCKERFILE_END
)
235
# --- Build Docker image ---
# Make sure the image $IMAGE_NAME exists locally, building it from
# $DOCKERFILE on first use. With --rebuild the existing image is removed first
# so that the build branch below always runs.
echo ""
if [[ $REBUILD -eq 1 ]]; then
  info "Removing existing Docker image ${CYAN}${IMAGE_NAME}${RESET}..."
  docker rmi "$IMAGE_NAME" >/dev/null 2>&1 || true
  success "Image removed"
fi

if docker image inspect "$IMAGE_NAME" >/dev/null 2>&1; then
  info "Docker image ${GREEN}${IMAGE_NAME}${RESET} found ${DIM}(cached)${RESET}"
  success "Reusing existing image"
else
  info "Building Docker image ${CYAN}${IMAGE_NAME}${RESET}..."
  detail "Installing: pandoc, XeLaTeX, mermaid-cli, Chromium, fonts"
  detail "This takes 2-3 minutes on first run (cached after)"
  echo ""
  # The build context is an empty temp dir; the Dockerfile arrives on stdin.
  BUILD_CTX=$(mktemp -d)
  # Condensed progress view: show only "Step n/m" lines, the final tag line
  # and anything that mentions an error.
  #
  # `|| true` is required. With pipefail + errexit a failing `docker build`
  # would otherwise end the script on this line, skipping both the temp-dir
  # cleanup and the full-output retry below. Success is decided by the
  # `docker image inspect` that follows, not by the pipeline status.
  echo "$DOCKERFILE" | DOCKER_BUILDKIT=0 docker build -t "$IMAGE_NAME" -f - "$BUILD_CTX" 2>&1 | while IFS= read -r line; do
    if [[ "$line" =~ ^Step\ ([0-9]+)/([0-9]+) ]]; then
      echo -e " ${CYAN}[${BASH_REMATCH[1]}/${BASH_REMATCH[2]}]${RESET} ${DIM}${line#*: }${RESET}"
    elif [[ "$line" == *"Successfully tagged"* ]]; then
      echo -e " ${GREEN}${line}${RESET}"
    elif [[ "$line" == *"ERROR"* || "$line" == *"error"* ]]; then
      echo -e " ${RED}${line}${RESET}"
    fi
  done || true
  rm -rf "$BUILD_CTX"
  if ! docker image inspect "$IMAGE_NAME" >/dev/null 2>&1; then
    echo -e "\n${RED}Docker build failed. Re-running with full output:${RESET}\n"
    BUILD_CTX=$(mktemp -d)
    # This run exists only to show the unfiltered log; its status is ignored
    # so the temp dir is still removed and the exit code below stays 1.
    echo "$DOCKERFILE" | DOCKER_BUILDKIT=0 docker build -t "$IMAGE_NAME" -f - "$BUILD_CTX" || true
    rm -rf "$BUILD_CTX"
    exit 1
  fi
  success "Docker image built"
fi
272
273# === Per-file conversion ===
274convert_file() {
275 local INPUT_PATH="$1"
276 local OUTPUT_OVERRIDE="$2"
277
278 # --- Resolve paths ---
279 local INPUT INPUT_DIR INPUT_FILE OUTPUT OUTPUT_DIR OUTPUT_FILE
280 INPUT="$(cd "$(dirname "$INPUT_PATH")" && pwd)/$(basename "$INPUT_PATH")"
281 if [[ ! -f "$INPUT" ]]; then
282 echo -e "${RED}Error:${RESET} $INPUT_PATH not found"
283 return 1
284 fi
285 INPUT_DIR="$(dirname "$INPUT")"
286 INPUT_FILE="$(basename "$INPUT")"
287 OUTPUT="${OUTPUT_OVERRIDE:-${INPUT%.md}.pdf}"
288 OUTPUT_DIR="$(cd "$(dirname "$OUTPUT")" 2>/dev/null && pwd || (mkdir -p "$(dirname "$OUTPUT")" && cd "$(dirname "$OUTPUT")" && pwd))"
289 OUTPUT="${OUTPUT_DIR}/$(basename "$OUTPUT")"
290 OUTPUT_FILE="$(basename "$OUTPUT")"
291
292 # Preview mode: write temp file in input dir (Docker-mountable), move to /tmp after
293 local PREVIEW_FINAL=""
294 if [[ $PREVIEW -eq 1 ]]; then
295 local base="${INPUT_FILE%.md}"
296 PREVIEW_FINAL="/tmp/pdfify-preview-${base}.pdf"
297 OUTPUT_FILE=".pdfify-preview-${base}.pdf"
298 OUTPUT="${OUTPUT_DIR}/${OUTPUT_FILE}"
299 fi
300
301 info "Input: ${CYAN}${INPUT}${RESET}"
302 if [[ -n "$PREVIEW_FINAL" ]]; then
303 info "Output: ${CYAN}${PREVIEW_FINAL}${RESET} ${DIM}(preview)${RESET}"
304 else
305 info "Output: ${CYAN}${OUTPUT}${RESET}"
306 fi
307
308 # --- Parse YAML frontmatter ---
309 local FM_TITLE="" FM_SUBTITLE="" FM_AUTHOR="" FM_FOOTER="" FM_HEADER=""
310 local FM_TOC_LEVEL="" FM_DATE="" FM_NUMBERSECTIONS="" FM_NUMBERFROM="" FM_WATERMARK=""
311 local FM_DATE_HASH="" FM_DATE_DIRTY="" FM_DATE_LABEL=""
312
313 if head -1 "$INPUT" | grep -q '^---'; then
314 local FM_BLOCK
315 FM_BLOCK=$(awk 'NR==1 && /^---/{found=1; next} found && /^---/{exit} found{print}' "$INPUT")
316 extract_fm() { echo "$FM_BLOCK" | sed -n "s/^$1:[[:space:]]*//p" | sed 's/^["'"'"']\(.*\)["'"'"']$/\1/'; }
317 FM_TITLE=$(extract_fm "title")
318 FM_AUTHOR=$(extract_fm "author")
319 FM_SUBTITLE=$(extract_fm "subtitle")
320 FM_FOOTER=$(extract_fm "footer")
321 FM_HEADER=$(extract_fm "header")
322 FM_TOC_LEVEL=$(extract_fm "toc-level")
323 FM_DATE=$(extract_fm "date")
324 FM_NUMBERSECTIONS=$(extract_fm "numbersections")
325 FM_NUMBERFROM=$(extract_fm "numberfrom")
326 FM_WATERMARK=$(extract_fm "watermark")
327 fi
328
329 # --- CLI overrides frontmatter ---
330 [[ -n "$CLI_TITLE" ]] && FM_TITLE="$CLI_TITLE"
331 [[ -n "$CLI_SUBTITLE" ]] && FM_SUBTITLE="$CLI_SUBTITLE"
332 [[ -n "$CLI_AUTHOR" ]] && FM_AUTHOR="$CLI_AUTHOR"
333 [[ -n "$CLI_FOOTER" ]] && FM_FOOTER="$CLI_FOOTER"
334 [[ -n "$CLI_HEADER" ]] && FM_HEADER="$CLI_HEADER"
335 [[ -n "$CLI_DATE" ]] && FM_DATE="$CLI_DATE"
336 [[ -n "$CLI_WATERMARK" ]] && FM_WATERMARK="$CLI_WATERMARK"
337 [[ -n "$CLI_TOC_LEVEL" ]] && FM_TOC_LEVEL="$CLI_TOC_LEVEL"
338 [[ -n "$CLI_NUMBER_FROM" ]] && FM_NUMBERFROM="$CLI_NUMBER_FROM"
339 [[ -n "$CLI_NUMBERS" ]] && FM_NUMBERSECTIONS="$CLI_NUMBERS"
340
341 # --- Auto-detect document structure ---
342 # Count H1 headings (outside code blocks)
343 local H1_COUNT=0 IN_CODE_SCAN=0 FIRST_H1_TEXT=""
344 while IFS= read -r scanline || [[ -n "$scanline" ]]; do
345 [[ "$scanline" =~ ^\`\`\` ]] && { if [[ $IN_CODE_SCAN -eq 0 ]]; then IN_CODE_SCAN=1; else IN_CODE_SCAN=0; fi; continue; }
346 if [[ $IN_CODE_SCAN -eq 0 && "$scanline" =~ ^#\ ]]; then
347 H1_COUNT=$((H1_COUNT + 1))
348 [[ $H1_COUNT -eq 1 ]] && FIRST_H1_TEXT="${scanline#\# }"
349 fi
350 done < "$INPUT"
351
352 local FILE_TOC_LEVEL="${FM_TOC_LEVEL:-3}"
353 local FILE_NUMBERS=1
354 [[ "$FM_NUMBERSECTIONS" == "false" ]] && FILE_NUMBERS=0
355
356 # Auto-determine numberfrom based on structure (if not explicitly set)
357 local FILE_NUMBER_FROM="${FM_NUMBERFROM:-}"
358 local HIDE_FIRST_H1=0
359 if [[ -z "$FILE_NUMBER_FROM" ]]; then
360 if [[ $H1_COUNT -eq 1 ]]; then
361 # Single H1 = document title; number from H2, hide H1 in body
362 FILE_NUMBER_FROM=2
363 HIDE_FIRST_H1=1
364 # Use H1 text as title if no title set
365 [[ -z "$FM_TITLE" ]] && FM_TITLE="$FIRST_H1_TEXT"
366 detail "Auto: ${DIM}single H1 detected → using as title, numbering from H2${RESET}"
367 else
368 # Multiple H1s = sections; number from H1
369 FILE_NUMBER_FROM=1
370 detail "Auto: ${DIM}${H1_COUNT} H1s detected → numbering from H1${RESET}"
371 fi
372 fi
373 # Default date: current date/time
374 # Set to "none" in frontmatter or --date to suppress
375 FM_DATE_HASH="${FM_DATE_HASH:-}"
376 FM_DATE_DIRTY="${FM_DATE_DIRTY:-}"
377 if [[ "$FM_DATE" == "none" || "$FM_DATE" == "false" ]]; then
378 FM_DATE=""
379 elif [[ -z "$FM_DATE" && -z "$CLI_DATE" ]]; then
380 FM_DATE="$(date +"%Y-%m-%d %H:%M")"
381 fi
382
383 echo ""
384 [[ -n "$FM_TITLE" ]] && detail "Title: ${CYAN}${FM_TITLE}${RESET}"
385 [[ -n "$FM_SUBTITLE" ]] && detail "Subtitle: ${CYAN}${FM_SUBTITLE}${RESET}"
386 [[ -n "$FM_AUTHOR" ]] && detail "Author: ${CYAN}${FM_AUTHOR}${RESET}"
387 [[ -n "$FM_HEADER" ]] && detail "Header: ${CYAN}${FM_HEADER}${RESET}"
388 [[ -n "$FM_FOOTER" ]] && detail "Footer: ${CYAN}${FM_FOOTER}${RESET}"
389 detail "Date: ${CYAN}${FM_DATE}${RESET}"
390 detail "TOC: ${CYAN}level ${FILE_TOC_LEVEL}${RESET}"
391 detail "Numbered: ${CYAN}$([ $FILE_NUMBERS -eq 1 ] && echo "yes (from H${FILE_NUMBER_FROM})" || echo no)${RESET}"
392 [[ -n "$FM_WATERMARK" ]] && detail "Watermark: ${CYAN}${FM_WATERMARK}${RESET}"
393
394 # --- Git hash for source file ---
395 local GIT_STAMP=""
396
397# --- Discover images referenced in the markdown ---
398echo ""
399info "Scanning ${CYAN}${INPUT_FILE}${RESET} for assets..."
400
401IMAGES=()
402while IFS= read -r img; do
403 [[ -z "$img" ]] && continue
404 [[ "$img" =~ ^https?:// ]] && continue
405 if [[ -f "$INPUT_DIR/$img" ]]; then
406 IMAGES+=("$img")
407 success "Image: ${CYAN}${img}${RESET} ${DIM}($(du -h "$INPUT_DIR/$img" | cut -f1 | tr -d ' '))${RESET}"
408 else
409 warn "Image: ${YELLOW}${img}${RESET} ${RED}(not found)${RESET}"
410 fi
411done < <(sed -n 's/.*!\[[^]]*\](\([^)]*\)).*/\1/p' "$INPUT"; sed -n 's/.*src="\([^"]*\)".*/\1/p' "$INPUT")
412
413MERMAID_COUNT=$(grep -c '```mermaid' "$INPUT" || true)
414if [[ $MERMAID_COUNT -gt 0 ]]; then
415 success "Mermaid diagrams: ${CYAN}${MERMAID_COUNT}${RESET}"
416fi
417
418CALLOUT_COUNT=$(grep -c '> \[!' "$INPUT" || true)
419if [[ $CALLOUT_COUNT -gt 0 ]]; then
420 success "Callouts: ${CYAN}${CALLOUT_COUNT}${RESET}"
421fi
422
423TABLE_COUNT=$(grep -c '^|' "$INPUT" || true)
424CODE_COUNT=$(grep -c '```' "$INPUT" || true)
425CODE_COUNT=$(( (CODE_COUNT - MERMAID_COUNT * 2) / 2 ))
426[[ $TABLE_COUNT -gt 0 ]] && detail "Tables: ${TABLE_COUNT} rows"
427[[ $CODE_COUNT -gt 0 ]] && detail "Code blocks: ~${CODE_COUNT}"
428
429echo ""
430info "Found ${GREEN}${#IMAGES[@]}${RESET} image(s), ${GREEN}${MERMAID_COUNT}${RESET} mermaid diagram(s), ${GREEN}${CALLOUT_COUNT}${RESET} callout(s)"
431
432# --- Write the conversion script to a temp file (mounted into Docker) ---
433CONVERT_SCRIPT="${INPUT_DIR}/.pdfify-convert-$$.sh"
434trap 'rm -f "$CONVERT_SCRIPT"' EXIT
435cat > "$CONVERT_SCRIPT" <<'INNER_SCRIPT'
436#!/bin/bash
437set -euo pipefail
438
439RED='\033[0;31m'
440GREEN='\033[0;32m'
441YELLOW='\033[0;33m'
442BLUE='\033[0;34m'
443CYAN='\033[0;36m'
444BOLD='\033[1m'
445DIM='\033[2m'
446RESET='\033[0m'
447
448info() { echo -e "${BLUE}::${RESET} ${BOLD}$*${RESET}"; }
449success() { echo -e "${GREEN}✓${RESET} $*"; }
450detail() { echo -e " ${DIM}→${RESET} $*"; }
451
452INPUT_FILE="$1"
453OUTPUT_FILE="$2"
454WORKDIR="/work"
455
456cd "$WORKDIR"
457
458# --- Step 0: Strip first H1 if it's being used as document title ---
459HIDE_FIRST_H1="${HIDE_FIRST_H1:-0}"
460EFFECTIVE_INPUT="$INPUT_FILE"
461if [[ "$HIDE_FIRST_H1" == "1" ]]; then
462 STRIPPED=$(mktemp /tmp/pdfify-stripped-XXXXXX.md)
463 FOUND_H1=0
464 IN_CODE_BLK=0
465 IN_FMATTER=0
466 while IFS= read -r line || [[ -n "$line" ]]; do
467 [[ "$line" =~ ^\`\`\` ]] && { if [[ $IN_CODE_BLK -eq 0 ]]; then IN_CODE_BLK=1; else IN_CODE_BLK=0; fi; }
468 if [[ "$line" == "---" && $IN_CODE_BLK -eq 0 ]]; then
469 if [[ $IN_FMATTER -eq 0 && $FOUND_H1 -eq 0 ]]; then IN_FMATTER=1; else IN_FMATTER=0; fi
470 fi
471 # Skip the first H1 (and any blank line immediately after)
472 if [[ $FOUND_H1 -eq 0 && $IN_CODE_BLK -eq 0 && $IN_FMATTER -eq 0 && "$line" =~ ^#\ ]]; then
473 FOUND_H1=1
474 continue
475 fi
476 # Skip blank line right after removed H1
477 if [[ $FOUND_H1 -eq 1 && -z "$line" ]]; then
478 FOUND_H1=2
479 continue
480 fi
481 [[ $FOUND_H1 -eq 1 ]] && FOUND_H1=2
482 echo "$line" >> "$STRIPPED"
483 done < "$INPUT_FILE"
484 EFFECTIVE_INPUT="$(basename "$STRIPPED")"
485 detail "Stripped first H1 (promoted to title)"
486fi
487
488# --- Step 1: Pre-process Obsidian callouts ---
489info "Pre-processing callouts..."
490
491CALLOUT_MD=$(mktemp /tmp/pdfify-callout-XXXXXX.md)
492IN_CALLOUT=0
493CALLOUT_TYPE=""
494CALLOUT_TITLE=""
495CALLOUT_BUF=""
496CALLOUT_COUNT=0
497
# Escape the characters LaTeX treats specially so plain text can be passed
# safely as a macro argument. Prints the escaped text without a newline.
# Usage: _latex_escape TEXT
_latex_escape() {
  local text="$1" escaped="" ch i
  for (( i = 0; i < ${#text}; i++ )); do
    ch="${text:i:1}"
    case "$ch" in
      '\') escaped+='\textbackslash{}' ;;
      '&'|'%'|'$'|'#'|'_'|'{'|'}') escaped+="\\${ch}" ;;
      '~') escaped+='\textasciitilde{}' ;;
      '^') escaped+='\textasciicircum{}' ;;
      *) escaped+="$ch" ;;
    esac
  done
  printf '%s' "$escaped"
}

# Write the callout collected so far to $CALLOUT_MD, then reset the collector.
#
# When a callout is open (IN_CALLOUT=1 and CALLOUT_TYPE set), append to
# $CALLOUT_MD a raw-LaTeX fence opening the matching callout environment, the
# buffered body, and a raw-LaTeX fence closing it, and bump CALLOUT_COUNT.
# Callout types are matched case-insensitively; unknown types use calloutinfo.
#
# The title is escaped before it becomes the environment argument: it is
# user-written text, and an unescaped "%", "_", "&" or "#" there either
# breaks the LaTeX run or comments out the rest of the line. Consequently
# LaTeX markup typed into a callout title is shown literally.
#
# Globals: IN_CALLOUT, CALLOUT_TYPE, CALLOUT_TITLE, CALLOUT_BUF (read, then
#          reset on every call); CALLOUT_COUNT (incremented); CALLOUT_MD
#          (path of the file appended to).
flush_callout() {
  if [[ $IN_CALLOUT -eq 1 && -n "$CALLOUT_TYPE" ]]; then
    CALLOUT_COUNT=$((CALLOUT_COUNT + 1))
    local latex_type safe_title
    case "${CALLOUT_TYPE,,}" in
      info|note) latex_type="calloutinfo" ;;
      tip|hint) latex_type="callouttip" ;;
      warning|caution) latex_type="calloutwarning" ;;
      danger|error|bug) latex_type="calloutdanger" ;;
      example) latex_type="calloutexample" ;;
      quote|cite) latex_type="calloutquote" ;;
      *) latex_type="calloutinfo" ;;
    esac
    safe_title=$(_latex_escape "$CALLOUT_TITLE")
    {
      printf '\n'
      printf '%s\n' '```{=latex}'
      printf '%s\n' "\\begin{${latex_type}}{${safe_title}}"
      printf '%s\n' '```'
      printf '\n'
      printf '%s\n' "$CALLOUT_BUF"
      printf '\n'
      printf '%s\n' '```{=latex}'
      printf '%s\n' "\\end{${latex_type}}"
      printf '%s\n' '```'
      printf '\n'
    } >> "$CALLOUT_MD"
  fi
  IN_CALLOUT=0
  CALLOUT_TYPE=""
  CALLOUT_TITLE=""
  CALLOUT_BUF=""
}
528
529while IFS= read -r line || [[ -n "$line" ]]; do
530 if [[ "$line" =~ ^\>\ *\[!([a-zA-Z]+)\]\ *(.*) ]]; then
531 flush_callout
532 IN_CALLOUT=1
533 CALLOUT_TYPE="${BASH_REMATCH[1]}"
534 CALLOUT_TITLE="${BASH_REMATCH[2]:-${BASH_REMATCH[1]^}}"
535 continue
536 fi
537
538 if [[ $IN_CALLOUT -eq 1 ]]; then
539 if [[ "$line" =~ ^\>\ ?(.*) ]]; then
540 CALLOUT_BUF="${CALLOUT_BUF}${BASH_REMATCH[1]}
541"
542 continue
543 else
544 flush_callout
545 fi
546 fi
547
548 echo "$line" >> "$CALLOUT_MD"
549done < "${STRIPPED:-$INPUT_FILE}"
550flush_callout
551
552if [[ $CALLOUT_COUNT -gt 0 ]]; then
553 success "Converted $CALLOUT_COUNT callout(s)"
554fi
555
556# --- Step 1b+1c: Inject page breaks (after TOC, before each H1) ---
557BREAK_INJECTED=$(mktemp /tmp/pdfify-breaks-XXXXXX.md)
558H1_COUNT=0
559IN_FM=0
560IN_CODE=0
561DONE_TOC_BREAK=0
562while IFS= read -r line || [[ -n "$line" ]]; do
563 # Track code blocks (``` opens/closes)
564 if [[ "$line" =~ ^\`\`\` ]]; then
565 if [[ $IN_CODE -eq 0 ]]; then IN_CODE=1; else IN_CODE=0; fi
566 echo "$line" >> "$BREAK_INJECTED"
567 continue
568 fi
569
570 # Track frontmatter (only at start of file)
571 if [[ "$line" == "---" && $IN_CODE -eq 0 ]]; then
572 if [[ $IN_FM -eq 0 && $H1_COUNT -eq 0 ]]; then IN_FM=1; else IN_FM=0; fi
573 echo "$line" >> "$BREAK_INJECTED"
574 continue
575 fi
576
577 if [[ $IN_CODE -eq 0 && $IN_FM -eq 0 ]]; then
578 # Before first content after frontmatter: inject TOC page break
579 if [[ $DONE_TOC_BREAK -eq 0 && "$TOC_LEVEL" -gt 0 && -n "$line" ]]; then
580 echo "" >> "$BREAK_INJECTED"
581 echo '```{=latex}' >> "$BREAK_INJECTED"
582 echo '\newpage' >> "$BREAK_INJECTED"
583 echo '```' >> "$BREAK_INJECTED"
584 echo "" >> "$BREAK_INJECTED"
585 DONE_TOC_BREAK=1
586 fi
587
588 # Page break before each top-level section (except first)
589 # Build the marker: numberfrom=1 → "# ", numberfrom=2 → "## "
590 BREAK_HASHES=$(printf '#%.0s' $(seq 1 "$FILE_NUMBER_FROM"))
591 if [[ "$line" == "${BREAK_HASHES} "* ]]; then
592 # Make sure it's exactly that level, not deeper
593 NEXT_CHAR="${line:${#BREAK_HASHES}:1}"
594 if [[ "$NEXT_CHAR" != "#" ]]; then
595 H1_COUNT=$((H1_COUNT + 1))
596 if [[ $H1_COUNT -gt 1 ]]; then
597 echo "" >> "$BREAK_INJECTED"
598 echo '```{=latex}' >> "$BREAK_INJECTED"
599 echo '\newpage' >> "$BREAK_INJECTED"
600 echo '```' >> "$BREAK_INJECTED"
601 echo "" >> "$BREAK_INJECTED"
602 fi
603 fi
604 fi
605 fi
606
607 echo "$line" >> "$BREAK_INJECTED"
608done < "$CALLOUT_MD"
609rm -f "$CALLOUT_MD"
610CALLOUT_MD="$BREAK_INJECTED"
611
612# --- Step 2: Pre-render Mermaid blocks to PNG ---
613info "Pre-rendering Mermaid diagrams..."
614
615TEMP_MD=$(mktemp /tmp/pdfify-XXXXXX.md)
616MERMAID_COUNT=0
617IN_MERMAID=0
618MERMAID_BUF=""
619
620while IFS= read -r line || [[ -n "$line" ]]; do
621 if [[ "$line" =~ ^\`\`\`mermaid ]]; then
622 IN_MERMAID=1
623 MERMAID_BUF=""
624 continue
625 fi
626
627 if [[ $IN_MERMAID -eq 1 ]]; then
628 if [[ "$line" =~ ^\`\`\` ]]; then
629 IN_MERMAID=0
630 MERMAID_COUNT=$((MERMAID_COUNT + 1))
631 MERMAID_FILE="/tmp/mermaid-${MERMAID_COUNT}.mmd"
632 MERMAID_PNG="/tmp/mermaid-${MERMAID_COUNT}.png"
633
634 echo "$MERMAID_BUF" > "$MERMAID_FILE"
635
636 detail "Rendering diagram ${CYAN}#${MERMAID_COUNT}${RESET}..."
637 mmdc -i "$MERMAID_FILE" \
638 -o "$MERMAID_PNG" \
639 -w 1600 \
640 -b transparent \
641 -c /opt/mermaid-config.json \
642 -p /opt/puppeteer-config.json \
643 2>/dev/null || {
644 echo -e " ${YELLOW}⚠${RESET} Diagram $MERMAID_COUNT failed — inserting as code block"
645 echo '```' >> "$TEMP_MD"
646 echo "$MERMAID_BUF" >> "$TEMP_MD"
647 echo '```' >> "$TEMP_MD"
648 continue
649 }
650
651 SIZE=$(du -h "$MERMAID_PNG" 2>/dev/null | cut -f1 | tr -d ' ')
652 success "Diagram #${MERMAID_COUNT} rendered ${DIM}(${SIZE})${RESET}"
653
654 echo "" >> "$TEMP_MD"
655 echo "![Diagram ${MERMAID_COUNT}](${MERMAID_PNG})\\" >> "$TEMP_MD"
656 echo "" >> "$TEMP_MD"
657 else
658 MERMAID_BUF="${MERMAID_BUF}${line}
659"
660 fi
661 else
662 echo "$line" >> "$TEMP_MD"
663 fi
664done < "$CALLOUT_MD"
665
666# --- Lua filter: protect brackets in headings for titlesec ---
667# Square brackets in headings break titlesec (\SQSPL@scan error) because LaTeX
668# interprets [ as the start of an optional argument.
669BRACKET_FILTER=$(mktemp /tmp/pdfify-bracket-filter-XXXXXX.lua)
670cat > "$BRACKET_FILTER" <<'LUAFILTER'
671-- Protect square brackets in headings to prevent titlesec \SQSPL@scan errors.
672-- Brackets in headings make titlesec think they are optional arguments.
673-- We replace [ and ] with \lbrack/\rbrack in all inline types.
674
675function Header(el)
676 if FORMAT ~= "latex" and FORMAT ~= "pdf" then return nil end
677
678 el = el:walk {
679 Str = function(s)
680 if s.text:find("[%[%]]") then
681 local t = s.text:gsub("%[", "\\lbrack{}"):gsub("%]", "\\rbrack{}")
682 return pandoc.RawInline("latex", t)
683 end
684 end,
685 Code = function(c)
686 -- All code in headings must use \oldtexttt to bypass seqsplit
687 -- (seqsplit in titlesec moving arguments causes \SQSPL@scan errors)
688 local t = c.text
689 t = t:gsub("\\", "\\textbackslash ")
690 t = t:gsub("%%", "\\%%")
691 t = t:gsub("%#", "\\#")
692 t = t:gsub("%$", "\\$")
693 t = t:gsub("%&", "\\&")
694 t = t:gsub("_", "\\_")
695 t = t:gsub("%{", "\\{")
696 t = t:gsub("%}", "\\}")
697 t = t:gsub("~", "\\textasciitilde{}")
698 t = t:gsub("%^", "\\textasciicircum{}")
699 t = t:gsub("%[", "\\lbrack{}"):gsub("%]", "\\rbrack{}")
700 return pandoc.RawInline("latex", "\\oldtexttt{" .. t .. "}")
701 end
702 }
703 return el
704end
705LUAFILTER
706
707echo ""
708info "Generating PDF with Pandoc + XeLaTeX..."
709detail "Engine: xelatex"
710detail "Font: Roboto / Roboto Mono"
711detail "Margins: 0.5in, Font size: 10pt"
712echo ""
713
714# Write LaTeX preamble for modern styling
715PREAMBLE=$(mktemp /tmp/pdfify-preamble-XXXXXX.tex)
716cat > "$PREAMBLE" <<'LATEX'
717% --- Modern color scheme ---
718\usepackage{xcolor}
719\definecolor{accent}{HTML}{374151}
720\definecolor{accentdark}{HTML}{111827}
721\definecolor{codebg}{HTML}{F8F9FA}
722\definecolor{codeborder}{HTML}{E2E8F0}
723\definecolor{headrulecolor}{HTML}{E2E8F0}
724
725% --- Callout colors ---
726\definecolor{infobg}{HTML}{EFF6FF}
727\definecolor{infobar}{HTML}{3B82F6}
728\definecolor{infofg}{HTML}{1E40AF}
729\definecolor{tipbg}{HTML}{F0FDF4}
730\definecolor{tipbar}{HTML}{22C55E}
731\definecolor{tipfg}{HTML}{166534}
732\definecolor{warningbg}{HTML}{FFFBEB}
733\definecolor{warningbar}{HTML}{F59E0B}
734\definecolor{warningfg}{HTML}{92400E}
735\definecolor{dangerbg}{HTML}{FEF2F2}
736\definecolor{dangerbar}{HTML}{EF4444}
737\definecolor{dangerfg}{HTML}{991B1B}
738\definecolor{examplebg}{HTML}{F5F3FF}
739\definecolor{examplebar}{HTML}{8B5CF6}
740\definecolor{examplefg}{HTML}{5B21B6}
741\definecolor{quotecallbg}{HTML}{F8F9FA}
742\definecolor{quotecallbar}{HTML}{6B7280}
743\definecolor{quotecallfg}{HTML}{374151}
744
745% --- Code block wrapping and styling ---
746\usepackage{fvextra}
747\DefineVerbatimEnvironment{Highlighting}{Verbatim}{
748 breaklines,
749 breakanywhere,
750 commandchars=\\\{\},
751 fontsize=\small
752}
753
754% Background on code blocks via mdframed
755\usepackage[framemethod=tikz]{mdframed}
756
757% Override pandoc's Shaded environment
758\renewenvironment{Shaded}{%
759 \begin{mdframed}[
760 backgroundcolor=codebg,
761 hidealllines=true,
762 roundcorner=4pt,
763 innertopmargin=8pt,
764 innerbottommargin=8pt,
765 innerleftmargin=10pt,
766 innerrightmargin=10pt,
767 skipabove=10pt,
768 skipbelow=10pt
769 ]
770}{%
771 \end{mdframed}
772}
773
774% --- Callout environments ---
775\newenvironment{calloutbase}[3]{%
776 \begin{mdframed}[
777 backgroundcolor=#1,
778 linecolor=#2,
779 linewidth=3pt,
780 topline=false,
781 bottomline=false,
782 rightline=false,
783 innertopmargin=12pt,
784 innerbottommargin=12pt,
785 innerleftmargin=12pt,
786 innerrightmargin=12pt,
787 skipabove=12pt,
788 skipbelow=12pt,
789 roundcorner=0pt
790 ]
791 \textbf{\color{#2}#3}\par\smallskip\setlength{\parindent}{0pt}
792}{%
793 \end{mdframed}
794}
795
796\newenvironment{calloutinfo}[1]{\begin{calloutbase}{infobg}{infobar}{#1}}{\end{calloutbase}}
797\newenvironment{callouttip}[1]{\begin{calloutbase}{tipbg}{tipbar}{#1}}{\end{calloutbase}}
798\newenvironment{calloutwarning}[1]{\begin{calloutbase}{warningbg}{warningbar}{#1}}{\end{calloutbase}}
799\newenvironment{calloutdanger}[1]{\begin{calloutbase}{dangerbg}{dangerbar}{#1}}{\end{calloutbase}}
800\newenvironment{calloutexample}[1]{\begin{calloutbase}{examplebg}{examplebar}{#1}}{\end{calloutbase}}
801\newenvironment{calloutquote}[1]{\begin{calloutbase}{quotecallbg}{quotecallbar}{#1}}{\end{calloutbase}}
802
803% --- PDF bookmarks (sidebar navigation in PDF viewers) ---
804\usepackage{bookmark}
805\bookmarksetup{
806 numbered=false,
807 open,
808 openlevel=2
809}
810
811% --- Title banner ---
812\definecolor{titlebg}{HTML}{E5E7EB}
813
814% --- Page break after TOC ---
815\let\oldtableofcontents\tableofcontents
816\renewcommand{\tableofcontents}{\oldtableofcontents\clearpage}
817
818% --- TOC styling ---
819\usepackage{tocloft}
820\setlength{\cftbeforetoctitleskip}{0.5em}
821\renewcommand{\cfttoctitlefont}{\LARGE\bfseries\color{accentdark}\scshape}
822\renewcommand{\cftaftertoctitle}{\par\vspace{2pt}{\color{headrulecolor}\hrule height 1pt}\vspace{10pt}}
823\renewcommand{\cftsecfont}{\bfseries\color{accentdark}}
824\renewcommand{\cftsecpagefont}{\bfseries\color{accentdark}}
825\renewcommand{\cftsubsecfont}{\color{accent}}
826\renewcommand{\cftsubsecpagefont}{\color{accent}}
827\renewcommand{\cftsubsubsecfont}{\small\color{accent}}
828\renewcommand{\cftsubsubsecpagefont}{\small\color{accent}}
829\renewcommand{\cftsecleader}{\cftdotfill{\cftsecdotsep}}
830\renewcommand{\cftsecdotsep}{\cftdotsep}
831\setlength{\cftbeforesecskip}{6pt}
832\setlength{\cftbeforesubsecskip}{2pt}
833
834% --- Heading font ---
835\newfontfamily\headingfont{Roboto}[BoldFont={Roboto Bold}]
836
837% --- Symbol fallback (arrows, etc.) ---
838\usepackage{newunicodechar}
839\newfontfamily\fallbackfont{Liberation Sans}[Scale=MatchLowercase]
840\newunicodechar{→}{{\fallbackfont →}}
841\newunicodechar{←}{{\fallbackfont ←}}
842\newunicodechar{↔}{{\fallbackfont ↔}}
843\newunicodechar{⇒}{{\fallbackfont ⇒}}
844\newunicodechar{⇐}{{\fallbackfont ⇐}}
845\newunicodechar{✓}{{\fallbackfont ✓}}
846\newunicodechar{✗}{{\fallbackfont ✗}}
847
848% --- Modern section headings (tight, bold, dark) ---
849\usepackage{titlesec}
850
851% H1: # headings — large, small caps, dark, with rule
852\titleformat{\section}
853 {\LARGE\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=5}\scshape}
854 {\thesection}{0.5em}{}[\vspace{2pt}{\color{headrulecolor}\titlerule[1pt]}]
855\titlespacing*{\section}{0pt}{20pt}{10pt}
856
857% H2: ## headings
858\titleformat{\subsection}
859 {\Large\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=-1}}
860 {\thesubsection}{0.5em}{}
861\titlespacing*{\subsection}{0pt}{16pt}{8pt}
862
863% H3: ### headings
864\titleformat{\subsubsection}
865 {\large\bfseries\color{accent}}
866 {\thesubsubsection}{0.5em}{}
867\titlespacing*{\subsubsection}{0pt}{12pt}{6pt}
868
869% H4: #### headings
870\titleformat{\paragraph}[hang]
871 {\normalsize\bfseries\color{accent}}
872 {\theparagraph}{0.5em}{}
873\titlespacing*{\paragraph}{0pt}{10pt}{4pt}
874
875%%SECNUMDEPTH_PLACEHOLDER%%
876
877% --- Page style (header/footer injected by pdfify) ---
878\usepackage{fancyhdr}
879\pagestyle{fancy}
880\fancyhf{}
881\renewcommand{\headrulewidth}{0pt}
882\renewcommand{\footrulewidth}{0pt}
883\setlength{\headheight}{14pt}
884%%HEADER_PLACEHOLDER%%
885%%FOOTER_PLACEHOLDER%%
886% Make plain style identical to fancy (so title/TOC pages get the same footer)
887\fancypagestyle{plain}{\fancyhf{}\renewcommand{\headrulewidth}{0pt}\renewcommand{\footrulewidth}{0pt}%%FOOTER_PLAIN%%}
888
889% --- Blockquote styling (plain > quotes, not callouts) ---
890\usepackage{etoolbox}
891\renewenvironment{quote}{%
892 \begin{mdframed}[
893 backgroundcolor=infobg,
894 linecolor=infobar,
895 linewidth=3pt,
896 topline=false,
897 bottomline=false,
898 rightline=false,
899 innertopmargin=12pt,
900 innerbottommargin=12pt,
901 innerleftmargin=12pt,
902 innerrightmargin=12pt,
903 skipabove=10pt,
904 skipbelow=10pt,
905 roundcorner=0pt
906 ]%
907}{%
908 \end{mdframed}%
909}
910
911% --- Table styling ---
912\usepackage{booktabs}
913\usepackage{colortbl}
914\usepackage{longtable}
915\usepackage{tabularx}
916\arrayrulecolor{codeborder}
917
918% Allow line breaks in table cells and shrink monospace to fit
919\usepackage{array}
920\renewcommand{\arraystretch}{1.4}
921\let\oldtexttt\texttt
922\renewcommand{\texttt}[1]{{\small\oldtexttt{\seqsplit{#1}}}}
923\usepackage{seqsplit}
924\setlength{\tabcolsep}{4pt}
925
926% --- Images constrained to page ---
927\usepackage{grffile}
928\usepackage[export]{adjustbox}
929\let\oldincludegraphics\includegraphics
930\renewcommand{\includegraphics}[2][]{%
931 \oldincludegraphics[max width=\textwidth,max height=0.45\textheight,keepaspectratio,#1]{#2}%
932}
933
934% --- Figures don't float ---
935\usepackage{float}
936\floatplacement{figure}{H}
937
938% --- Caption styling ---
939\usepackage{caption}
940\captionsetup{labelformat=empty,font={small,color=gray},skip=4pt}
941
942% --- Tighter lists ---
943\usepackage{enumitem}
944\setlist{nosep,leftmargin=1.5em}
945
946% --- Links ---
947\usepackage{hyperref}
948\hypersetup{
949 colorlinks=true,
950 linkcolor=accent,
951 urlcolor=accent,
952 citecolor=accent
953}
954
955% --- Horizontal rules ---
956\renewcommand{\rule}[2]{\textcolor{headrulecolor}{\vrule width \textwidth height 0.5pt}}
957LATEX
958
# Settings arrive through the environment; anything unset or empty falls
# back to the default given here.
: "${TOC_LEVEL:=3}"
: "${FM_FOOTER:=}" "${FM_HEADER:=}" "${FM_AUTHOR:=}"
: "${FM_DATE:=}" "${FM_DATE_LABEL:=}" "${FM_DATE_HASH:=}" "${FM_DATE_DIRTY:=}"
: "${FILE_NUMBERS:=1}"
: "${FILE_NUMBER_FROM:=2}"
969
# Escape LaTeX special characters in a plain-text field.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (nothing is appended to it)
#
# The input is walked one character at a time, so every character is
# translated exactly once: a backslash becomes \textbackslash{} without the
# braces of that replacement being escaped again, and no temporary marker
# token is needed that could collide with real input. A case statement is
# used instead of ${var//x/y} to stay clear of bash's brace-parsing issues
# with } in parameter-substitution replacements.
latex_escape() {
  local text=$1 escaped='' ch i
  for ((i = 0; i < ${#text}; i++)); do
    ch=${text:i:1}
    case "$ch" in
      '\') escaped+='\textbackslash{}' ;;
      '&' | '%' | '$' | '#' | '_' | '{' | '}') escaped+="\\${ch}" ;;
      '~') escaped+='\textasciitilde{}' ;;
      '^') escaped+='\textasciicircum{}' ;;
      *) escaped+=$ch ;;
    esac
  done
  printf '%s' "$escaped"
}
986
# Inject title banner into preamble.
# With a title set, \maketitle is redefined to draw a colored banner as wide
# as the paper, holding the title plus optional subtitle, author and date
# lines, and it is invoked automatically via \AtBeginDocument. Without a
# title, \maketitle is redefined to do nothing.
# Every text field (including the date, its label and the hash) goes through
# latex_escape so characters such as & % _ # cannot break the LaTeX run.
FM_TITLE="${FM_TITLE:-}"
FM_TITLE_TEX="$(latex_escape "$FM_TITLE")"
FM_SUBTITLE_TEX="$(latex_escape "${FM_SUBTITLE:-}")"
FM_AUTHOR_TEX="$(latex_escape "${FM_AUTHOR:-}")"

{
  if [[ -n "$FM_TITLE" ]]; then
    cat <<'TITLE_STATIC'
\makeatletter
\renewcommand{\maketitle}{%
  \thispagestyle{fancy}%
  \vspace*{-\topskip}%
  \vspace*{-\headsep}%
  \vspace*{-\headheight}%
  \vspace*{-0.55in}%
  \noindent\hspace*{-0.5in}%
  \fcolorbox{titlebg}{titlebg}{%
    \parbox{\dimexpr\paperwidth-2\fboxsep-2\fboxrule}{%
      \hspace*{0.3in}\begin{minipage}{\dimexpr\textwidth}%
        \vspace{20pt}%
TITLE_STATIC

    printf '%s\n' "        {\\fontsize{28}{34}\\selectfont\\bfseries\\color{black}${FM_TITLE_TEX}}\\\\[6pt]%"

    FM_SUBTITLE="${FM_SUBTITLE:-}"
    if [[ -n "$FM_SUBTITLE" ]]; then
      printf '%s\n' "        {\\fontsize{14}{18}\\selectfont\\color{black}${FM_SUBTITLE_TEX}}\\\\[8pt]%"
    fi

    if [[ -n "$FM_AUTHOR" ]]; then
      printf '%s\n' "        {\\fontsize{11}{14}\\selectfont\\color{black}${FM_AUTHOR_TEX}}\\\\[6pt]%"
    fi

    if [[ -n "$FM_DATE" ]]; then
      DATE_VAL=""
      if [[ -n "$FM_DATE_HASH" ]]; then
        # Drop the trailing " · <something>" part of the date and re-append
        # the hash in monospace instead.
        DATE_VAL="$(latex_escape "${FM_DATE% · *}") · {\\texttt{$(latex_escape "$FM_DATE_HASH")}}"
      else
        DATE_VAL="$(latex_escape "$FM_DATE")"
      fi
      DIRTY_PART=""
      if [[ -n "${FM_DATE_DIRTY:-}" ]]; then
        DIRTY_PART=" {\\color{gray}\\itshape (dirty)}"
      fi
      if [[ -n "$FM_DATE_LABEL" ]]; then
        FM_DATE_LABEL_TEX="$(latex_escape "$FM_DATE_LABEL")"
        printf '%s\n' "        {\\fontsize{10}{12}\\selectfont\\color{black}${DATE_VAL} {\\color{gray}--- ${FM_DATE_LABEL_TEX}}${DIRTY_PART}}\\\\[4pt]%"
      else
        printf '%s\n' "        {\\fontsize{10}{12}\\selectfont\\color{black}${DATE_VAL}${DIRTY_PART}}\\\\[4pt]%"
      fi
    fi

    cat <<'TITLE_END'
        \vspace{6pt}%
      \end{minipage}%
    }%
  }%
  \par\vspace{20pt}%
}
\makeatother
TITLE_END
    printf '%s\n' '\AtBeginDocument{\maketitle}'
  else
    printf '%s\n' '\renewcommand{\maketitle}{}'
  fi
} >> "$PREAMBLE"
1053
# Inject header/footer into preamble.
# The placeholders in the preamble template are filled in with sed, so user
# text has to be escaped twice: once for LaTeX (latex_escape) and once for
# the sed replacement, where \ and & are special and | is the delimiter used
# below. Without this a footer such as "R&D" pastes the placeholder name back
# into the preamble, and one containing | makes the sed command invalid.
GIT_STAMP="${GIT_STAMP:-}"

# Make an arbitrary single-line string safe as the replacement of sed 's|..|..|'.
# Arguments: $1 - text to protect
# Outputs:   text with \, & and | each prefixed by a backslash
sed_replacement_escape() {
  printf '%s' "$1" | sed -e 's/[\\&|]/\\&/g'
}

FOOTER_L=""
FOOTER_C=""
FOOTER_R="\\\\fancyfoot[R]{\\\\color{gray}\\\\small Page \\\\thepage\\\\ of \\\\pageref*{LastPage}}"

if [[ -n "$FM_FOOTER" ]]; then
  FOOTER_TEXT="$(sed_replacement_escape "$(latex_escape "$FM_FOOTER")")"
  FOOTER_L="\\\\fancyfoot[L]{\\\\color{gray}\\\\small ${FOOTER_TEXT}}"
fi

sed -i "s|%%FOOTER_PLACEHOLDER%%|\\\\usepackage{lastpage}${FOOTER_L}${FOOTER_C}${FOOTER_R}|" "$PREAMBLE"
sed -i "s|%%FOOTER_PLAIN%%|${FOOTER_L}${FOOTER_C}${FOOTER_R}|" "$PREAMBLE"

if [[ -n "$FM_HEADER" ]]; then
  HEADER_TEXT="$(sed_replacement_escape "$(latex_escape "$FM_HEADER")")"
  sed -i "s|%%HEADER_PLACEHOLDER%%|\\\\fancyhead[C]{\\\\color{gray}\\\\small ${HEADER_TEXT}}|" "$PREAMBLE"
else
  sed -i "s|%%HEADER_PLACEHOLDER%%||" "$PREAMBLE"
fi
1071
# Inject watermark if set.
# Appends a tikz overlay that prints the watermark text rotated 45 degrees,
# upper-cased, in faint red at the center of every shipped-out page. The text
# is passed through latex_escape first so characters such as & _ % # in it
# do not break the LaTeX run.
FM_WATERMARK="${FM_WATERMARK:-}"
if [[ -n "$FM_WATERMARK" ]]; then
  FM_WATERMARK_TEX="$(latex_escape "$FM_WATERMARK")"
  # Unquoted heredoc: \\ yields a single backslash, ${...} is expanded
  cat >> "$PREAMBLE" <<WATERMARK
\\usepackage{eso-pic}
\\usepackage{tikz}
\\AddToShipoutPictureFG{%
  \\begin{tikzpicture}[remember picture,overlay]
    \\node[rotate=45,opacity=0.12,scale=10,text=red] at (current page.center) {\\textsf{\\textbf{\\MakeUppercase{${FM_WATERMARK_TEX}}}}};
  \\end{tikzpicture}%
}
WATERMARK
  detail "Watermark: ${CYAN}${FM_WATERMARK}${RESET}"
fi
1086
# Build TOC flags
# A TOC_LEVEL above zero enables the table of contents at that depth;
# anything else leaves it off.
# NOTE(review): the depth is handed to pandoc unchanged — no adjustment for
# numbered sections happens here; confirm that is intended.
TOC_FLAGS=()
if ! [[ "$TOC_LEVEL" -gt 0 ]]; then
  detail "TOC: ${DIM}disabled${RESET}"
else
  detail "TOC depth: ${CYAN}${TOC_LEVEL}${RESET}"
  TOC_FLAGS=(--toc "--toc-depth=${TOC_LEVEL}")
fi

# Author metadata is only passed to pandoc when one was supplied
AUTHOR_FLAGS=()
[[ -z "$FM_AUTHOR" ]] || AUTHOR_FLAGS=(-M "author=$FM_AUTHOR")
1101
# Numbered sections
# FILE_NUMBERS=1 switches on pandoc's --number-sections. FILE_NUMBER_FROM
# controls which heading level starts getting numbers
# (pandoc: section=1, subsection=2, subsubsection=3); the overrides below are
# cumulative, so a value of 3 or more emits both groups.
NUMBER_FLAGS=()
if [[ "$FILE_NUMBERS" == "1" ]]; then
  NUMBER_FLAGS=(--number-sections)

  {
    printf '%s\n' '\setcounter{secnumdepth}{4}'

    if [[ "$FILE_NUMBER_FROM" -ge 2 ]]; then
      # H1 (\section) unnumbered, H2 numbered as 1, 2, 3
      cat <<'SECNUM2'
\makeatletter
\renewcommand{\thesection}{}
\renewcommand{\thesubsection}{\arabic{subsection}}
\renewcommand{\thesubsubsection}{\thesubsection.\arabic{subsubsection}}
% Remove section number from titleformat without changing style
\titleformat{\section}
  {\LARGE\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=5}\scshape}
  {}{0em}{}[\vspace{2pt}{\color{headrulecolor}\titlerule[1pt]}]
\makeatother
SECNUM2

      # Starting at level 3 additionally strips the number from H2
      if [[ "$FILE_NUMBER_FROM" -ge 3 ]]; then
        cat <<'SECNUM3'
\renewcommand{\thesubsection}{}
\renewcommand{\thesubsubsection}{\arabic{subsubsection}}
\titleformat{\subsection}
  {\Large\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=-1}}
  {}{0em}{}
SECNUM3
      fi
    fi
  } >> "$PREAMBLE"
fi

# Remove placeholder
sed -i 's|%%SECNUMDEPTH_PLACEHOLDER%%||' "$PREAMBLE"
1141
# Render the prepared markdown to PDF with XeLaTeX, using the generated
# preamble (-H) and the optional TOC / author / numbering flags built above.
pandoc "$TEMP_MD" \
  -o "$OUTPUT_FILE" \
  --pdf-engine=xelatex \
  --lua-filter="$BRACKET_FILTER" \
  --resource-path=".:$WORKDIR" \
  --columns=72 \
  -V geometry:"margin=0.5in,includehead,includefoot" \
  -V fontsize=10pt \
  -V mainfont="Roboto" \
  -V monofont="Roboto Mono" \
  "${TOC_FLAGS[@]}" \
  "${AUTHOR_FLAGS[@]}" \
  "${NUMBER_FLAGS[@]}" \
  --highlight-style=tango \
  -H "$PREAMBLE" \
  --standalone

# Remove intermediate files
rm -f "$TEMP_MD" "$CALLOUT_MD" "$PREAMBLE" "${BRACKET_FILTER:-}" "${STRIPPED:-}" /tmp/mermaid-*.mmd /tmp/mermaid-*.png

# Rough page count: lines of the PDF that declare a page object. The pattern
# excludes "/Type /Pages" (page-tree nodes), which would inflate the number.
# grep -c prints "0" AND exits non-zero when nothing matches, so the fallback
# has to replace the value rather than be appended to it; a count of zero
# (e.g. page objects not visible as plain text) is reported as "?".
PAGES=$(strings "$OUTPUT_FILE" 2>/dev/null | grep -c -E '/Type ?/Page([^s]|$)') || PAGES="?"
SIZE=$(du -h "$OUTPUT_FILE" | cut -f1 | tr -d ' ')
success "PDF generated: ${CYAN}${SIZE}${RESET}, ~${CYAN}${PAGES}${RESET} pages"
1164INNER_SCRIPT
1165
1166chmod +x "$CONVERT_SCRIPT"
1167
1168# --- Run Docker ---
1169echo ""
1170info "Launching Docker container..."
1171detail "Mounting: ${CYAN}${INPUT_DIR}${RESET} → /work ${DIM}(read-only)${RESET}"
1172detail "Output: ${CYAN}${OUTPUT_DIR}${RESET} → /output"
1173echo ""
1174
1175CONVERT_BASENAME="$(basename "$CONVERT_SCRIPT")"
1176docker run --rm \
1177 -v "$INPUT_DIR:/work:ro" \
1178 -v "$OUTPUT_DIR:/output" \
1179 -e "TOC_LEVEL=$FILE_TOC_LEVEL" \
1180 -e "FM_FOOTER=$FM_FOOTER" \
1181 -e "FM_HEADER=$FM_HEADER" \
1182 -e "FM_AUTHOR=$FM_AUTHOR" \
1183 -e "FM_TITLE=$FM_TITLE" \
1184 -e "FM_SUBTITLE=$FM_SUBTITLE" \
1185 -e "FM_DATE=$FM_DATE" \
1186 -e "FM_DATE_LABEL=${FM_DATE_LABEL:-}" \
1187 -e "FM_DATE_HASH=${FM_DATE_HASH:-}" \
1188 -e "FM_DATE_DIRTY=${FM_DATE_DIRTY:-}" \
1189 -e "GIT_STAMP=${GIT_STAMP:-}" \
1190 -e "FILE_NUMBERS=$FILE_NUMBERS" \
1191 -e "FILE_NUMBER_FROM=$FILE_NUMBER_FROM" \
1192 -e "HIDE_FIRST_H1=$HIDE_FIRST_H1" \
1193 -e "FM_WATERMARK=$FM_WATERMARK" \
1194 --tmpfs /tmp:exec \
1195 "$IMAGE_NAME" "/work/$CONVERT_BASENAME" "$INPUT_FILE" "/output/$OUTPUT_FILE" \
1196 || {
1197 echo ""
1198 echo -e " ${RED}${BOLD}Error producing PDF.${RESET} Docker/pandoc exited with a non-zero status."
1199 echo ""
1200 return 1
1201 }
1202
1203# Move preview file to /tmp and clean up
1204if [[ -n "$PREVIEW_FINAL" ]]; then
1205 mv "$OUTPUT" "$PREVIEW_FINAL"
1206 OUTPUT="$PREVIEW_FINAL"
1207fi
1208
1209echo ""
1210echo -e " ${GREEN}${BOLD}PDF created:${RESET} ${CYAN}${OUTPUT}${RESET}"
1211echo ""
1212
1213# Open if requested
1214if [[ $OPEN -eq 1 ]]; then
1215 open_pdf "$OUTPUT"
1216fi
1217}
1218
# --- Process each input file ---
# Converts every file listed in POSITIONAL with convert_file and then prints
# a summary banner through header.
# Globals:   POSITIONAL (read), OUT_FILE (read)
# Outputs:   whatever convert_file prints, followed by the summary banner
# A failed conversion is only counted, never propagated, so one bad file does
# not stop the remaining ones and callers running under `set -e` (such as the
# watch loop) keep going.
run_all() {
  # Both variables are local so the loop variable no longer leaks into the
  # global scope; convert_file still receives the path as its first argument.
  local failed=0 input_file
  for input_file in "${POSITIONAL[@]}"; do
    convert_file "$input_file" "$OUT_FILE" || failed=$((failed + 1))
  done

  if ((failed == 0)); then
    header "Complete! (${#POSITIONAL[@]} file(s))"
  else
    header "${failed} of ${#POSITIONAL[@]} file(s) failed"
  fi
}
1232
run_all

# Print the SHA-256 of a file's content, or nothing when the file cannot be
# read right now (editors that save via rename leave a short gap).
# Arguments: $1 - path of the file to hash
_watch_hash() {
  local digest=""
  if [[ -r "$1" ]]; then
    digest=$(_sha256 < "$1") || digest=""
  fi
  # Keep only the hash, dropping the file-name column of the tool's output
  printf '%s' "${digest%% *}"
}

# --- Watch mode ---
# Poll the input files every 2 seconds and rebuild everything as soon as the
# content hash of any of them changes. Runs until interrupted.
if [[ $WATCH -eq 1 ]]; then
  info "Watching for changes... ${DIM}(Ctrl+C to stop)${RESET}"
  echo ""

  # Initial checksums, kept in two parallel indexed arrays. Indexed arrays
  # work on bash 3 (only associative arrays need bash 4), and looking hashes
  # up by position avoids matching file paths as grep patterns, which breaks
  # on names containing regex characters such as [ ].
  WATCH_PATHS=()
  WATCH_HASHES=()
  for f in "${POSITIONAL[@]}"; do
    fpath="$(cd "$(dirname "$f")" && pwd)/$(basename "$f")"
    WATCH_PATHS+=("$fpath")
    WATCH_HASHES+=("$(_watch_hash "$fpath")")
  done

  while true; do
    sleep 2
    CHANGED=0
    for idx in "${!WATCH_PATHS[@]}"; do
      NEW_HASH=$(_watch_hash "${WATCH_PATHS[$idx]}")
      # Empty hash: file unreadable at this instant; look again next poll
      # instead of aborting watch mode or triggering a rebuild that would fail.
      [[ -n "$NEW_HASH" ]] || continue
      if [[ "$NEW_HASH" != "${WATCH_HASHES[$idx]}" ]]; then
        CHANGED=1
        # Update stored checksum
        WATCH_HASHES[$idx]=$NEW_HASH
      fi
    done
    if [[ $CHANGED -eq 1 ]]; then
      echo ""
      info "Change detected — rebuilding..."
      echo ""
      run_all
    fi
  done
fi

# Check for updates (runs after success, fast timeout)
check_for_update
1274