Last active 1 month ago

Revision 24f66a03b4829cff6d91d161b15f6bbdda7721d2

pdfify.sh Raw
#!/usr/bin/env bash
#
# pdfify — Convert Markdown to beautiful PDF via Docker
# Supports: images, mermaid diagrams, tables, code blocks, Obsidian callouts
# Usage: ./pdfify <file.md> [file2.md ...] [options]

# Strict mode: stop on errors, on unset variables, and on failed pipeline stages.
set -o errexit -o nounset -o pipefail

# Release version, Docker image name, and the gist this script updates itself from.
VERSION="1.2.0"
IMAGE_NAME="pdfify"
GIST_ID="23f4514a1f0da1347d3f89926c23b68f"
GIST_RAW="https://gist.githubusercontent.com/jclement/${GIST_ID}/raw/pdfify.sh"

# Absolute path of this script: directory resolved through cd/pwd, file name appended.
SELF="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/${BASH_SOURCE[0]##*/}"
13
# --- Colors ---
# Kept as literal backslash sequences; `echo -e` turns them into escapes on output.

# Foreground colors
RED='\033[0;31m'     GREEN='\033[0;32m'
YELLOW='\033[0;33m'  BLUE='\033[0;34m'
MAGENTA='\033[0;35m' CYAN='\033[0;36m'

# Text attributes and reset
BOLD='\033[1m' DIM='\033[2m' RESET='\033[0m'
24
25# --- Portable SHA-256 (macOS has shasum, Linux often has sha256sum) ---
26_sha256() { shasum -a 256 "$@" 2>/dev/null || sha256sum "$@"; }
27
# --- Console output helpers ---
# All helpers print through `echo -e`, so callers may embed the color
# variables (literal \033 sequences) in the message text.
# The status glyphs match the helpers used by the embedded in-container script.

# Section-level progress message, prefixed with "::".
info() { echo -e "${BLUE}::${RESET} ${BOLD}$*${RESET}"; }

# Completed step, prefixed with a check mark.
success() { echo -e "${GREEN}✓${RESET} $*"; }

# Non-fatal problem, prefixed with a warning sign.
warn() { echo -e "${YELLOW}⚠${RESET} $*"; }

# Indented secondary line, prefixed with an arrow.
detail() { echo -e " ${DIM}→${RESET} $*"; }

# Banner: blank line, rule, title, rule, blank line.
header() {
  local rule="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  echo -e "\n${MAGENTA}${rule}${RESET}"
  echo -e "${MAGENTA} ${BOLD}$*${RESET}"
  echo -e "${MAGENTA}${rule}${RESET}\n"
}
33
# --- Self-update ---
# Download the latest script from the gist and replace this file ($SELF).
# Globals:  GIST_ID, GIST_RAW, SELF (read)
# Outputs:  progress and error messages on stdout
# Exits:    0 after a successful replacement, 1 on any failure (never returns)
do_update() {
  info "Updating pdfify from gist..."
  local raw_url tmp

  # Ask the API for the current raw URL. This lookup is best-effort: under
  # `set -e -o pipefail` a failed curl/grep would otherwise abort the script
  # right here, before the GIST_RAW fallback below could be used.
  raw_url=$(curl -fsSL "https://api.github.com/gists/${GIST_ID}" 2>/dev/null \
    | grep '"raw_url"' \
    | head -1 \
    | sed 's/.*"raw_url": *"//;s/".*//') || true
  [[ -n "$raw_url" ]] || raw_url="$GIST_RAW"

  tmp=$(mktemp)
  if ! curl -fsSL "$raw_url" -o "$tmp" 2>/dev/null; then
    rm -f "$tmp"
    echo -e "${RED}Error:${RESET} Failed to download update"
    exit 1
  fi

  # Sanity check: the download must be non-empty and start with a shebang.
  if [[ ! -s "$tmp" ]] || ! head -1 "$tmp" | grep -q '^#!/'; then
    rm -f "$tmp"
    echo -e "${RED}Error:${RESET} Downloaded file doesn't look like a script"
    exit 1
  fi

  chmod +x "$tmp"
  if ! mv "$tmp" "$SELF"; then
    # Do not leave the download behind when the target cannot be replaced.
    rm -f "$tmp"
    echo -e "${RED}Error:${RESET} Could not replace ${SELF}"
    exit 1
  fi

  success "Updated to latest version"
  detail "${CYAN}${SELF}${RESET}"
  exit 0
}
59
# Best-effort update notice: compare the SHA-256 of the gist's current raw
# file with the SHA-256 of this script and print a hint when they differ.
# Globals:  GIST_ID, SELF (read)
# Returns:  0 without output when either network step fails or yields nothing.
check_for_update() {
  local api_url="https://api.github.com/gists/${GIST_ID}"
  local raw_url remote_hash local_hash

  # Short timeouts keep an offline run from stalling.
  raw_url=$(
    curl -fsSL --connect-timeout 2 --max-time 3 "$api_url" 2>/dev/null \
      | grep '"raw_url"' \
      | head -1 \
      | sed 's/.*"raw_url": *"//;s/".*//'
  ) || return 0
  if [[ -z "$raw_url" ]]; then
    return 0
  fi

  remote_hash=$(
    curl -fsSL --connect-timeout 2 --max-time 5 "$raw_url" 2>/dev/null \
      | _sha256 \
      | cut -d' ' -f1
  ) || return 0
  local_hash=$(_sha256 < "$SELF" | cut -d' ' -f1)

  # Nothing to report when the remote hash is missing or matches ours.
  if [[ -z "$remote_hash" || "$remote_hash" == "$local_hash" ]]; then
    return 0
  fi
  echo -e "${YELLOW}${RESET} ${DIM}A newer version of pdfify is available. Run ${CYAN}pdfify --update${DIM} to upgrade.${RESET}"
}
70
# --- Args (CLI overrides frontmatter; "" means "use frontmatter default") ---

# Switches: 0 = off, 1 = on.
REBUILD=0 WATCH=0 OPEN=0 PREVIEW=0

OUT_FILE=""    # value of --out
NEXT_KEY=""    # option name whose value is expected in the next argument
POSITIONAL=()  # arguments that are not options

# CLI overrides — empty string means "not set, defer to frontmatter"
CLI_TOC_LEVEL="" CLI_NUMBERS="" CLI_NUMBER_FROM=""
CLI_TITLE="" CLI_SUBTITLE="" CLI_AUTHOR=""
CLI_HEADER="" CLI_FOOTER=""
CLI_DATE="" CLI_WATERMARK=""
91
# Parse the command line.
# Value options accept both "--key value" and "--key=value". For the first
# form the option name is parked in NEXT_KEY and the following argument is
# stored as its value. Anything that is not a known option is collected in
# POSITIONAL. --update, --clean, --version and --help act immediately and exit.
for arg in "$@"; do
  if [[ -n "$NEXT_KEY" ]]; then
    # This argument is the value of the option seen on the previous iteration.
    case "$NEXT_KEY" in
      toc-level) CLI_TOC_LEVEL="$arg" ;;
      number-from) CLI_NUMBER_FROM="$arg" ;;
      out) OUT_FILE="$arg" ;;
      title) CLI_TITLE="$arg" ;;
      subtitle) CLI_SUBTITLE="$arg" ;;
      author) CLI_AUTHOR="$arg" ;;
      header) CLI_HEADER="$arg" ;;
      footer) CLI_FOOTER="$arg" ;;
      date) CLI_DATE="$arg" ;;
      watermark) CLI_WATERMARK="$arg" ;;
    esac
    NEXT_KEY=""
    continue
  fi
  case "$arg" in
    --rebuild) REBUILD=1 ;;
    --update) do_update ;;
    --watch) WATCH=1 ;;
    --open) OPEN=1 ;;
    --preview)
      # Preview implies opening the result.
      PREVIEW=1
      OPEN=1
      ;;
    --no-numbers) CLI_NUMBERS="false" ;;
    --numbers) CLI_NUMBERS="true" ;;
    --clean)
      echo -e "${BLUE}::${RESET} ${BOLD}Removing Docker image ${CYAN}${IMAGE_NAME}${RESET}..."
      # Explicit if/else: with `a && b || c`, c would also run if b failed.
      if docker rmi "$IMAGE_NAME" >/dev/null 2>&1; then
        echo -e "${GREEN}${RESET} Image removed"
      else
        echo -e "${DIM}Image not found${RESET}"
      fi
      exit 0
      ;;
    --toc-level) NEXT_KEY="toc-level" ;;
    --toc-level=*) CLI_TOC_LEVEL="${arg#*=}" ;;
    --number-from) NEXT_KEY="number-from" ;;
    --number-from=*) CLI_NUMBER_FROM="${arg#*=}" ;;
    --out) NEXT_KEY="out" ;;
    --out=*) OUT_FILE="${arg#*=}" ;;
    --title) NEXT_KEY="title" ;;
    --title=*) CLI_TITLE="${arg#*=}" ;;
    --subtitle) NEXT_KEY="subtitle" ;;
    --subtitle=*) CLI_SUBTITLE="${arg#*=}" ;;
    --author) NEXT_KEY="author" ;;
    --author=*) CLI_AUTHOR="${arg#*=}" ;;
    --header) NEXT_KEY="header" ;;
    --header=*) CLI_HEADER="${arg#*=}" ;;
    --footer) NEXT_KEY="footer" ;;
    --footer=*) CLI_FOOTER="${arg#*=}" ;;
    --date) NEXT_KEY="date" ;;
    --date=*) CLI_DATE="${arg#*=}" ;;
    --watermark) NEXT_KEY="watermark" ;;
    --watermark=*) CLI_WATERMARK="${arg#*=}" ;;
    --version)
      echo "pdfify v${VERSION}"
      exit 0
      ;;
    --help|-h)
      echo -e "${BOLD}pdfify${RESET} v${VERSION} — Markdown to PDF"
      echo ""
      echo -e "${BOLD}Usage:${RESET} pdfify ${CYAN}<file.md> [file2.md ...]${RESET} [options]"
      echo ""
      echo -e "${BOLD}Options:${RESET}"
      echo -e " ${DIM}--out FILE${RESET} Output file (single input only)"
      echo -e " ${DIM}--toc-level N${RESET} TOC depth: 0=none, 1=H1, 2=H2, 3=H3 (default: 3)"
      echo -e " ${DIM}--numbers${RESET} Enable numbered headings (default)"
      echo -e " ${DIM}--no-numbers${RESET} Disable numbered headings"
      echo -e " ${DIM}--number-from N${RESET} Start numbering at heading level N (default: 2)"
      echo -e " ${DIM}--open${RESET} Open PDF after generation"
      echo -e " ${DIM}--preview${RESET} Render to /tmp and open (no permanent file)"
      echo -e " ${DIM}--watch${RESET} Watch for changes and regenerate"
      echo -e " ${DIM}--rebuild${RESET} Force rebuild the Docker image"
      echo -e " ${DIM}--clean${RESET} Remove the Docker image"
      echo -e " ${DIM}--update${RESET} Update pdfify to latest version from gist"
      echo -e " ${DIM}--version${RESET} Show version"
      echo ""
      echo -e "${BOLD}Overrides${RESET} (CLI trumps frontmatter):"
      echo -e " ${DIM}--title TEXT${RESET} ${DIM}--subtitle TEXT${RESET}"
      echo -e " ${DIM}--author TEXT${RESET} ${DIM}--header TEXT${RESET}"
      echo -e " ${DIM}--footer TEXT${RESET} ${DIM}--date TEXT${RESET}"
      echo -e " ${DIM}--watermark TEXT${RESET}"
      echo ""
      echo -e "${BOLD}Frontmatter:${RESET}"
      echo -e " title, subtitle, author, header, footer, toc-level, date,"
      echo -e " numbersections (true/false), numberfrom (1-4), watermark,"
      echo -e " pagebreak (true/false — page break before each top-level heading)"
      exit 0
      ;;
    *) POSITIONAL+=("$arg") ;;
  esac
done

# A value option given as the very last argument never received its value;
# report it instead of silently dropping the option.
if [[ -n "$NEXT_KEY" ]]; then
  echo -e "${RED}Error:${RESET} --${NEXT_KEY} requires a value"
  exit 1
fi
173
# At least one input file is required; otherwise show the short usage and fail.
if (( ${#POSITIONAL[@]} == 0 )); then
  echo -e "${BOLD}Usage:${RESET} pdfify ${CYAN}<file.md> [file2.md ...]${RESET} [options]"
  echo -e " Run ${CYAN}pdfify --help${RESET} for all options"
  exit 1
fi

# --out names a single output file, so it cannot be combined with several inputs.
if (( ${#POSITIONAL[@]} > 1 )) && [[ -n "$OUT_FILE" ]]; then
  echo -e "${RED}Error:${RESET} --out cannot be used with multiple input files"
  exit 1
fi
184
# --- Open helper ---
# Hand a file to the first available opener: `open`, then `xdg-open`.
# Arguments: $1 - path of the PDF to open
# Returns:   the opener's exit status, or 0 when neither command exists.
open_pdf() {
  local target="$1" opener
  for opener in open xdg-open; do
    if command -v "$opener" >/dev/null 2>&1; then
      "$opener" "$target"
      return
    fi
  done
}
194
195header "pdfify v${VERSION}"
196
# --- Embedded Dockerfile ---
# Image contents: pandoc, TeX Live with XeLaTeX, Chromium, fonts, and
# mermaid-cli, plus the mermaid and puppeteer config files under /opt.
# The Puppeteer ENV lines are placed before the npm install step so that they
# are in effect while mermaid-cli is installed; the system Chromium is used.
# The heredoc delimiter is quoted, so nothing inside is expanded by the shell.
DOCKERFILE=$(cat <<'DOCKERFILE_END'
FROM node:20-slim
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update -qq && \
    apt-get install -y --no-install-recommends \
    pandoc \
    texlive-latex-recommended \
    texlive-latex-extra \
    texlive-fonts-recommended \
    texlive-fonts-extra \
    texlive-xetex \
    lmodern \
    librsvg2-bin \
    chromium \
    ca-certificates \
    fonts-liberation \
    fonts-roboto \
    fonts-roboto-unhinted \
    fonts-noto-color-emoji \
    wget \
    fontconfig \
    && rm -rf /var/lib/apt/lists/*
RUN mkdir -p /usr/share/fonts/truetype/roboto-mono && \
    for style in Regular Bold Italic BoldItalic Medium MediumItalic Light LightItalic; do \
    wget -q "https://github.com/googlefonts/RobotoMono/raw/main/fonts/ttf/RobotoMono-${style}.ttf" \
    -O "/usr/share/fonts/truetype/roboto-mono/RobotoMono-${style}.ttf" 2>/dev/null || true; \
    done && \
    fc-cache -f
# Skip flags must be set before npm install runs the puppeteer install hook
ENV PUPPETEER_SKIP_DOWNLOAD=true
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
ENV CHROME_PATH=/usr/bin/chromium
RUN npm install -g @mermaid-js/mermaid-cli
RUN echo '{"maxTextSize": 90000, "flowchart": {"useMaxWidth": true}, "theme": "base", "themeVariables": {"primaryColor": "#3B82F6", "primaryBorderColor": "#1E40AF", "primaryTextColor": "#1E293B", "lineColor": "#475569", "xyChart": {"backgroundColor": "transparent", "plotColorPalette": "#2563EB,#DC2626,#16A34A,#D97706,#9333EA,#0891B2"}}}' > /opt/mermaid-config.json
RUN echo '{"args": ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--disable-gpu"]}' > /opt/puppeteer-config.json
WORKDIR /work
ENTRYPOINT ["/bin/bash"]
DOCKERFILE_END
)
236
# --- Build Docker image ---
# Reuse the image when it already exists (unless --rebuild was given);
# otherwise build it from the embedded Dockerfile with condensed output.
echo ""
if [[ $REBUILD -eq 1 ]]; then
  info "Removing existing Docker image ${CYAN}${IMAGE_NAME}${RESET}..."
  # A missing image is fine here; the goal is only that none is left.
  docker rmi "$IMAGE_NAME" >/dev/null 2>&1 || true
  success "Image removed"
fi

if docker image inspect "$IMAGE_NAME" >/dev/null 2>&1; then
  info "Docker image ${GREEN}${IMAGE_NAME}${RESET} found ${DIM}(cached)${RESET}"
  success "Reusing existing image"
else
  info "Building Docker image ${CYAN}${IMAGE_NAME}${RESET}..."
  detail "Installing: pandoc, XeLaTeX, mermaid-cli, Chromium, fonts"
  detail "This takes 2-3 minutes on first run (cached after)"
  echo ""
  BUILD_CTX=$(mktemp -d)
  # The legacy builder prints "Step N/M" lines, which are condensed below.
  # `|| true`: with errexit + pipefail a failed build would abort the script on
  # this pipeline, skipping both the cleanup and the full-output re-run below.
  # Success is decided by the image inspection that follows.
  echo "$DOCKERFILE" \
    | DOCKER_BUILDKIT=0 docker build -t "$IMAGE_NAME" -f - "$BUILD_CTX" 2>&1 \
    | while IFS= read -r line; do
      if [[ "$line" =~ ^Step\ ([0-9]+)/([0-9]+) ]]; then
        echo -e " ${CYAN}[${BASH_REMATCH[1]}/${BASH_REMATCH[2]}]${RESET} ${DIM}${line#*: }${RESET}"
      elif [[ "$line" == *"Successfully tagged"* ]]; then
        echo -e " ${GREEN}${line}${RESET}"
      elif [[ "$line" == *"ERROR"* || "$line" == *"error"* ]]; then
        echo -e " ${RED}${line}${RESET}"
      fi
    done || true
  rm -rf "$BUILD_CTX"
  if ! docker image inspect "$IMAGE_NAME" >/dev/null 2>&1; then
    echo -e "\n${RED}Docker build failed. Re-running with full output:${RESET}\n"
    BUILD_CTX=$(mktemp -d)
    # The re-run exists only to show the error; `|| true` lets the cleanup
    # and the explicit exit below run when it fails again.
    echo "$DOCKERFILE" | DOCKER_BUILDKIT=0 docker build -t "$IMAGE_NAME" -f - "$BUILD_CTX" || true
    rm -rf "$BUILD_CTX"
    exit 1
  fi
  success "Docker image built"
fi
273
274# === Per-file conversion ===
275convert_file() {
276 local INPUT_PATH="$1"
277 local OUTPUT_OVERRIDE="$2"
278
279 # --- Resolve paths ---
280 local INPUT INPUT_DIR INPUT_FILE OUTPUT OUTPUT_DIR OUTPUT_FILE
281 INPUT="$(cd "$(dirname "$INPUT_PATH")" && pwd)/$(basename "$INPUT_PATH")"
282 if [[ ! -f "$INPUT" ]]; then
283 echo -e "${RED}Error:${RESET} $INPUT_PATH not found"
284 return 1
285 fi
286 INPUT_DIR="$(dirname "$INPUT")"
287 INPUT_FILE="$(basename "$INPUT")"
288 OUTPUT="${OUTPUT_OVERRIDE:-${INPUT%.md}.pdf}"
289 OUTPUT_DIR="$(cd "$(dirname "$OUTPUT")" 2>/dev/null && pwd || (mkdir -p "$(dirname "$OUTPUT")" && cd "$(dirname "$OUTPUT")" && pwd))"
290 OUTPUT="${OUTPUT_DIR}/$(basename "$OUTPUT")"
291 OUTPUT_FILE="$(basename "$OUTPUT")"
292
293 # Preview mode: write temp file in input dir (Docker-mountable), move to /tmp after
294 local PREVIEW_FINAL=""
295 if [[ $PREVIEW -eq 1 ]]; then
296 local base="${INPUT_FILE%.md}"
297 PREVIEW_FINAL="/tmp/pdfify-preview-${base}.pdf"
298 OUTPUT_FILE=".pdfify-preview-${base}.pdf"
299 OUTPUT="${OUTPUT_DIR}/${OUTPUT_FILE}"
300 fi
301
302 info "Input: ${CYAN}${INPUT}${RESET}"
303 if [[ -n "$PREVIEW_FINAL" ]]; then
304 info "Output: ${CYAN}${PREVIEW_FINAL}${RESET} ${DIM}(preview)${RESET}"
305 else
306 info "Output: ${CYAN}${OUTPUT}${RESET}"
307 fi
308
309 # --- Parse YAML frontmatter ---
310 local FM_TITLE="" FM_SUBTITLE="" FM_AUTHOR="" FM_FOOTER="" FM_HEADER=""
311 local FM_TOC_LEVEL="" FM_DATE="" FM_NUMBERSECTIONS="" FM_NUMBERFROM="" FM_WATERMARK=""
312 local FM_DATE_HASH="" FM_DATE_DIRTY="" FM_DATE_LABEL=""
313
314 if head -1 "$INPUT" | grep -q '^---'; then
315 local FM_BLOCK
316 FM_BLOCK=$(awk 'NR==1 && /^---/{found=1; next} found && /^---/{exit} found{print}' "$INPUT")
317 extract_fm() { echo "$FM_BLOCK" | sed -n "s/^$1:[[:space:]]*//p" | sed 's/^["'"'"']\(.*\)["'"'"']$/\1/'; }
318 FM_TITLE=$(extract_fm "title")
319 FM_AUTHOR=$(extract_fm "author")
320 FM_SUBTITLE=$(extract_fm "subtitle")
321 FM_FOOTER=$(extract_fm "footer")
322 FM_HEADER=$(extract_fm "header")
323 FM_TOC_LEVEL=$(extract_fm "toc-level")
324 FM_DATE=$(extract_fm "date")
325 FM_NUMBERSECTIONS=$(extract_fm "numbersections")
326 FM_NUMBERFROM=$(extract_fm "numberfrom")
327 FM_WATERMARK=$(extract_fm "watermark")
328 FM_PAGEBREAK=$(extract_fm "pagebreak")
329 fi
330
331 # --- CLI overrides frontmatter ---
332 [[ -n "$CLI_TITLE" ]] && FM_TITLE="$CLI_TITLE"
333 [[ -n "$CLI_SUBTITLE" ]] && FM_SUBTITLE="$CLI_SUBTITLE"
334 [[ -n "$CLI_AUTHOR" ]] && FM_AUTHOR="$CLI_AUTHOR"
335 [[ -n "$CLI_FOOTER" ]] && FM_FOOTER="$CLI_FOOTER"
336 [[ -n "$CLI_HEADER" ]] && FM_HEADER="$CLI_HEADER"
337 [[ -n "$CLI_DATE" ]] && FM_DATE="$CLI_DATE"
338 [[ -n "$CLI_WATERMARK" ]] && FM_WATERMARK="$CLI_WATERMARK"
339 [[ -n "$CLI_TOC_LEVEL" ]] && FM_TOC_LEVEL="$CLI_TOC_LEVEL"
340 [[ -n "$CLI_NUMBER_FROM" ]] && FM_NUMBERFROM="$CLI_NUMBER_FROM"
341 [[ -n "$CLI_NUMBERS" ]] && FM_NUMBERSECTIONS="$CLI_NUMBERS"
342
343 # --- Auto-detect document structure ---
344 # Count H1 headings (outside code blocks)
345 local H1_COUNT=0 IN_CODE_SCAN=0 FIRST_H1_TEXT=""
346 while IFS= read -r scanline || [[ -n "$scanline" ]]; do
347 [[ "$scanline" =~ ^\`\`\` ]] && { if [[ $IN_CODE_SCAN -eq 0 ]]; then IN_CODE_SCAN=1; else IN_CODE_SCAN=0; fi; continue; }
348 if [[ $IN_CODE_SCAN -eq 0 && "$scanline" =~ ^#\ ]]; then
349 H1_COUNT=$((H1_COUNT + 1))
350 [[ $H1_COUNT -eq 1 ]] && FIRST_H1_TEXT="${scanline#\# }"
351 fi
352 done < "$INPUT"
353
354 local FILE_TOC_LEVEL="${FM_TOC_LEVEL:-3}"
355 local FILE_NUMBERS=1
356 [[ "$FM_NUMBERSECTIONS" == "false" ]] && FILE_NUMBERS=0
357 local FILE_PAGEBREAK=1
358 [[ "$FM_PAGEBREAK" == "false" ]] && FILE_PAGEBREAK=0
359
360 # Auto-determine numberfrom based on structure (if not explicitly set)
361 local FILE_NUMBER_FROM="${FM_NUMBERFROM:-}"
362 local HIDE_FIRST_H1=0
363 if [[ -z "$FILE_NUMBER_FROM" ]]; then
364 if [[ $H1_COUNT -eq 1 ]]; then
365 # Single H1 = document title; number from H2, hide H1 in body
366 FILE_NUMBER_FROM=2
367 HIDE_FIRST_H1=1
368 # Use H1 text as title if no title set
369 [[ -z "$FM_TITLE" ]] && FM_TITLE="$FIRST_H1_TEXT"
370 detail "Auto: ${DIM}single H1 detected → using as title, numbering from H2${RESET}"
371 else
372 # Multiple H1s = sections; number from H1
373 FILE_NUMBER_FROM=1
374 detail "Auto: ${DIM}${H1_COUNT} H1s detected → numbering from H1${RESET}"
375 fi
376 fi
377 # Default date: current date/time
378 # Set to "none" in frontmatter or --date to suppress
379 FM_DATE_HASH="${FM_DATE_HASH:-}"
380 FM_DATE_DIRTY="${FM_DATE_DIRTY:-}"
381 if [[ "$FM_DATE" == "none" || "$FM_DATE" == "false" ]]; then
382 FM_DATE=""
383 elif [[ -z "$FM_DATE" && -z "$CLI_DATE" ]]; then
384 FM_DATE="$(date +"%Y-%m-%d %H:%M")"
385 fi
386
387 echo ""
388 [[ -n "$FM_TITLE" ]] && detail "Title: ${CYAN}${FM_TITLE}${RESET}"
389 [[ -n "$FM_SUBTITLE" ]] && detail "Subtitle: ${CYAN}${FM_SUBTITLE}${RESET}"
390 [[ -n "$FM_AUTHOR" ]] && detail "Author: ${CYAN}${FM_AUTHOR}${RESET}"
391 [[ -n "$FM_HEADER" ]] && detail "Header: ${CYAN}${FM_HEADER}${RESET}"
392 [[ -n "$FM_FOOTER" ]] && detail "Footer: ${CYAN}${FM_FOOTER}${RESET}"
393 detail "Date: ${CYAN}${FM_DATE}${RESET}"
394 detail "TOC: ${CYAN}level ${FILE_TOC_LEVEL}${RESET}"
395 detail "Numbered: ${CYAN}$([ $FILE_NUMBERS -eq 1 ] && echo "yes (from H${FILE_NUMBER_FROM})" || echo no)${RESET}"
396 [[ -n "$FM_WATERMARK" ]] && detail "Watermark: ${CYAN}${FM_WATERMARK}${RESET}"
397
398 # --- Git hash for source file ---
399 local GIT_STAMP=""
400
401# --- Discover images referenced in the markdown ---
402echo ""
403info "Scanning ${CYAN}${INPUT_FILE}${RESET} for assets..."
404
405IMAGES=()
406while IFS= read -r img; do
407 [[ -z "$img" ]] && continue
408 [[ "$img" =~ ^https?:// ]] && continue
409 if [[ -f "$INPUT_DIR/$img" ]]; then
410 IMAGES+=("$img")
411 success "Image: ${CYAN}${img}${RESET} ${DIM}($(du -h "$INPUT_DIR/$img" | cut -f1 | tr -d ' '))${RESET}"
412 else
413 warn "Image: ${YELLOW}${img}${RESET} ${RED}(not found)${RESET}"
414 fi
415done < <(sed -n 's/.*!\[[^]]*\](\([^)]*\)).*/\1/p' "$INPUT"; sed -n 's/.*src="\([^"]*\)".*/\1/p' "$INPUT")
416
417MERMAID_COUNT=$(grep -c '```mermaid' "$INPUT" || true)
418if [[ $MERMAID_COUNT -gt 0 ]]; then
419 success "Mermaid diagrams: ${CYAN}${MERMAID_COUNT}${RESET}"
420fi
421
422CALLOUT_COUNT=$(grep -c '> \[!' "$INPUT" || true)
423if [[ $CALLOUT_COUNT -gt 0 ]]; then
424 success "Callouts: ${CYAN}${CALLOUT_COUNT}${RESET}"
425fi
426
427TABLE_COUNT=$(grep -c '^|' "$INPUT" || true)
428CODE_COUNT=$(grep -c '```' "$INPUT" || true)
429CODE_COUNT=$(( (CODE_COUNT - MERMAID_COUNT * 2) / 2 ))
430[[ $TABLE_COUNT -gt 0 ]] && detail "Tables: ${TABLE_COUNT} rows"
431[[ $CODE_COUNT -gt 0 ]] && detail "Code blocks: ~${CODE_COUNT}"
432
433echo ""
434info "Found ${GREEN}${#IMAGES[@]}${RESET} image(s), ${GREEN}${MERMAID_COUNT}${RESET} mermaid diagram(s), ${GREEN}${CALLOUT_COUNT}${RESET} callout(s)"
435
436# --- Write the conversion script to a temp file (mounted into Docker) ---
437CONVERT_SCRIPT="${INPUT_DIR}/.pdfify-convert-$$.sh"
438trap 'rm -f "$CONVERT_SCRIPT"' EXIT
439cat > "$CONVERT_SCRIPT" <<'INNER_SCRIPT'
440#!/bin/bash
441set -euo pipefail
442
443RED='\033[0;31m'
444GREEN='\033[0;32m'
445YELLOW='\033[0;33m'
446BLUE='\033[0;34m'
447CYAN='\033[0;36m'
448BOLD='\033[1m'
449DIM='\033[2m'
450RESET='\033[0m'
451
452info() { echo -e "${BLUE}::${RESET} ${BOLD}$*${RESET}"; }
453success() { echo -e "${GREEN}✓${RESET} $*"; }
454detail() { echo -e " ${DIM}→${RESET} $*"; }
455
456INPUT_FILE="$1"
457OUTPUT_FILE="$2"
458WORKDIR="/work"
459
460cd "$WORKDIR"
461
462# --- Step 0: Strip first H1 if it's being used as document title ---
463HIDE_FIRST_H1="${HIDE_FIRST_H1:-0}"
464EFFECTIVE_INPUT="$INPUT_FILE"
465if [[ "$HIDE_FIRST_H1" == "1" ]]; then
466 STRIPPED=$(mktemp /tmp/pdfify-stripped-XXXXXX.md)
467 FOUND_H1=0
468 IN_CODE_BLK=0
469 IN_FMATTER=0
470 while IFS= read -r line || [[ -n "$line" ]]; do
471 [[ "$line" =~ ^\`\`\` ]] && { if [[ $IN_CODE_BLK -eq 0 ]]; then IN_CODE_BLK=1; else IN_CODE_BLK=0; fi; }
472 if [[ "$line" == "---" && $IN_CODE_BLK -eq 0 ]]; then
473 if [[ $IN_FMATTER -eq 0 && $FOUND_H1 -eq 0 ]]; then IN_FMATTER=1; else IN_FMATTER=0; fi
474 fi
475 # Skip the first H1 (and any blank line immediately after)
476 if [[ $FOUND_H1 -eq 0 && $IN_CODE_BLK -eq 0 && $IN_FMATTER -eq 0 && "$line" =~ ^#\ ]]; then
477 FOUND_H1=1
478 continue
479 fi
480 # Skip blank line right after removed H1
481 if [[ $FOUND_H1 -eq 1 && -z "$line" ]]; then
482 FOUND_H1=2
483 continue
484 fi
485 [[ $FOUND_H1 -eq 1 ]] && FOUND_H1=2
486 echo "$line" >> "$STRIPPED"
487 done < "$INPUT_FILE"
488 EFFECTIVE_INPUT="$(basename "$STRIPPED")"
489 detail "Stripped first H1 (promoted to title)"
490fi
491
492# --- Step 1: Pre-process Obsidian callouts ---
493info "Pre-processing callouts..."
494
495CALLOUT_MD=$(mktemp /tmp/pdfify-callout-XXXXXX.md)
496IN_CALLOUT=0
497CALLOUT_TYPE=""
498CALLOUT_TITLE=""
499CALLOUT_BUF=""
500CALLOUT_COUNT=0
501
502flush_callout() {
503 if [[ $IN_CALLOUT -eq 1 && -n "$CALLOUT_TYPE" ]]; then
504 CALLOUT_COUNT=$((CALLOUT_COUNT + 1))
505 local latex_type
506 case "${CALLOUT_TYPE,,}" in
507 info|note) latex_type="calloutinfo" ;;
508 tip|hint) latex_type="callouttip" ;;
509 warning|caution) latex_type="calloutwarning" ;;
510 danger|error|bug) latex_type="calloutdanger" ;;
511 example) latex_type="calloutexample" ;;
512 quote|cite) latex_type="calloutquote" ;;
513 *) latex_type="calloutinfo" ;;
514 esac
515 echo "" >> "$CALLOUT_MD"
516 echo '```{=latex}' >> "$CALLOUT_MD"
517 echo "\\begin{${latex_type}}{${CALLOUT_TITLE}}" >> "$CALLOUT_MD"
518 echo '```' >> "$CALLOUT_MD"
519 echo "" >> "$CALLOUT_MD"
520 echo "$CALLOUT_BUF" >> "$CALLOUT_MD"
521 echo "" >> "$CALLOUT_MD"
522 echo '```{=latex}' >> "$CALLOUT_MD"
523 echo "\\end{${latex_type}}" >> "$CALLOUT_MD"
524 echo '```' >> "$CALLOUT_MD"
525 echo "" >> "$CALLOUT_MD"
526 fi
527 IN_CALLOUT=0
528 CALLOUT_TYPE=""
529 CALLOUT_TITLE=""
530 CALLOUT_BUF=""
531}
532
533while IFS= read -r line || [[ -n "$line" ]]; do
534 if [[ "$line" =~ ^\>\ *\[!([a-zA-Z]+)\]\ *(.*) ]]; then
535 flush_callout
536 IN_CALLOUT=1
537 CALLOUT_TYPE="${BASH_REMATCH[1]}"
538 CALLOUT_TITLE="${BASH_REMATCH[2]:-${BASH_REMATCH[1]^}}"
539 continue
540 fi
541
542 if [[ $IN_CALLOUT -eq 1 ]]; then
543 if [[ "$line" =~ ^\>\ ?(.*) ]]; then
544 CALLOUT_BUF="${CALLOUT_BUF}${BASH_REMATCH[1]}
545"
546 continue
547 else
548 flush_callout
549 fi
550 fi
551
552 echo "$line" >> "$CALLOUT_MD"
553done < "${STRIPPED:-$INPUT_FILE}"
554flush_callout
555
556if [[ $CALLOUT_COUNT -gt 0 ]]; then
557 success "Converted $CALLOUT_COUNT callout(s)"
558fi
559
560# --- Step 1b+1c: Inject page breaks (after TOC, before each H1) ---
561BREAK_INJECTED=$(mktemp /tmp/pdfify-breaks-XXXXXX.md)
562H1_COUNT=0
563IN_FM=0
564IN_CODE=0
565DONE_TOC_BREAK=0
566while IFS= read -r line || [[ -n "$line" ]]; do
567 # Track code blocks (``` opens/closes)
568 if [[ "$line" =~ ^\`\`\` ]]; then
569 if [[ $IN_CODE -eq 0 ]]; then IN_CODE=1; else IN_CODE=0; fi
570 echo "$line" >> "$BREAK_INJECTED"
571 continue
572 fi
573
574 # Track frontmatter (only at start of file)
575 if [[ "$line" == "---" && $IN_CODE -eq 0 ]]; then
576 if [[ $IN_FM -eq 0 && $H1_COUNT -eq 0 ]]; then IN_FM=1; else IN_FM=0; fi
577 echo "$line" >> "$BREAK_INJECTED"
578 continue
579 fi
580
581 if [[ $IN_CODE -eq 0 && $IN_FM -eq 0 ]]; then
582 # Before first content after frontmatter: inject TOC page break
583 if [[ $DONE_TOC_BREAK -eq 0 && "$TOC_LEVEL" -gt 0 && -n "$line" ]]; then
584 echo "" >> "$BREAK_INJECTED"
585 echo '```{=latex}' >> "$BREAK_INJECTED"
586 echo '\newpage' >> "$BREAK_INJECTED"
587 echo '```' >> "$BREAK_INJECTED"
588 echo "" >> "$BREAK_INJECTED"
589 DONE_TOC_BREAK=1
590 fi
591
592 # Page break before each top-level section (except first)
593 # Build the marker: numberfrom=1 → "# ", numberfrom=2 → "## "
594 BREAK_HASHES=$(printf '#%.0s' $(seq 1 "$FILE_NUMBER_FROM"))
595 if [[ "$line" == "${BREAK_HASHES} "* ]]; then
596 # Make sure it's exactly that level, not deeper
597 NEXT_CHAR="${line:${#BREAK_HASHES}:1}"
598 if [[ "$NEXT_CHAR" != "#" ]]; then
599 H1_COUNT=$((H1_COUNT + 1))
600 if [[ $H1_COUNT -gt 1 && $FILE_PAGEBREAK -eq 1 ]]; then
601 echo "" >> "$BREAK_INJECTED"
602 echo '```{=latex}' >> "$BREAK_INJECTED"
603 echo '\newpage' >> "$BREAK_INJECTED"
604 echo '```' >> "$BREAK_INJECTED"
605 echo "" >> "$BREAK_INJECTED"
606 fi
607 fi
608 fi
609 fi
610
611 echo "$line" >> "$BREAK_INJECTED"
612done < "$CALLOUT_MD"
613rm -f "$CALLOUT_MD"
614CALLOUT_MD="$BREAK_INJECTED"
615
616# --- Step 2: Pre-render Mermaid blocks to PNG ---
617info "Pre-rendering Mermaid diagrams..."
618
619TEMP_MD=$(mktemp /tmp/pdfify-XXXXXX.md)
620MERMAID_COUNT=0
621IN_MERMAID=0
622MERMAID_BUF=""
623
624while IFS= read -r line || [[ -n "$line" ]]; do
625 if [[ "$line" =~ ^\`\`\`mermaid ]]; then
626 IN_MERMAID=1
627 MERMAID_BUF=""
628 continue
629 fi
630
631 if [[ $IN_MERMAID -eq 1 ]]; then
632 if [[ "$line" =~ ^\`\`\` ]]; then
633 IN_MERMAID=0
634 MERMAID_COUNT=$((MERMAID_COUNT + 1))
635 MERMAID_FILE="/tmp/mermaid-${MERMAID_COUNT}.mmd"
636 MERMAID_PNG="/tmp/mermaid-${MERMAID_COUNT}.png"
637
638 echo "$MERMAID_BUF" > "$MERMAID_FILE"
639
640 detail "Rendering diagram ${CYAN}#${MERMAID_COUNT}${RESET}..."
641 mmdc -i "$MERMAID_FILE" \
642 -o "$MERMAID_PNG" \
643 -w 1600 \
644 -b transparent \
645 -c /opt/mermaid-config.json \
646 -p /opt/puppeteer-config.json \
647 2>/dev/null || {
648 echo -e " ${YELLOW}⚠${RESET} Diagram $MERMAID_COUNT failed — inserting as code block"
649 echo '```' >> "$TEMP_MD"
650 echo "$MERMAID_BUF" >> "$TEMP_MD"
651 echo '```' >> "$TEMP_MD"
652 continue
653 }
654
655 SIZE=$(du -h "$MERMAID_PNG" 2>/dev/null | cut -f1 | tr -d ' ')
656 success "Diagram #${MERMAID_COUNT} rendered ${DIM}(${SIZE})${RESET}"
657
658 echo "" >> "$TEMP_MD"
659 echo "![Diagram ${MERMAID_COUNT}](${MERMAID_PNG})\\" >> "$TEMP_MD"
660 echo "" >> "$TEMP_MD"
661 else
662 MERMAID_BUF="${MERMAID_BUF}${line}
663"
664 fi
665 else
666 echo "$line" >> "$TEMP_MD"
667 fi
668done < "$CALLOUT_MD"
669
670# --- Strip YAML frontmatter so pandoc doesn't generate its own title ---
671# pdfify already parses frontmatter above; letting pandoc see it causes a
672# duplicate title (pandoc's \maketitle + pdfify's custom title banner).
673if head -1 "$TEMP_MD" | grep -q '^---'; then
674 STRIPPED_FM=$(mktemp /tmp/pdfify-nofm-XXXXXX.md)
675 awk 'NR==1 && /^---/{skip=1; next} skip && /^---/{skip=0; next} !skip' "$TEMP_MD" > "$STRIPPED_FM"
676 mv "$STRIPPED_FM" "$TEMP_MD"
677fi
678
679# --- Lua filter: protect brackets in headings for titlesec ---
680# Square brackets in headings break titlesec (\SQSPL@scan error) because LaTeX
681# interprets [ as the start of an optional argument.
682BRACKET_FILTER=$(mktemp /tmp/pdfify-bracket-filter-XXXXXX.lua)
683cat > "$BRACKET_FILTER" <<'LUAFILTER'
684-- Protect square brackets in headings to prevent titlesec \SQSPL@scan errors.
685-- Brackets in headings make titlesec think they are optional arguments.
686-- We replace [ and ] with \lbrack/\rbrack in all inline types.
687
688function Header(el)
689 if FORMAT ~= "latex" and FORMAT ~= "pdf" then return nil end
690
691 el = el:walk {
692 Str = function(s)
693 if s.text:find("[%[%]]") then
694 local t = s.text:gsub("%[", "\\lbrack{}"):gsub("%]", "\\rbrack{}")
695 return pandoc.RawInline("latex", t)
696 end
697 end,
698 Code = function(c)
699 -- All code in headings must use \oldtexttt to bypass seqsplit
700 -- (seqsplit in titlesec moving arguments causes \SQSPL@scan errors)
701 local t = c.text
702 t = t:gsub("\\", "\\textbackslash ")
703 t = t:gsub("%%", "\\%%")
704 t = t:gsub("%#", "\\#")
705 t = t:gsub("%$", "\\$")
706 t = t:gsub("%&", "\\&")
707 t = t:gsub("_", "\\_")
708 t = t:gsub("%{", "\\{")
709 t = t:gsub("%}", "\\}")
710 t = t:gsub("~", "\\textasciitilde{}")
711 t = t:gsub("%^", "\\textasciicircum{}")
712 t = t:gsub("%[", "\\lbrack{}"):gsub("%]", "\\rbrack{}")
713 return pandoc.RawInline("latex", "\\oldtexttt{" .. t .. "}")
714 end
715 }
716 return el
717end
718LUAFILTER
719
720echo ""
721info "Generating PDF with Pandoc + XeLaTeX..."
722detail "Engine: xelatex"
723detail "Font: Roboto / Roboto Mono"
724detail "Margins: 0.5in, Font size: 10pt"
725echo ""
726
727# Write LaTeX preamble for modern styling
728PREAMBLE=$(mktemp /tmp/pdfify-preamble-XXXXXX.tex)
729cat > "$PREAMBLE" <<'LATEX'
730% --- Modern color scheme ---
731\usepackage{xcolor}
732\definecolor{accent}{HTML}{374151}
733\definecolor{accentdark}{HTML}{111827}
734\definecolor{codebg}{HTML}{F8F9FA}
735\definecolor{codeborder}{HTML}{E2E8F0}
736\definecolor{headrulecolor}{HTML}{E2E8F0}
737
738% --- Callout colors ---
739\definecolor{infobg}{HTML}{EFF6FF}
740\definecolor{infobar}{HTML}{3B82F6}
741\definecolor{infofg}{HTML}{1E40AF}
742\definecolor{tipbg}{HTML}{F0FDF4}
743\definecolor{tipbar}{HTML}{22C55E}
744\definecolor{tipfg}{HTML}{166534}
745\definecolor{warningbg}{HTML}{FFFBEB}
746\definecolor{warningbar}{HTML}{F59E0B}
747\definecolor{warningfg}{HTML}{92400E}
748\definecolor{dangerbg}{HTML}{FEF2F2}
749\definecolor{dangerbar}{HTML}{EF4444}
750\definecolor{dangerfg}{HTML}{991B1B}
751\definecolor{examplebg}{HTML}{F5F3FF}
752\definecolor{examplebar}{HTML}{8B5CF6}
753\definecolor{examplefg}{HTML}{5B21B6}
754\definecolor{quotecallbg}{HTML}{F8F9FA}
755\definecolor{quotecallbar}{HTML}{6B7280}
756\definecolor{quotecallfg}{HTML}{374151}
757
758% --- Code block wrapping and styling ---
759\usepackage{fvextra}
760\DefineVerbatimEnvironment{Highlighting}{Verbatim}{
761 breaklines,
762 breakanywhere,
763 commandchars=\\\{\},
764 fontsize=\small
765}
766
767% Background on code blocks via mdframed
768\usepackage[framemethod=tikz]{mdframed}
769
770% Override pandoc's Shaded environment (define first if pandoc didn't)
771\makeatletter
772\@ifundefined{Shaded}{\newenvironment{Shaded}{}{}}{}
773\makeatother
774\renewenvironment{Shaded}{%
775 \begin{mdframed}[
776 backgroundcolor=codebg,
777 hidealllines=true,
778 roundcorner=4pt,
779 innertopmargin=8pt,
780 innerbottommargin=8pt,
781 innerleftmargin=10pt,
782 innerrightmargin=10pt,
783 skipabove=10pt,
784 skipbelow=10pt
785 ]
786}{%
787 \end{mdframed}
788}
789
790% --- Callout environments ---
791\newenvironment{calloutbase}[3]{%
792 \begin{mdframed}[
793 backgroundcolor=#1,
794 linecolor=#2,
795 linewidth=3pt,
796 topline=false,
797 bottomline=false,
798 rightline=false,
799 innertopmargin=12pt,
800 innerbottommargin=12pt,
801 innerleftmargin=12pt,
802 innerrightmargin=12pt,
803 skipabove=12pt,
804 skipbelow=12pt,
805 roundcorner=0pt
806 ]
807 \textbf{\color{#2}#3}\par\smallskip\setlength{\parindent}{0pt}
808}{%
809 \end{mdframed}
810}
811
812\newenvironment{calloutinfo}[1]{\begin{calloutbase}{infobg}{infobar}{#1}}{\end{calloutbase}}
813\newenvironment{callouttip}[1]{\begin{calloutbase}{tipbg}{tipbar}{#1}}{\end{calloutbase}}
814\newenvironment{calloutwarning}[1]{\begin{calloutbase}{warningbg}{warningbar}{#1}}{\end{calloutbase}}
815\newenvironment{calloutdanger}[1]{\begin{calloutbase}{dangerbg}{dangerbar}{#1}}{\end{calloutbase}}
816\newenvironment{calloutexample}[1]{\begin{calloutbase}{examplebg}{examplebar}{#1}}{\end{calloutbase}}
817\newenvironment{calloutquote}[1]{\begin{calloutbase}{quotecallbg}{quotecallbar}{#1}}{\end{calloutbase}}
818
819% --- PDF bookmarks (sidebar navigation in PDF viewers) ---
820\usepackage{bookmark}
821\bookmarksetup{
822 numbered=false,
823 open,
824 openlevel=2
825}
826
827% --- Title banner ---
828\definecolor{titlebg}{HTML}{E5E7EB}
829
830% --- Page break after TOC ---
831\let\oldtableofcontents\tableofcontents
832\renewcommand{\tableofcontents}{\oldtableofcontents\clearpage}
833
834% --- TOC styling ---
835\usepackage{tocloft}
836\setlength{\cftbeforetoctitleskip}{0.5em}
837\renewcommand{\cfttoctitlefont}{\LARGE\bfseries\color{accentdark}\scshape}
838\renewcommand{\cftaftertoctitle}{\par\vspace{2pt}{\color{headrulecolor}\hrule height 1pt}\vspace{10pt}}
839\renewcommand{\cftsecfont}{\bfseries\color{accentdark}}
840\renewcommand{\cftsecpagefont}{\bfseries\color{accentdark}}
841\renewcommand{\cftsubsecfont}{\color{accent}}
842\renewcommand{\cftsubsecpagefont}{\color{accent}}
843\renewcommand{\cftsubsubsecfont}{\small\color{accent}}
844\renewcommand{\cftsubsubsecpagefont}{\small\color{accent}}
845\renewcommand{\cftsecleader}{\cftdotfill{\cftsecdotsep}}
846\renewcommand{\cftsecdotsep}{\cftdotsep}
847\setlength{\cftbeforesecskip}{6pt}
848\setlength{\cftbeforesubsecskip}{2pt}
849
850% --- Heading font ---
851\newfontfamily\headingfont{Roboto}[BoldFont={Roboto Bold}]
852
853% --- Symbol fallback (arrows, etc.) ---
854\usepackage{newunicodechar}
855\newfontfamily\fallbackfont{Liberation Sans}[Scale=MatchLowercase]
856\newunicodechar{→}{{\fallbackfont →}}
857\newunicodechar{←}{{\fallbackfont ←}}
858\newunicodechar{↔}{{\fallbackfont ↔}}
859\newunicodechar{⇒}{{\fallbackfont ⇒}}
860\newunicodechar{⇐}{{\fallbackfont ⇐}}
861\newunicodechar{✓}{{\fallbackfont ✓}}
862\newunicodechar{✗}{{\fallbackfont ✗}}
863
864% --- Modern section headings (tight, bold, dark) ---
865\usepackage{titlesec}
866
867% H1: # headings — large, small caps, dark, with rule
868\titleformat{\section}
869 {\LARGE\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=5}\scshape}
870 {\thesection}{0.5em}{}[\vspace{2pt}{\color{headrulecolor}\titlerule[1pt]}]
871\titlespacing*{\section}{0pt}{20pt}{10pt}
872
873% H2: ## headings
874\titleformat{\subsection}
875 {\Large\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=-1}}
876 {\thesubsection}{0.5em}{}
877\titlespacing*{\subsection}{0pt}{16pt}{8pt}
878
879% H3: ### headings
880\titleformat{\subsubsection}
881 {\large\bfseries\color{accent}}
882 {\thesubsubsection}{0.5em}{}
883\titlespacing*{\subsubsection}{0pt}{12pt}{6pt}
884
885% H4: #### headings
886\titleformat{\paragraph}[hang]
887 {\normalsize\bfseries\color{accent}}
888 {\theparagraph}{0.5em}{}
889\titlespacing*{\paragraph}{0pt}{10pt}{4pt}
890
891%%SECNUMDEPTH_PLACEHOLDER%%
892
893% --- Page style (header/footer injected by pdfify) ---
894\usepackage{fancyhdr}
895\pagestyle{fancy}
896\fancyhf{}
897\renewcommand{\headrulewidth}{0pt}
898\renewcommand{\footrulewidth}{0pt}
899\setlength{\headheight}{14pt}
900%%HEADER_PLACEHOLDER%%
901%%FOOTER_PLACEHOLDER%%
902% Make plain style identical to fancy (so title/TOC pages get the same footer)
903\fancypagestyle{plain}{\fancyhf{}\renewcommand{\headrulewidth}{0pt}\renewcommand{\footrulewidth}{0pt}%%FOOTER_PLAIN%%}
904
905% --- Blockquote styling (plain > quotes, not callouts) ---
906\usepackage{etoolbox}
907\renewenvironment{quote}{%
908 \begin{mdframed}[
909 backgroundcolor=infobg,
910 linecolor=infobar,
911 linewidth=3pt,
912 topline=false,
913 bottomline=false,
914 rightline=false,
915 innertopmargin=12pt,
916 innerbottommargin=12pt,
917 innerleftmargin=12pt,
918 innerrightmargin=12pt,
919 skipabove=10pt,
920 skipbelow=10pt,
921 roundcorner=0pt
922 ]%
923}{%
924 \end{mdframed}%
925}
926
927% --- Table styling ---
928\usepackage{booktabs}
929\usepackage{colortbl}
930\usepackage{longtable}
931\usepackage{tabularx}
932\arrayrulecolor{codeborder}
933
934% Alternating row shading
935\definecolor{tablerowgray}{HTML}{F3F4F6}
936\let\oldlongtable\longtable
937\let\endoldlongtable\endlongtable
938\renewenvironment{longtable}{\rowcolors{2}{white}{tablerowgray}\oldlongtable}{\endoldlongtable}
939
940% Allow line breaks in table cells and shrink monospace to fit
941\usepackage{array}
942\renewcommand{\arraystretch}{1.4}
943\let\oldtexttt\texttt
944\renewcommand{\texttt}[1]{{\small\oldtexttt{\seqsplit{#1}}}}
945\usepackage{seqsplit}
946\setlength{\tabcolsep}{4pt}
947
948% --- Images constrained to page ---
949\usepackage{grffile}
950\usepackage[export]{adjustbox}
951\let\oldincludegraphics\includegraphics
952\renewcommand{\includegraphics}[2][]{%
953 \oldincludegraphics[max width=\textwidth,max height=0.45\textheight,keepaspectratio,#1]{#2}%
954}
955
956% --- Figures don't float ---
957\usepackage{float}
958\floatplacement{figure}{H}
959
960% --- Caption styling ---
961\usepackage{caption}
962\captionsetup{labelformat=empty,font={small,color=gray},skip=4pt}
963
964% --- Tighter lists ---
965\usepackage{enumitem}
966\setlist{nosep,leftmargin=1.5em}
967
968% --- Links ---
969\usepackage{hyperref}
970\hypersetup{
971 colorlinks=true,
972 linkcolor=accent,
973 urlcolor=accent,
974 citecolor=accent
975}
976
977% --- Horizontal rules ---
978\renewcommand{\rule}[2]{\textcolor{headrulecolor}{\vrule width \textwidth height 0.5pt}}
979LATEX
980
981TOC_LEVEL="${TOC_LEVEL:-3}"
982FM_FOOTER="${FM_FOOTER:-}"
983FM_HEADER="${FM_HEADER:-}"
984FM_AUTHOR="${FM_AUTHOR:-}"
985FM_DATE="${FM_DATE:-}"
986FM_DATE_LABEL="${FM_DATE_LABEL:-}"
987FM_DATE_HASH="${FM_DATE_HASH:-}"
988FM_DATE_DIRTY="${FM_DATE_DIRTY:-}"
989FILE_NUMBERS="${FILE_NUMBERS:-1}"
990FILE_NUMBER_FROM="${FILE_NUMBER_FROM:-2}"
991FILE_PAGEBREAK="${FILE_PAGEBREAK:-1}"
992
# latex_escape TEXT
# Print TEXT (no trailing newline added) with LaTeX's special characters
# rewritten so they typeset literally. The work is done by sed rather than
# bash ${var//a/b} substitution, which mis-parses a } inside the replacement.
latex_escape() {
  local rules
  # Backslashes are parked behind a marker first and only turned into
  # \textbackslash{} at the very end; otherwise the { } rules below would
  # mangle the braces that belong to \textbackslash{} itself.
  rules='s/\\/@@BSLASH@@/g
s/&/\\&/g
s/%/\\%/g
s/\$/\\$/g
s/#/\\#/g
s/_/\\_/g
s/{/\\{/g
s/}/\\}/g
s/~/\\textasciitilde{}/g
s/\^/\\textasciicircum{}/g
s/@@BSLASH@@/\\textbackslash{}/g'
  printf '%s' "$1" | sed "$rules"
}
1009
# Inject title banner into preamble
#
# When FM_TITLE is set, \maketitle is redefined to draw a full-width shaded
# banner (title, then optional subtitle, author and date lines) and is invoked
# automatically at \begin{document}. Without a title, \maketitle becomes a
# no-op.
#
# Every value interpolated into the LaTeX below goes through latex_escape
# first. That includes the date, its label and the hash: an unescaped % would
# comment out the closing braces of the line, and & _ # $ abort the LaTeX run.
FM_TITLE="${FM_TITLE:-}"
FM_SUBTITLE="${FM_SUBTITLE:-}"
FM_TITLE_TEX="$(latex_escape "$FM_TITLE")"
FM_SUBTITLE_TEX="$(latex_escape "$FM_SUBTITLE")"
FM_AUTHOR_TEX="$(latex_escape "${FM_AUTHOR:-}")"
FM_DATE_TEX="$(latex_escape "${FM_DATE:-}")"
FM_DATE_LABEL_TEX="$(latex_escape "${FM_DATE_LABEL:-}")"
FM_DATE_HASH_TEX="$(latex_escape "${FM_DATE_HASH:-}")"

{
if [[ -n "$FM_TITLE" ]]; then
  # Fixed opening of the banner: cancel the top margin/header space and open
  # the coloured box plus the minipage that holds the text lines.
  cat <<'TITLE_STATIC'
\makeatletter
\renewcommand{\maketitle}{%
  \thispagestyle{fancy}%
  \vspace*{-\topskip}%
  \vspace*{-\headsep}%
  \vspace*{-\headheight}%
  \vspace*{-0.55in}%
  \noindent\hspace*{-0.5in}%
  \fcolorbox{titlebg}{titlebg}{%
    \parbox{\dimexpr\paperwidth-2\fboxsep-2\fboxrule}{%
      \hspace*{0.3in}\begin{minipage}{\dimexpr\textwidth}%
        \vspace{20pt}%
TITLE_STATIC

  printf '%s\n' "        {\\fontsize{28}{34}\\selectfont\\bfseries\\color{black}${FM_TITLE_TEX}}\\\\[6pt]%"

  if [[ -n "$FM_SUBTITLE" ]]; then
    printf '%s\n' "        {\\fontsize{14}{18}\\selectfont\\color{black}${FM_SUBTITLE_TEX}}\\\\[8pt]%"
  fi

  if [[ -n "${FM_AUTHOR:-}" ]]; then
    printf '%s\n' "        {\\fontsize{11}{14}\\selectfont\\color{black}${FM_AUTHOR_TEX}}\\\\[6pt]%"
  fi

  if [[ -n "${FM_DATE:-}" ]]; then
    if [[ -n "${FM_DATE_HASH:-}" ]]; then
      # Drop a trailing " · <something>" from the date text and re-append the
      # hash in monospace instead.
      DATE_VAL="${FM_DATE_TEX% · *} · {\\texttt{${FM_DATE_HASH_TEX}}}"
    else
      DATE_VAL="${FM_DATE_TEX}"
    fi
    DIRTY_PART=""
    if [[ -n "${FM_DATE_DIRTY:-}" ]]; then
      DIRTY_PART=" {\\color{gray}\\itshape (dirty)}"
    fi
    if [[ -n "${FM_DATE_LABEL:-}" ]]; then
      printf '%s\n' "        {\\fontsize{10}{12}\\selectfont\\color{black}${DATE_VAL} {\\color{gray}--- ${FM_DATE_LABEL_TEX}}${DIRTY_PART}}\\\\[4pt]%"
    else
      printf '%s\n' "        {\\fontsize{10}{12}\\selectfont\\color{black}${DATE_VAL}${DIRTY_PART}}\\\\[4pt]%"
    fi
  fi

  # Fixed closing of the banner.
  cat <<'TITLE_END'
        \vspace{6pt}%
      \end{minipage}%
    }%
  }%
  \par\vspace{20pt}%
}
\makeatother
TITLE_END
  echo '\AtBeginDocument{\maketitle}'
else
  echo '\renewcommand{\maketitle}{}'
fi
} >> "$PREAMBLE"
1076
# Inject header/footer into preamble
#
# The preamble carries three markers (%%FOOTER_PLACEHOLDER%%, %%FOOTER_PLAIN%%
# and %%HEADER_PLACEHOLDER%%) that are swapped for fancyhdr commands here.
# FM_FOOTER / FM_HEADER are free text, so they are LaTeX-escaped and the final
# replacement string is sed-escaped before it reaches sed.
GIT_STAMP="${GIT_STAMP:-}"

# sed_repl_escape TEXT
# Print TEXT made safe for the replacement half of a sed 's|..|..|' command:
# backslash, '&' (sed's "whole match") and the '|' delimiter get a leading
# backslash, and newlines are flattened to spaces because a raw newline would
# end the sed command.
sed_repl_escape() {
  printf '%s' "$1" | tr '\n' ' ' | sed -e 's/[\\&|]/\\&/g'
}

FOOTER_L=""
FOOTER_C=""
# Written as plain LaTeX; sed_repl_escape adds the sed-level escaping later.
FOOTER_R='\fancyfoot[R]{\color{gray}\small Page \thepage\ of \pageref*{LastPage}}'

if [[ -n "${FM_FOOTER:-}" ]]; then
  FOOTER_L='\fancyfoot[L]{\color{gray}\small '"$(latex_escape "$FM_FOOTER")"'}'
fi
FOOTER_ALL="${FOOTER_L}${FOOTER_C}${FOOTER_R}"

# Regular pages also load lastpage, which provides the LastPage label used by
# the "Page N of M" footer; the plain page style only needs the footer itself.
sed -i "s|%%FOOTER_PLACEHOLDER%%|$(sed_repl_escape "\\usepackage{lastpage}${FOOTER_ALL}")|" "$PREAMBLE"
sed -i "s|%%FOOTER_PLAIN%%|$(sed_repl_escape "$FOOTER_ALL")|" "$PREAMBLE"

if [[ -n "${FM_HEADER:-}" ]]; then
  HEADER_C='\fancyhead[C]{\color{gray}\small '"$(latex_escape "$FM_HEADER")"'}'
  sed -i "s|%%HEADER_PLACEHOLDER%%|$(sed_repl_escape "$HEADER_C")|" "$PREAMBLE"
else
  sed -i "s|%%HEADER_PLACEHOLDER%%||" "$PREAMBLE"
fi
1094
# Inject watermark if set
#
# FM_WATERMARK is stamped diagonally across every page (eso-pic + TikZ
# overlay, upper-cased, 12% opacity). The text is LaTeX-escaped first so that
# characters such as & % _ # in the watermark cannot break the preamble.
FM_WATERMARK="${FM_WATERMARK:-}"
if [[ -n "$FM_WATERMARK" ]]; then
  FM_WATERMARK_TEX="$(latex_escape "$FM_WATERMARK")"
  # Unquoted heredoc: \\ collapses to a single backslash, and the expanded
  # value of FM_WATERMARK_TEX is inserted verbatim.
  cat >> "$PREAMBLE" <<WATERMARK
\\usepackage{eso-pic}
\\usepackage{tikz}
\\AddToShipoutPictureFG{%
  \\begin{tikzpicture}[remember picture,overlay]
    \\node[rotate=45,opacity=0.12,scale=10,text=red] at (current page.center) {\\textsf{\\textbf{\\MakeUppercase{${FM_WATERMARK_TEX}}}}};
  \\end{tikzpicture}%
}
WATERMARK
  # The terminal message shows the original, unescaped text.
  detail "Watermark: ${CYAN}${FM_WATERMARK}${RESET}"
fi
1109
1110# Build TOC flags
1111# When numbering is on, headings shift by -1, so TOC depth needs +1 to compensate
1112TOC_FLAGS=()
1113if [[ "$TOC_LEVEL" -gt 0 ]]; then
1114 TOC_FLAGS+=(--toc --toc-depth="$TOC_LEVEL")
1115 detail "TOC depth: ${CYAN}${TOC_LEVEL}${RESET}"
1116else
1117 detail "TOC: ${DIM}disabled${RESET}"
1118fi
1119
1120AUTHOR_FLAGS=()
1121if [[ -n "$FM_AUTHOR" ]]; then
1122 AUTHOR_FLAGS+=(-M "author=$FM_AUTHOR")
1123fi
1124
1125# Numbered sections
1126NUMBER_FLAGS=()
1127if [[ "$FILE_NUMBERS" == "1" ]]; then
1128 NUMBER_FLAGS+=(--number-sections)
1129
1130 # numberfrom controls which heading level starts getting numbers
1131 # pandoc: section=1, subsection=2, subsubsection=3
1132 cat >> "$PREAMBLE" <<SECNUM
1133\\setcounter{secnumdepth}{4}
1134SECNUM
1135
1136 if [[ "$FILE_NUMBER_FROM" -ge 2 ]]; then
1137 # H1 (\section) unnumbered, H2 numbered as 1, 2, 3
1138 cat >> "$PREAMBLE" <<'SECNUM2'
1139\makeatletter
1140\renewcommand{\thesection}{}
1141\renewcommand{\thesubsection}{\arabic{subsection}}
1142\renewcommand{\thesubsubsection}{\thesubsection.\arabic{subsubsection}}
1143% Remove section number from titleformat without changing style
1144\titleformat{\section}
1145 {\LARGE\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=5}\scshape}
1146 {}{0em}{}[\vspace{2pt}{\color{headrulecolor}\titlerule[1pt]}]
1147\makeatother
1148SECNUM2
1149 fi
1150
1151 if [[ "$FILE_NUMBER_FROM" -ge 3 ]]; then
1152 cat >> "$PREAMBLE" <<'SECNUM3'
1153\renewcommand{\thesubsection}{}
1154\renewcommand{\thesubsubsection}{\arabic{subsubsection}}
1155\titleformat{\subsection}
1156 {\Large\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=-1}}
1157 {}{0em}{}
1158SECNUM3
1159 fi
1160fi
1161
1162# Remove placeholder
1163sed -i 's|%%SECNUMDEPTH_PLACEHOLDER%%||' "$PREAMBLE"
1164
1165pandoc "$TEMP_MD" \
1166 -o "$OUTPUT_FILE" \
1167 --pdf-engine=xelatex \
1168 --lua-filter="$BRACKET_FILTER" \
1169 --resource-path=".:$WORKDIR" \
1170 --columns=72 \
1171 -V geometry:"margin=0.5in,includehead,includefoot" \
1172 -V fontsize=10pt \
1173 -V mainfont="Roboto" \
1174 -V monofont="Roboto Mono" \
1175 "${TOC_FLAGS[@]}" \
1176 "${AUTHOR_FLAGS[@]}" \
1177 "${NUMBER_FLAGS[@]}" \
1178 --highlight-style=tango \
1179 -H "$PREAMBLE" \
1180 --standalone
1181
1182rm -f "$TEMP_MD" "$CALLOUT_MD" "$PREAMBLE" "${BRACKET_FILTER:-}" "${STRIPPED:-}" /tmp/mermaid-*.mmd /tmp/mermaid-*.png
1183
# count_pdf_pages FILE
# Print a rough page count for FILE: the number of text lines that carry a
# "/Type /Page" object marker. "/Type /Pages" (page-tree nodes) is excluded so
# tree nodes are not counted as pages. Prints "?" when no positive count can
# be obtained (unreadable file, no uncompressed markers).
count_pdf_pages() {
  local n
  # grep -c prints 0 *and* exits 1 when nothing matches, so the status is
  # ignored and the printed value is validated instead. cat is a fallback for
  # systems without strings(1).
  n=$({ strings "$1" 2>/dev/null || cat "$1" 2>/dev/null; } \
    | LC_ALL=C grep -cE '/Type ?/Page([^s]|$)' || true)
  if [[ "$n" =~ ^[1-9][0-9]*$ ]]; then
    printf '%s\n' "$n"
  else
    printf '?\n'
  fi
}

PAGES=$(count_pdf_pages "$OUTPUT_FILE")
SIZE=$(du -h "$OUTPUT_FILE" | cut -f1 | tr -d ' ')
success "PDF generated: ${CYAN}${SIZE}${RESET}, ~${CYAN}${PAGES}${RESET} pages"
1187INNER_SCRIPT
1188
1189chmod +x "$CONVERT_SCRIPT"
1190
1191# --- Run Docker ---
1192echo ""
1193info "Launching Docker container..."
1194detail "Mounting: ${CYAN}${INPUT_DIR}${RESET} → /work ${DIM}(read-only)${RESET}"
1195detail "Output: ${CYAN}${OUTPUT_DIR}${RESET} → /output"
1196echo ""
1197
1198CONVERT_BASENAME="$(basename "$CONVERT_SCRIPT")"
1199docker run --rm \
1200 -v "$INPUT_DIR:/work:ro" \
1201 -v "$OUTPUT_DIR:/output" \
1202 -e "TOC_LEVEL=$FILE_TOC_LEVEL" \
1203 -e "FM_FOOTER=$FM_FOOTER" \
1204 -e "FM_HEADER=$FM_HEADER" \
1205 -e "FM_AUTHOR=$FM_AUTHOR" \
1206 -e "FM_TITLE=$FM_TITLE" \
1207 -e "FM_SUBTITLE=$FM_SUBTITLE" \
1208 -e "FM_DATE=$FM_DATE" \
1209 -e "FM_DATE_LABEL=${FM_DATE_LABEL:-}" \
1210 -e "FM_DATE_HASH=${FM_DATE_HASH:-}" \
1211 -e "FM_DATE_DIRTY=${FM_DATE_DIRTY:-}" \
1212 -e "GIT_STAMP=${GIT_STAMP:-}" \
1213 -e "FILE_NUMBERS=$FILE_NUMBERS" \
1214 -e "FILE_NUMBER_FROM=$FILE_NUMBER_FROM" \
1215 -e "HIDE_FIRST_H1=$HIDE_FIRST_H1" \
1216 -e "FM_WATERMARK=$FM_WATERMARK" \
1217 -e "FILE_PAGEBREAK=$FILE_PAGEBREAK" \
1218 --tmpfs /tmp:exec \
1219 "$IMAGE_NAME" "/work/$CONVERT_BASENAME" "$INPUT_FILE" "/output/$OUTPUT_FILE" \
1220 || {
1221 echo ""
1222 echo -e " ${RED}${BOLD}Error producing PDF.${RESET} Docker/pandoc exited with a non-zero status."
1223 echo ""
1224 return 1
1225 }
1226
1227# Move preview file to /tmp and clean up
1228if [[ -n "$PREVIEW_FINAL" ]]; then
1229 mv "$OUTPUT" "$PREVIEW_FINAL"
1230 OUTPUT="$PREVIEW_FINAL"
1231fi
1232
1233echo ""
1234echo -e " ${GREEN}${BOLD}PDF created:${RESET} ${CYAN}${OUTPUT}${RESET}"
1235echo ""
1236
1237# Open if requested
1238if [[ $OPEN -eq 1 ]]; then
1239 open_pdf "$OUTPUT"
1240fi
1241}
1242
# --- Process each input file ---
# Run convert_file once per entry of POSITIONAL, passing OUT_FILE as the
# second argument, then print a summary banner via header. A failing
# conversion is only counted; the remaining files are still processed.
run_all() {
  local failed_count=0

  for input_file in "${POSITIONAL[@]}"; do
    if ! convert_file "$input_file" "$OUT_FILE"; then
      failed_count=$((failed_count + 1))
    fi
  done

  if [[ $failed_count -gt 0 ]]; then
    header "${failed_count} of ${#POSITIONAL[@]} file(s) failed"
  else
    header "Complete! (${#POSITIONAL[@]} file(s))"
  fi
}
1256
1257run_all
1258
# --- Watch mode ---

# _watch_file_hash PATH
# Print the SHA-256 hex digest of PATH on stdout.
# Returns 1 and prints nothing when PATH cannot be read or hashed, so callers
# can skip a file that is momentarily missing instead of aborting under
# `set -e`.
_watch_file_hash() {
  local digest
  [[ -r "$1" ]] || return 1
  digest=$({ _sha256 < "$1"; } 2>/dev/null) || return 1
  digest=${digest%% *}   # keep only the hash column
  [[ -n "$digest" ]] || return 1
  printf '%s\n' "$digest"
}

if [[ $WATCH -eq 1 ]]; then
  info "Watching for changes... ${DIM}(Ctrl+C to stop)${RESET}"
  echo ""

  # Baseline checksums live in two parallel indexed arrays (these work on
  # bash 3, unlike associative arrays). Paths are compared by index, never
  # used as a regex, so names containing [ ] * . are handled correctly, and
  # no temp file or EXIT trap is needed.
  WATCH_PATHS=()
  WATCH_HASHES=()
  for f in "${POSITIONAL[@]}"; do
    fpath="$(cd "$(dirname "$f")" && pwd)/$(basename "$f")"
    WATCH_PATHS+=("$fpath")
    # An unreadable file gets an empty baseline, so it triggers a rebuild as
    # soon as it becomes readable.
    WATCH_HASHES+=("$(_watch_file_hash "$fpath" || true)")
  done

  while true; do
    sleep 2
    CHANGED=0
    for i in "${!WATCH_PATHS[@]}"; do
      # Skip this poll if the file is temporarily unreadable.
      NEW_HASH=$(_watch_file_hash "${WATCH_PATHS[$i]}") || continue
      if [[ "$NEW_HASH" != "${WATCH_HASHES[$i]}" ]]; then
        CHANGED=1
        # Update stored checksum
        WATCH_HASHES[$i]="$NEW_HASH"
      fi
    done
    if [[ $CHANGED -eq 1 ]]; then
      echo ""
      info "Change detected — rebuilding..."
      echo ""
      run_all
    fi
  done
fi
1295
1296# Check for updates (runs after success, fast timeout)
1297check_for_update
1298