pdfify.sh
· 45 KiB · Bash
Raw
#!/usr/bin/env bash
set -euo pipefail
# pdfify — Convert Markdown to beautiful PDF via Docker
# Supports: images, mermaid diagrams, tables, code blocks, Obsidian callouts
# Usage: ./pdfify <file.md> [file2.md ...] [options]
# Release version; shown in the banner, `--version` and `--help`.
VERSION="1.2.0"
# Tag of the Docker image this script builds, inspects and removes.
IMAGE_NAME="pdfify"
# Gist that hosts the published copy of this script (queried via the GitHub API).
GIST_ID="23f4514a1f0da1347d3f89926c23b68f"
# Direct raw URL; do_update falls back to it when the API lookup yields nothing.
GIST_RAW="https://gist.githubusercontent.com/jclement/${GIST_ID}/raw/pdfify.sh"
# Absolute path of this script: directory made absolute via cd+pwd, file name
# re-appended. do_update overwrites it; check_for_update hashes it.
SELF="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"
# --- Colors ---
# ANSI SGR sequences kept as literal backslash escapes (single-quoted); they
# only turn into real escape characters when printed with `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
MAGENTA='\033[0;35m'
CYAN='\033[0;36m'
BOLD='\033[1m'
DIM='\033[2m'
RESET='\033[0m'
# --- Portable SHA-256 (macOS has shasum, Linux often has sha256sum) ---
_sha256() { shasum -a 256 "$@" 2>/dev/null || sha256sum "$@"; }
# Console output helpers. Each joins its arguments with spaces and prints one
# decorated line to stdout; backslash escapes (including the colour variables)
# are expanded at print time.

# Section-level progress line:  ":: <bold text>"
info() {
  printf '%b\n' "${BLUE}::${RESET} ${BOLD}$*${RESET}"
}

# Completed step:  "✓ <text>"
success() {
  printf '%b\n' "${GREEN}✓${RESET} $*"
}

# Non-fatal problem:  "⚠ <text>"
warn() {
  printf '%b\n' "${YELLOW}⚠${RESET} $*"
}

# Indented sub-item:  " → <text>"
detail() {
  printf '%b\n' " ${DIM}→${RESET} $*"
}

# Banner: blank line, rule, bold title, rule, blank line.
header() {
  printf '%b\n' "\n${MAGENTA}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}"
  printf '%b\n' "${MAGENTA} ${BOLD}$*${RESET}"
  printf '%b\n' "${MAGENTA}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}\n"
}
# --- Self-update ---
# Replace this script ($SELF) with the latest revision published in the gist,
# then exit. Invoked from argument parsing for `--update`.
#
# Globals read: GIST_ID, GIST_RAW, SELF, RED, CYAN, RESET.
# Exits 0 after a successful replacement, 1 if the download fails, the payload
# does not start with a shebang line, or the script cannot be replaced.
do_update() {
  info "Updating pdfify from gist..."
  local raw_url tmp

  # Ask the gist API for the current raw URL. The `|| true` is essential: the
  # script runs under `set -euo pipefail`, so without it a failed curl (offline,
  # rate-limited) or a grep with no match would abort the whole script right
  # here and the GIST_RAW fallback below could never be reached.
  raw_url=$(curl -fsSL "https://api.github.com/gists/${GIST_ID}" 2>/dev/null \
    | grep '"raw_url"' | head -1 | sed 's/.*"raw_url": *"//;s/".*//') || true
  if [[ -z "$raw_url" ]]; then
    raw_url="$GIST_RAW"
  fi

  tmp=$(mktemp)
  if ! curl -fsSL "$raw_url" -o "$tmp" 2>/dev/null; then
    rm -f "$tmp"
    echo -e "${RED}Error:${RESET} Failed to download update"
    exit 1
  fi

  # Sanity check: non-empty and starts with a shebang, so an HTML error page or
  # truncated download never overwrites the working script.
  if [[ ! -s "$tmp" ]] || ! head -1 "$tmp" | grep -q '^#!/'; then
    rm -f "$tmp"
    echo -e "${RED}Error:${RESET} Downloaded file doesn't look like a script"
    exit 1
  fi

  # mktemp creates the file 0600; a bare `chmod +x` would leave it 0711, which
  # other users cannot read (and therefore cannot run, since bash must read the
  # script). Set an explicit, conventional mode instead.
  chmod 755 "$tmp"
  if ! mv "$tmp" "$SELF"; then
    rm -f "$tmp"
    echo -e "${RED}Error:${RESET} Could not replace ${SELF}"
    exit 1
  fi
  success "Updated to latest version"
  detail "${CYAN}${SELF}${RESET}"
  exit 0
}
# Best-effort update notice: compare the SHA-256 of the published gist file
# with the SHA-256 of this script and print a one-line hint when they differ.
# Network lookups use short timeouts, and any failure or empty result there
# makes the function return 0 silently.
# Globals read: GIST_ID, SELF, colour variables. Uses _sha256.
check_for_update() {
  local gist_api="https://api.github.com/gists/${GIST_ID}"
  local raw_url remote_hash local_hash

  # Step 1: resolve the raw URL of the gist's file.
  if ! raw_url=$(curl -fsSL --connect-timeout 2 --max-time 3 "$gist_api" 2>/dev/null \
      | grep '"raw_url"' | head -1 | sed 's/.*"raw_url": *"//;s/".*//'); then
    return 0
  fi
  if [[ -z "$raw_url" ]]; then
    return 0
  fi

  # Step 2: hash the remote copy, then the local one.
  if ! remote_hash=$(curl -fsSL --connect-timeout 2 --max-time 5 "$raw_url" 2>/dev/null \
      | _sha256 | cut -d' ' -f1); then
    return 0
  fi
  local_hash=$(_sha256 < "$SELF" | cut -d' ' -f1)

  # Step 3: stay quiet unless a remote hash exists and it differs.
  if [[ -z "$remote_hash" ]]; then
    return 0
  fi
  if [[ "$remote_hash" == "$local_hash" ]]; then
    return 0
  fi
  echo -e "${YELLOW}⚠${RESET} ${DIM}A newer version of pdfify is available. Run ${CYAN}pdfify --update${DIM} to upgrade.${RESET}"
}
# --- Args (CLI overrides frontmatter; "" means "use frontmatter default") ---
REBUILD=0
WATCH=0
OPEN=0
PREVIEW=0
OUT_FILE=""
# Name of the option still waiting for its value (set by `--key`, consumed by
# the next argument); empty when no value is pending.
NEXT_KEY=""
# Arguments that are not options, in order: the input files.
POSITIONAL=()
# CLI overrides — empty string means "not set, defer to frontmatter"
CLI_TOC_LEVEL=""
CLI_NUMBERS=""
CLI_NUMBER_FROM=""
CLI_TITLE=""
CLI_SUBTITLE=""
CLI_AUTHOR=""
CLI_HEADER=""
CLI_FOOTER=""
CLI_DATE=""
CLI_WATERMARK=""

# Print the --help screen to stdout.
_print_help() {
  echo -e "${BOLD}pdfify${RESET} v${VERSION} — Markdown to PDF"
  echo ""
  echo -e "${BOLD}Usage:${RESET} pdfify ${CYAN}<file.md> [file2.md ...]${RESET} [options]"
  echo ""
  echo -e "${BOLD}Options:${RESET}"
  echo -e " ${DIM}--out FILE${RESET} Output file (single input only)"
  echo -e " ${DIM}--toc-level N${RESET} TOC depth: 0=none, 1=H1, 2=H2, 3=H3 (default: 3)"
  echo -e " ${DIM}--numbers${RESET} Enable numbered headings (default)"
  echo -e " ${DIM}--no-numbers${RESET} Disable numbered headings"
  echo -e " ${DIM}--number-from N${RESET} Start numbering at heading level N (default: 2)"
  echo -e " ${DIM}--open${RESET} Open PDF after generation"
  echo -e " ${DIM}--preview${RESET} Render to /tmp and open (no permanent file)"
  echo -e " ${DIM}--watch${RESET} Watch for changes and regenerate"
  echo -e " ${DIM}--rebuild${RESET} Force rebuild the Docker image"
  echo -e " ${DIM}--clean${RESET} Remove the Docker image"
  echo -e " ${DIM}--update${RESET} Update pdfify to latest version from gist"
  echo -e " ${DIM}--version${RESET} Show version"
  echo ""
  echo -e "${BOLD}Overrides${RESET} (CLI trumps frontmatter):"
  echo -e " ${DIM}--title TEXT${RESET} ${DIM}--subtitle TEXT${RESET}"
  echo -e " ${DIM}--author TEXT${RESET} ${DIM}--header TEXT${RESET}"
  echo -e " ${DIM}--footer TEXT${RESET} ${DIM}--date TEXT${RESET}"
  echo -e " ${DIM}--watermark TEXT${RESET}"
  echo ""
  echo -e "${BOLD}Frontmatter:${RESET}"
  echo -e " title, subtitle, author, header, footer, toc-level, date,"
  echo -e " numbersections (true/false), numberfrom (1-4), watermark,"
  echo -e " pagebreak (true/false — page break before each top-level heading)"
}

# Parse the command line into the globals declared above.
#
# Value-taking options accept both `--key value` and `--key=value`. Flags and
# files may be mixed in any order; everything that is not an option is
# appended to POSITIONAL. `--update`, `--clean`, `--version` and `--help` act
# immediately and exit the script.
#
# Exits 1 (before any Docker work happens) when
#   * an unknown `--option` is given — it used to be treated as an input file
#     and only failed later, after the image build; or
#   * a value-taking option is the last argument — it used to be ignored.
_parse_cli_args() {
  local arg
  for arg in "$@"; do
    # A previous `--key` is waiting for its value: this argument is that value,
    # whatever it looks like.
    if [[ -n "$NEXT_KEY" ]]; then
      case "$NEXT_KEY" in
        toc-level) CLI_TOC_LEVEL="$arg" ;;
        number-from) CLI_NUMBER_FROM="$arg" ;;
        out) OUT_FILE="$arg" ;;
        title) CLI_TITLE="$arg" ;;
        subtitle) CLI_SUBTITLE="$arg" ;;
        author) CLI_AUTHOR="$arg" ;;
        header) CLI_HEADER="$arg" ;;
        footer) CLI_FOOTER="$arg" ;;
        date) CLI_DATE="$arg" ;;
        watermark) CLI_WATERMARK="$arg" ;;
      esac
      NEXT_KEY=""
      continue
    fi
    case "$arg" in
      --rebuild) REBUILD=1 ;;
      --update) do_update ;;
      --watch) WATCH=1 ;;
      --open) OPEN=1 ;;
      --preview) PREVIEW=1; OPEN=1 ;;
      --no-numbers) CLI_NUMBERS="false" ;;
      --numbers) CLI_NUMBERS="true" ;;
      --clean)
        echo -e "${BLUE}::${RESET} ${BOLD}Removing Docker image ${CYAN}${IMAGE_NAME}${RESET}..."
        if docker rmi "$IMAGE_NAME" >/dev/null 2>&1; then
          echo -e "${GREEN}✓${RESET} Image removed"
        else
          echo -e "${DIM}Image not found${RESET}"
        fi
        exit 0 ;;
      # `--key value` form: remember the key (option name without the leading
      # dashes) and pick the value up on the next iteration.
      --toc-level|--number-from|--out|--title|--subtitle|--author|--header|--footer|--date|--watermark)
        NEXT_KEY="${arg#--}" ;;
      --toc-level=*) CLI_TOC_LEVEL="${arg#*=}" ;;
      --number-from=*) CLI_NUMBER_FROM="${arg#*=}" ;;
      --out=*) OUT_FILE="${arg#*=}" ;;
      --title=*) CLI_TITLE="${arg#*=}" ;;
      --subtitle=*) CLI_SUBTITLE="${arg#*=}" ;;
      --author=*) CLI_AUTHOR="${arg#*=}" ;;
      --header=*) CLI_HEADER="${arg#*=}" ;;
      --footer=*) CLI_FOOTER="${arg#*=}" ;;
      --date=*) CLI_DATE="${arg#*=}" ;;
      --watermark=*) CLI_WATERMARK="${arg#*=}" ;;
      --version) echo "pdfify v${VERSION}"; exit 0 ;;
      --help|-h) _print_help; exit 0 ;;
      --*)
        echo -e "${RED}Error:${RESET} Unknown option: ${arg}"
        echo -e " Run ${CYAN}pdfify --help${RESET} for all options"
        exit 1 ;;
      *) POSITIONAL+=("$arg") ;;
    esac
  done
  if [[ -n "$NEXT_KEY" ]]; then
    echo -e "${RED}Error:${RESET} --${NEXT_KEY} requires a value"
    exit 1
  fi
}
_parse_cli_args "$@"
# Argument sanity checks, run once parsing is done.
# No input file at all: show the short usage hint and stop.
if (( ${#POSITIONAL[@]} == 0 )); then
  printf '%b\n' "${BOLD}Usage:${RESET} pdfify ${CYAN}<file.md> [file2.md ...]${RESET} [options]"
  printf '%b\n' " Run ${CYAN}pdfify --help${RESET} for all options"
  exit 1
fi
# --out names a single output file, so it cannot be combined with several inputs.
if (( ${#POSITIONAL[@]} > 1 )) && [[ -n "$OUT_FILE" ]]; then
  printf '%b\n' "${RED}Error:${RESET} --out cannot be used with multiple input files"
  exit 1
fi
# --- Open helper ---
# Hand a PDF to the first available launcher command, trying `open` and then
# `xdg-open`.
# Arguments: $1 - path of the PDF to open.
# Returns:   the launcher's exit status, or 0 (doing nothing) when neither
#            command is available.
open_pdf() {
  local pdf="$1" launcher
  for launcher in open xdg-open; do
    command -v "$launcher" >/dev/null 2>&1 || continue
    "$launcher" "$pdf"
    return
  done
  return 0
}
header "pdfify v${VERSION}"
# --- Embedded Dockerfile ---
# Recipe for the conversion image, fed to `docker build` on stdin. It contains
# pandoc, XeLaTeX, mermaid-cli, Chromium and the Roboto font families.
# The heredoc delimiter is quoted, so nothing below is expanded by this shell.
#
# The Puppeteer ENV lines sit *before* `npm install`: placed after it (as they
# used to be) they cannot affect the install step, so Puppeteer still fetches
# its own browser even though mermaid-cli is pointed at the distro Chromium.
# Both the old and the newer skip variable are set because the name changed
# between Puppeteer releases.
DOCKERFILE=$(cat <<'DOCKERFILE_END'
FROM node:20-slim
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update -qq && \
    apt-get install -y --no-install-recommends \
    pandoc \
    texlive-latex-recommended \
    texlive-latex-extra \
    texlive-fonts-recommended \
    texlive-fonts-extra \
    texlive-xetex \
    lmodern \
    librsvg2-bin \
    chromium \
    ca-certificates \
    fonts-liberation \
    fonts-roboto \
    fonts-roboto-unhinted \
    fonts-noto-color-emoji \
    wget \
    fontconfig \
    && rm -rf /var/lib/apt/lists/*
RUN mkdir -p /usr/share/fonts/truetype/roboto-mono && \
    for style in Regular Bold Italic BoldItalic Medium MediumItalic Light LightItalic; do \
    wget -q "https://github.com/googlefonts/RobotoMono/raw/main/fonts/ttf/RobotoMono-${style}.ttf" \
    -O "/usr/share/fonts/truetype/roboto-mono/RobotoMono-${style}.ttf" 2>/dev/null || true; \
    done && \
    fc-cache -f
# Use the distro Chromium; these must be in effect while npm installs Puppeteer.
ENV PUPPETEER_SKIP_DOWNLOAD=true
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
ENV CHROME_PATH=/usr/bin/chromium
RUN npm install -g @mermaid-js/mermaid-cli
RUN echo '{"maxTextSize": 90000, "flowchart": {"useMaxWidth": true}, "theme": "base", "themeVariables": {"primaryColor": "#3B82F6", "primaryBorderColor": "#1E40AF", "primaryTextColor": "#1E293B", "lineColor": "#475569", "xyChart": {"backgroundColor": "transparent", "plotColorPalette": "#2563EB,#DC2626,#16A34A,#D97706,#9333EA,#0891B2"}}}' > /opt/mermaid-config.json
RUN echo '{"args": ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--disable-gpu"]}' > /opt/puppeteer-config.json
WORKDIR /work
ENTRYPOINT ["/bin/bash"]
DOCKERFILE_END
)
# --- Build Docker image ---
# Make sure the image $IMAGE_NAME exists: drop it first when --rebuild was
# given, reuse it when present, otherwise build it from $DOCKERFILE using an
# empty temporary directory as build context. The legacy builder
# (DOCKER_BUILDKIT=0) is used because the progress filter below matches its
# "Step N/M" lines.
echo ""
if [[ $REBUILD -eq 1 ]]; then
  info "Removing existing Docker image ${CYAN}${IMAGE_NAME}${RESET}..."
  docker rmi "$IMAGE_NAME" >/dev/null 2>&1 || true
  success "Image removed"
fi
if docker image inspect "$IMAGE_NAME" >/dev/null 2>&1; then
  info "Docker image ${GREEN}${IMAGE_NAME}${RESET} found ${DIM}(cached)${RESET}"
  success "Reusing existing image"
else
  info "Building Docker image ${CYAN}${IMAGE_NAME}${RESET}..."
  detail "Installing: pandoc, XeLaTeX, mermaid-cli, Chromium, fonts"
  detail "This takes 2-3 minutes on first run (cached after)"
  echo ""
  BUILD_CTX=$(mktemp -d)
  # Condensed progress: show only step headers, the final tag line and errors.
  # The trailing `|| true` is required: with `set -e` and `pipefail` a failing
  # `docker build` would otherwise terminate the script on this very line,
  # skipping both the temp-dir cleanup and the verbose re-run below. Success is
  # decided by the `docker image inspect` that follows, not by this status.
  echo "$DOCKERFILE" | DOCKER_BUILDKIT=0 docker build -t "$IMAGE_NAME" -f - "$BUILD_CTX" 2>&1 | while IFS= read -r line; do
    if [[ "$line" =~ ^Step\ ([0-9]+)/([0-9]+) ]]; then
      echo -e " ${CYAN}[${BASH_REMATCH[1]}/${BASH_REMATCH[2]}]${RESET} ${DIM}${line#*: }${RESET}"
    elif [[ "$line" == *"Successfully tagged"* ]]; then
      echo -e " ${GREEN}${line}${RESET}"
    elif [[ "$line" == *"ERROR"* || "$line" == *"error"* ]]; then
      echo -e " ${RED}${line}${RESET}"
    fi
  done || true
  rm -rf "$BUILD_CTX"
  if ! docker image inspect "$IMAGE_NAME" >/dev/null 2>&1; then
    echo -e "\n${RED}Docker build failed. Re-running with full output:${RESET}\n"
    BUILD_CTX=$(mktemp -d)
    # Diagnostic re-run, unfiltered. It is expected to fail again, so its
    # status is ignored to let the cleanup and the explicit `exit 1` run.
    echo "$DOCKERFILE" | DOCKER_BUILDKIT=0 docker build -t "$IMAGE_NAME" -f - "$BUILD_CTX" || true
    rm -rf "$BUILD_CTX"
    exit 1
  fi
  success "Docker image built"
fi
# === Per-file conversion ===
convert_file() {
local INPUT_PATH="$1"
local OUTPUT_OVERRIDE="$2"
# --- Resolve paths ---
local INPUT INPUT_DIR INPUT_FILE OUTPUT OUTPUT_DIR OUTPUT_FILE
INPUT="$(cd "$(dirname "$INPUT_PATH")" && pwd)/$(basename "$INPUT_PATH")"
if [[ ! -f "$INPUT" ]]; then
echo -e "${RED}Error:${RESET} $INPUT_PATH not found"
return 1
fi
INPUT_DIR="$(dirname "$INPUT")"
INPUT_FILE="$(basename "$INPUT")"
OUTPUT="${OUTPUT_OVERRIDE:-${INPUT%.md}.pdf}"
OUTPUT_DIR="$(cd "$(dirname "$OUTPUT")" 2>/dev/null && pwd || (mkdir -p "$(dirname "$OUTPUT")" && cd "$(dirname "$OUTPUT")" && pwd))"
OUTPUT="${OUTPUT_DIR}/$(basename "$OUTPUT")"
OUTPUT_FILE="$(basename "$OUTPUT")"
# Preview mode: write temp file in input dir (Docker-mountable), move to /tmp after
local PREVIEW_FINAL=""
if [[ $PREVIEW -eq 1 ]]; then
local base="${INPUT_FILE%.md}"
PREVIEW_FINAL="/tmp/pdfify-preview-${base}.pdf"
OUTPUT_FILE=".pdfify-preview-${base}.pdf"
OUTPUT="${OUTPUT_DIR}/${OUTPUT_FILE}"
fi
info "Input: ${CYAN}${INPUT}${RESET}"
if [[ -n "$PREVIEW_FINAL" ]]; then
info "Output: ${CYAN}${PREVIEW_FINAL}${RESET} ${DIM}(preview)${RESET}"
else
info "Output: ${CYAN}${OUTPUT}${RESET}"
fi
# --- Parse YAML frontmatter ---
local FM_TITLE="" FM_SUBTITLE="" FM_AUTHOR="" FM_FOOTER="" FM_HEADER=""
local FM_TOC_LEVEL="" FM_DATE="" FM_NUMBERSECTIONS="" FM_NUMBERFROM="" FM_WATERMARK=""
local FM_DATE_HASH="" FM_DATE_DIRTY="" FM_DATE_LABEL=""
if head -1 "$INPUT" | grep -q '^---'; then
local FM_BLOCK
FM_BLOCK=$(awk 'NR==1 && /^---/{found=1; next} found && /^---/{exit} found{print}' "$INPUT")
extract_fm() { echo "$FM_BLOCK" | sed -n "s/^$1:[[:space:]]*//p" | sed 's/^["'"'"']\(.*\)["'"'"']$/\1/'; }
FM_TITLE=$(extract_fm "title")
FM_AUTHOR=$(extract_fm "author")
FM_SUBTITLE=$(extract_fm "subtitle")
FM_FOOTER=$(extract_fm "footer")
FM_HEADER=$(extract_fm "header")
FM_TOC_LEVEL=$(extract_fm "toc-level")
FM_DATE=$(extract_fm "date")
FM_NUMBERSECTIONS=$(extract_fm "numbersections")
FM_NUMBERFROM=$(extract_fm "numberfrom")
FM_WATERMARK=$(extract_fm "watermark")
FM_PAGEBREAK=$(extract_fm "pagebreak")
fi
# --- CLI overrides frontmatter ---
[[ -n "$CLI_TITLE" ]] && FM_TITLE="$CLI_TITLE"
[[ -n "$CLI_SUBTITLE" ]] && FM_SUBTITLE="$CLI_SUBTITLE"
[[ -n "$CLI_AUTHOR" ]] && FM_AUTHOR="$CLI_AUTHOR"
[[ -n "$CLI_FOOTER" ]] && FM_FOOTER="$CLI_FOOTER"
[[ -n "$CLI_HEADER" ]] && FM_HEADER="$CLI_HEADER"
[[ -n "$CLI_DATE" ]] && FM_DATE="$CLI_DATE"
[[ -n "$CLI_WATERMARK" ]] && FM_WATERMARK="$CLI_WATERMARK"
[[ -n "$CLI_TOC_LEVEL" ]] && FM_TOC_LEVEL="$CLI_TOC_LEVEL"
[[ -n "$CLI_NUMBER_FROM" ]] && FM_NUMBERFROM="$CLI_NUMBER_FROM"
[[ -n "$CLI_NUMBERS" ]] && FM_NUMBERSECTIONS="$CLI_NUMBERS"
# --- Auto-detect document structure ---
# Count H1 headings (outside code blocks)
local H1_COUNT=0 IN_CODE_SCAN=0 FIRST_H1_TEXT=""
while IFS= read -r scanline || [[ -n "$scanline" ]]; do
[[ "$scanline" =~ ^\`\`\` ]] && { if [[ $IN_CODE_SCAN -eq 0 ]]; then IN_CODE_SCAN=1; else IN_CODE_SCAN=0; fi; continue; }
if [[ $IN_CODE_SCAN -eq 0 && "$scanline" =~ ^#\ ]]; then
H1_COUNT=$((H1_COUNT + 1))
[[ $H1_COUNT -eq 1 ]] && FIRST_H1_TEXT="${scanline#\# }"
fi
done < "$INPUT"
local FILE_TOC_LEVEL="${FM_TOC_LEVEL:-3}"
local FILE_NUMBERS=1
[[ "$FM_NUMBERSECTIONS" == "false" ]] && FILE_NUMBERS=0
local FILE_PAGEBREAK=1
[[ "$FM_PAGEBREAK" == "false" ]] && FILE_PAGEBREAK=0
# Auto-determine numberfrom based on structure (if not explicitly set)
local FILE_NUMBER_FROM="${FM_NUMBERFROM:-}"
local HIDE_FIRST_H1=0
if [[ -z "$FILE_NUMBER_FROM" ]]; then
if [[ $H1_COUNT -eq 1 ]]; then
# Single H1 = document title; number from H2, hide H1 in body
FILE_NUMBER_FROM=2
HIDE_FIRST_H1=1
# Use H1 text as title if no title set
[[ -z "$FM_TITLE" ]] && FM_TITLE="$FIRST_H1_TEXT"
detail "Auto: ${DIM}single H1 detected → using as title, numbering from H2${RESET}"
else
# Multiple H1s = sections; number from H1
FILE_NUMBER_FROM=1
detail "Auto: ${DIM}${H1_COUNT} H1s detected → numbering from H1${RESET}"
fi
fi
# Default date: current date/time
# Set to "none" in frontmatter or --date to suppress
FM_DATE_HASH="${FM_DATE_HASH:-}"
FM_DATE_DIRTY="${FM_DATE_DIRTY:-}"
if [[ "$FM_DATE" == "none" || "$FM_DATE" == "false" ]]; then
FM_DATE=""
elif [[ -z "$FM_DATE" && -z "$CLI_DATE" ]]; then
FM_DATE="$(date +"%Y-%m-%d %H:%M")"
fi
echo ""
[[ -n "$FM_TITLE" ]] && detail "Title: ${CYAN}${FM_TITLE}${RESET}"
[[ -n "$FM_SUBTITLE" ]] && detail "Subtitle: ${CYAN}${FM_SUBTITLE}${RESET}"
[[ -n "$FM_AUTHOR" ]] && detail "Author: ${CYAN}${FM_AUTHOR}${RESET}"
[[ -n "$FM_HEADER" ]] && detail "Header: ${CYAN}${FM_HEADER}${RESET}"
[[ -n "$FM_FOOTER" ]] && detail "Footer: ${CYAN}${FM_FOOTER}${RESET}"
detail "Date: ${CYAN}${FM_DATE}${RESET}"
detail "TOC: ${CYAN}level ${FILE_TOC_LEVEL}${RESET}"
detail "Numbered: ${CYAN}$([ $FILE_NUMBERS -eq 1 ] && echo "yes (from H${FILE_NUMBER_FROM})" || echo no)${RESET}"
[[ -n "$FM_WATERMARK" ]] && detail "Watermark: ${CYAN}${FM_WATERMARK}${RESET}"
# --- Git hash for source file ---
local GIT_STAMP=""
# --- Discover images referenced in the markdown ---
echo ""
info "Scanning ${CYAN}${INPUT_FILE}${RESET} for assets..."
IMAGES=()
while IFS= read -r img; do
[[ -z "$img" ]] && continue
[[ "$img" =~ ^https?:// ]] && continue
if [[ -f "$INPUT_DIR/$img" ]]; then
IMAGES+=("$img")
success "Image: ${CYAN}${img}${RESET} ${DIM}($(du -h "$INPUT_DIR/$img" | cut -f1 | tr -d ' '))${RESET}"
else
warn "Image: ${YELLOW}${img}${RESET} ${RED}(not found)${RESET}"
fi
done < <(sed -n 's/.*!\[[^]]*\](\([^)]*\)).*/\1/p' "$INPUT"; sed -n 's/.*src="\([^"]*\)".*/\1/p' "$INPUT")
MERMAID_COUNT=$(grep -c '```mermaid' "$INPUT" || true)
if [[ $MERMAID_COUNT -gt 0 ]]; then
success "Mermaid diagrams: ${CYAN}${MERMAID_COUNT}${RESET}"
fi
CALLOUT_COUNT=$(grep -c '> \[!' "$INPUT" || true)
if [[ $CALLOUT_COUNT -gt 0 ]]; then
success "Callouts: ${CYAN}${CALLOUT_COUNT}${RESET}"
fi
TABLE_COUNT=$(grep -c '^|' "$INPUT" || true)
CODE_COUNT=$(grep -c '```' "$INPUT" || true)
CODE_COUNT=$(( (CODE_COUNT - MERMAID_COUNT * 2) / 2 ))
[[ $TABLE_COUNT -gt 0 ]] && detail "Tables: ${TABLE_COUNT} rows"
[[ $CODE_COUNT -gt 0 ]] && detail "Code blocks: ~${CODE_COUNT}"
echo ""
info "Found ${GREEN}${#IMAGES[@]}${RESET} image(s), ${GREEN}${MERMAID_COUNT}${RESET} mermaid diagram(s), ${GREEN}${CALLOUT_COUNT}${RESET} callout(s)"
# --- Write the conversion script to a temp file (mounted into Docker) ---
CONVERT_SCRIPT="${INPUT_DIR}/.pdfify-convert-$$.sh"
trap 'rm -f "$CONVERT_SCRIPT"' EXIT
cat > "$CONVERT_SCRIPT" <<'INNER_SCRIPT'
#!/bin/bash
set -euo pipefail
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
BOLD='\033[1m'
DIM='\033[2m'
RESET='\033[0m'
info() { echo -e "${BLUE}::${RESET} ${BOLD}$*${RESET}"; }
success() { echo -e "${GREEN}✓${RESET} $*"; }
detail() { echo -e " ${DIM}→${RESET} $*"; }
INPUT_FILE="$1"
OUTPUT_FILE="$2"
WORKDIR="/work"
cd "$WORKDIR"
# --- Step 0: Strip first H1 if it's being used as document title ---
HIDE_FIRST_H1="${HIDE_FIRST_H1:-0}"
EFFECTIVE_INPUT="$INPUT_FILE"
if [[ "$HIDE_FIRST_H1" == "1" ]]; then
STRIPPED=$(mktemp /tmp/pdfify-stripped-XXXXXX.md)
FOUND_H1=0
IN_CODE_BLK=0
IN_FMATTER=0
while IFS= read -r line || [[ -n "$line" ]]; do
[[ "$line" =~ ^\`\`\` ]] && { if [[ $IN_CODE_BLK -eq 0 ]]; then IN_CODE_BLK=1; else IN_CODE_BLK=0; fi; }
if [[ "$line" == "---" && $IN_CODE_BLK -eq 0 ]]; then
if [[ $IN_FMATTER -eq 0 && $FOUND_H1 -eq 0 ]]; then IN_FMATTER=1; else IN_FMATTER=0; fi
fi
# Skip the first H1 (and any blank line immediately after)
if [[ $FOUND_H1 -eq 0 && $IN_CODE_BLK -eq 0 && $IN_FMATTER -eq 0 && "$line" =~ ^#\ ]]; then
FOUND_H1=1
continue
fi
# Skip blank line right after removed H1
if [[ $FOUND_H1 -eq 1 && -z "$line" ]]; then
FOUND_H1=2
continue
fi
[[ $FOUND_H1 -eq 1 ]] && FOUND_H1=2
echo "$line" >> "$STRIPPED"
done < "$INPUT_FILE"
EFFECTIVE_INPUT="$(basename "$STRIPPED")"
detail "Stripped first H1 (promoted to title)"
fi
# --- Step 1: Pre-process Obsidian callouts ---
info "Pre-processing callouts..."
CALLOUT_MD=$(mktemp /tmp/pdfify-callout-XXXXXX.md)
IN_CALLOUT=0
CALLOUT_TYPE=""
CALLOUT_TITLE=""
CALLOUT_BUF=""
CALLOUT_COUNT=0
# Emit the callout collected so far (if any) to $CALLOUT_MD as a raw-LaTeX
# \begin{callout…}{title} … \end{callout…} pair wrapped around the buffered
# Markdown body, then reset the collection state.
#
# Globals read:    IN_CALLOUT, CALLOUT_TYPE, CALLOUT_TITLE, CALLOUT_BUF, CALLOUT_MD
# Globals written: CALLOUT_COUNT (incremented per emitted callout) and the four
#                  state variables above (always reset).
#
# The title is LaTeX-escaped before use: it is free text taken from the
# Markdown source and ends up inside a macro argument, where a bare % & _ # $
# { } ~ ^ or backslash would break the LaTeX run. The body is left untouched
# because it is emitted as ordinary Markdown between the two raw blocks.
flush_callout() {
  if [[ $IN_CALLOUT -eq 1 && -n "$CALLOUT_TYPE" ]]; then
    CALLOUT_COUNT=$((CALLOUT_COUNT + 1))
    local latex_type safe_title
    # Map the (case-insensitive) callout keyword onto a LaTeX environment;
    # unknown keywords fall back to the "info" look.
    case "${CALLOUT_TYPE,,}" in
      info|note) latex_type="calloutinfo" ;;
      tip|hint) latex_type="callouttip" ;;
      warning|caution) latex_type="calloutwarning" ;;
      danger|error|bug) latex_type="calloutdanger" ;;
      example) latex_type="calloutexample" ;;
      quote|cite) latex_type="calloutquote" ;;
      *) latex_type="calloutinfo" ;;
    esac
    # Backslashes go through a placeholder so the braces of the final
    # \textbackslash{} are not themselves escaped.
    safe_title=$(printf '%s' "$CALLOUT_TITLE" | sed \
      -e 's/\\/@@BSLASH@@/g' \
      -e 's/&/\\&/g' \
      -e 's/%/\\%/g' \
      -e 's/\$/\\$/g' \
      -e 's/#/\\#/g' \
      -e 's/_/\\_/g' \
      -e 's/{/\\{/g' \
      -e 's/}/\\}/g' \
      -e 's/~/\\textasciitilde{}/g' \
      -e 's/\^/\\textasciicircum{}/g' \
      -e 's/@@BSLASH@@/\\textbackslash{}/g')
    {
      printf '\n'
      printf '%s\n' '```{=latex}'
      printf '%s\n' "\\begin{${latex_type}}{${safe_title}}"
      printf '%s\n' '```'
      printf '\n'
      printf '%s\n' "$CALLOUT_BUF"
      printf '\n'
      printf '%s\n' '```{=latex}'
      printf '%s\n' "\\end{${latex_type}}"
      printf '%s\n' '```'
      printf '\n'
    } >> "$CALLOUT_MD"
  fi
  IN_CALLOUT=0
  CALLOUT_TYPE=""
  CALLOUT_TITLE=""
  CALLOUT_BUF=""
}
while IFS= read -r line || [[ -n "$line" ]]; do
if [[ "$line" =~ ^\>\ *\[!([a-zA-Z]+)\]\ *(.*) ]]; then
flush_callout
IN_CALLOUT=1
CALLOUT_TYPE="${BASH_REMATCH[1]}"
CALLOUT_TITLE="${BASH_REMATCH[2]:-${BASH_REMATCH[1]^}}"
continue
fi
if [[ $IN_CALLOUT -eq 1 ]]; then
if [[ "$line" =~ ^\>\ ?(.*) ]]; then
CALLOUT_BUF="${CALLOUT_BUF}${BASH_REMATCH[1]}
"
continue
else
flush_callout
fi
fi
echo "$line" >> "$CALLOUT_MD"
done < "${STRIPPED:-$INPUT_FILE}"
flush_callout
if [[ $CALLOUT_COUNT -gt 0 ]]; then
success "Converted $CALLOUT_COUNT callout(s)"
fi
# --- Step 1b+1c: Inject page breaks (after TOC, before each H1) ---
BREAK_INJECTED=$(mktemp /tmp/pdfify-breaks-XXXXXX.md)
H1_COUNT=0
IN_FM=0
IN_CODE=0
DONE_TOC_BREAK=0
while IFS= read -r line || [[ -n "$line" ]]; do
# Track code blocks (``` opens/closes)
if [[ "$line" =~ ^\`\`\` ]]; then
if [[ $IN_CODE -eq 0 ]]; then IN_CODE=1; else IN_CODE=0; fi
echo "$line" >> "$BREAK_INJECTED"
continue
fi
# Track frontmatter (only at start of file)
if [[ "$line" == "---" && $IN_CODE -eq 0 ]]; then
if [[ $IN_FM -eq 0 && $H1_COUNT -eq 0 ]]; then IN_FM=1; else IN_FM=0; fi
echo "$line" >> "$BREAK_INJECTED"
continue
fi
if [[ $IN_CODE -eq 0 && $IN_FM -eq 0 ]]; then
# Before first content after frontmatter: inject TOC page break
if [[ $DONE_TOC_BREAK -eq 0 && "$TOC_LEVEL" -gt 0 && -n "$line" ]]; then
echo "" >> "$BREAK_INJECTED"
echo '```{=latex}' >> "$BREAK_INJECTED"
echo '\newpage' >> "$BREAK_INJECTED"
echo '```' >> "$BREAK_INJECTED"
echo "" >> "$BREAK_INJECTED"
DONE_TOC_BREAK=1
fi
# Page break before each top-level section (except first)
# Build the marker: numberfrom=1 → "# ", numberfrom=2 → "## "
BREAK_HASHES=$(printf '#%.0s' $(seq 1 "$FILE_NUMBER_FROM"))
if [[ "$line" == "${BREAK_HASHES} "* ]]; then
# Make sure it's exactly that level, not deeper
NEXT_CHAR="${line:${#BREAK_HASHES}:1}"
if [[ "$NEXT_CHAR" != "#" ]]; then
H1_COUNT=$((H1_COUNT + 1))
if [[ $H1_COUNT -gt 1 && $FILE_PAGEBREAK -eq 1 ]]; then
echo "" >> "$BREAK_INJECTED"
echo '```{=latex}' >> "$BREAK_INJECTED"
echo '\newpage' >> "$BREAK_INJECTED"
echo '```' >> "$BREAK_INJECTED"
echo "" >> "$BREAK_INJECTED"
fi
fi
fi
fi
echo "$line" >> "$BREAK_INJECTED"
done < "$CALLOUT_MD"
rm -f "$CALLOUT_MD"
CALLOUT_MD="$BREAK_INJECTED"
# --- Step 2: Pre-render Mermaid blocks to PNG ---
info "Pre-rendering Mermaid diagrams..."
TEMP_MD=$(mktemp /tmp/pdfify-XXXXXX.md)
MERMAID_COUNT=0
IN_MERMAID=0
MERMAID_BUF=""
while IFS= read -r line || [[ -n "$line" ]]; do
if [[ "$line" =~ ^\`\`\`mermaid ]]; then
IN_MERMAID=1
MERMAID_BUF=""
continue
fi
if [[ $IN_MERMAID -eq 1 ]]; then
if [[ "$line" =~ ^\`\`\` ]]; then
IN_MERMAID=0
MERMAID_COUNT=$((MERMAID_COUNT + 1))
MERMAID_FILE="/tmp/mermaid-${MERMAID_COUNT}.mmd"
MERMAID_PNG="/tmp/mermaid-${MERMAID_COUNT}.png"
echo "$MERMAID_BUF" > "$MERMAID_FILE"
detail "Rendering diagram ${CYAN}#${MERMAID_COUNT}${RESET}..."
mmdc -i "$MERMAID_FILE" \
-o "$MERMAID_PNG" \
-w 1600 \
-b transparent \
-c /opt/mermaid-config.json \
-p /opt/puppeteer-config.json \
2>/dev/null || {
echo -e " ${YELLOW}⚠${RESET} Diagram $MERMAID_COUNT failed — inserting as code block"
echo '```' >> "$TEMP_MD"
echo "$MERMAID_BUF" >> "$TEMP_MD"
echo '```' >> "$TEMP_MD"
continue
}
SIZE=$(du -h "$MERMAID_PNG" 2>/dev/null | cut -f1 | tr -d ' ')
success "Diagram #${MERMAID_COUNT} rendered ${DIM}(${SIZE})${RESET}"
echo "" >> "$TEMP_MD"
echo "\\" >> "$TEMP_MD"
echo "" >> "$TEMP_MD"
else
MERMAID_BUF="${MERMAID_BUF}${line}
"
fi
else
echo "$line" >> "$TEMP_MD"
fi
done < "$CALLOUT_MD"
# --- Strip YAML frontmatter so pandoc doesn't generate its own title ---
# pdfify already parses frontmatter above; letting pandoc see it causes a
# duplicate title (pandoc's \maketitle + pdfify's custom title banner).
if head -1 "$TEMP_MD" | grep -q '^---'; then
STRIPPED_FM=$(mktemp /tmp/pdfify-nofm-XXXXXX.md)
awk 'NR==1 && /^---/{skip=1; next} skip && /^---/{skip=0; next} !skip' "$TEMP_MD" > "$STRIPPED_FM"
mv "$STRIPPED_FM" "$TEMP_MD"
fi
# --- Lua filter: protect brackets in headings for titlesec ---
# Square brackets in headings break titlesec (\SQSPL@scan error) because LaTeX
# interprets [ as the start of an optional argument.
BRACKET_FILTER=$(mktemp /tmp/pdfify-bracket-filter-XXXXXX.lua)
cat > "$BRACKET_FILTER" <<'LUAFILTER'
-- Protect square brackets in headings to prevent titlesec \SQSPL@scan errors.
-- Brackets in headings make titlesec think they are optional arguments.
-- We replace [ and ] with \lbrack/\rbrack in all inline types.
function Header(el)
if FORMAT ~= "latex" and FORMAT ~= "pdf" then return nil end
el = el:walk {
Str = function(s)
if s.text:find("[%[%]]") then
local t = s.text:gsub("%[", "\\lbrack{}"):gsub("%]", "\\rbrack{}")
return pandoc.RawInline("latex", t)
end
end,
Code = function(c)
-- All code in headings must use \oldtexttt to bypass seqsplit
-- (seqsplit in titlesec moving arguments causes \SQSPL@scan errors)
local t = c.text
t = t:gsub("\\", "\\textbackslash ")
t = t:gsub("%%", "\\%%")
t = t:gsub("%#", "\\#")
t = t:gsub("%$", "\\$")
t = t:gsub("%&", "\\&")
t = t:gsub("_", "\\_")
t = t:gsub("%{", "\\{")
t = t:gsub("%}", "\\}")
t = t:gsub("~", "\\textasciitilde{}")
t = t:gsub("%^", "\\textasciicircum{}")
t = t:gsub("%[", "\\lbrack{}"):gsub("%]", "\\rbrack{}")
return pandoc.RawInline("latex", "\\oldtexttt{" .. t .. "}")
end
}
return el
end
LUAFILTER
echo ""
info "Generating PDF with Pandoc + XeLaTeX..."
detail "Engine: xelatex"
detail "Font: Roboto / Roboto Mono"
detail "Margins: 0.5in, Font size: 10pt"
echo ""
# Write LaTeX preamble for modern styling
PREAMBLE=$(mktemp /tmp/pdfify-preamble-XXXXXX.tex)
cat > "$PREAMBLE" <<'LATEX'
% --- Modern color scheme ---
\usepackage{xcolor}
\definecolor{accent}{HTML}{374151}
\definecolor{accentdark}{HTML}{111827}
\definecolor{codebg}{HTML}{F8F9FA}
\definecolor{codeborder}{HTML}{E2E8F0}
\definecolor{headrulecolor}{HTML}{E2E8F0}
% --- Callout colors ---
\definecolor{infobg}{HTML}{EFF6FF}
\definecolor{infobar}{HTML}{3B82F6}
\definecolor{infofg}{HTML}{1E40AF}
\definecolor{tipbg}{HTML}{F0FDF4}
\definecolor{tipbar}{HTML}{22C55E}
\definecolor{tipfg}{HTML}{166534}
\definecolor{warningbg}{HTML}{FFFBEB}
\definecolor{warningbar}{HTML}{F59E0B}
\definecolor{warningfg}{HTML}{92400E}
\definecolor{dangerbg}{HTML}{FEF2F2}
\definecolor{dangerbar}{HTML}{EF4444}
\definecolor{dangerfg}{HTML}{991B1B}
\definecolor{examplebg}{HTML}{F5F3FF}
\definecolor{examplebar}{HTML}{8B5CF6}
\definecolor{examplefg}{HTML}{5B21B6}
\definecolor{quotecallbg}{HTML}{F8F9FA}
\definecolor{quotecallbar}{HTML}{6B7280}
\definecolor{quotecallfg}{HTML}{374151}
% --- Code block wrapping and styling ---
\usepackage{fvextra}
\DefineVerbatimEnvironment{Highlighting}{Verbatim}{
breaklines,
breakanywhere,
commandchars=\\\{\},
fontsize=\small
}
% Background on code blocks via mdframed
\usepackage[framemethod=tikz]{mdframed}
% Override pandoc's Shaded environment (define first if pandoc didn't)
\makeatletter
\@ifundefined{Shaded}{\newenvironment{Shaded}{}{}}{}
\makeatother
\renewenvironment{Shaded}{%
\begin{mdframed}[
backgroundcolor=codebg,
hidealllines=true,
roundcorner=4pt,
innertopmargin=8pt,
innerbottommargin=8pt,
innerleftmargin=10pt,
innerrightmargin=10pt,
skipabove=10pt,
skipbelow=10pt
]
}{%
\end{mdframed}
}
% --- Callout environments ---
\newenvironment{calloutbase}[3]{%
\begin{mdframed}[
backgroundcolor=#1,
linecolor=#2,
linewidth=3pt,
topline=false,
bottomline=false,
rightline=false,
innertopmargin=12pt,
innerbottommargin=12pt,
innerleftmargin=12pt,
innerrightmargin=12pt,
skipabove=12pt,
skipbelow=12pt,
roundcorner=0pt
]
\textbf{\color{#2}#3}\par\smallskip\setlength{\parindent}{0pt}
}{%
\end{mdframed}
}
\newenvironment{calloutinfo}[1]{\begin{calloutbase}{infobg}{infobar}{#1}}{\end{calloutbase}}
\newenvironment{callouttip}[1]{\begin{calloutbase}{tipbg}{tipbar}{#1}}{\end{calloutbase}}
\newenvironment{calloutwarning}[1]{\begin{calloutbase}{warningbg}{warningbar}{#1}}{\end{calloutbase}}
\newenvironment{calloutdanger}[1]{\begin{calloutbase}{dangerbg}{dangerbar}{#1}}{\end{calloutbase}}
\newenvironment{calloutexample}[1]{\begin{calloutbase}{examplebg}{examplebar}{#1}}{\end{calloutbase}}
\newenvironment{calloutquote}[1]{\begin{calloutbase}{quotecallbg}{quotecallbar}{#1}}{\end{calloutbase}}
% --- PDF bookmarks (sidebar navigation in PDF viewers) ---
\usepackage{bookmark}
\bookmarksetup{
numbered=false,
open,
openlevel=2
}
% --- Title banner ---
\definecolor{titlebg}{HTML}{E5E7EB}
% --- Page break after TOC ---
\let\oldtableofcontents\tableofcontents
\renewcommand{\tableofcontents}{\oldtableofcontents\clearpage}
% --- TOC styling ---
\usepackage{tocloft}
\setlength{\cftbeforetoctitleskip}{0.5em}
\renewcommand{\cfttoctitlefont}{\LARGE\bfseries\color{accentdark}\scshape}
\renewcommand{\cftaftertoctitle}{\par\vspace{2pt}{\color{headrulecolor}\hrule height 1pt}\vspace{10pt}}
\renewcommand{\cftsecfont}{\bfseries\color{accentdark}}
\renewcommand{\cftsecpagefont}{\bfseries\color{accentdark}}
\renewcommand{\cftsubsecfont}{\color{accent}}
\renewcommand{\cftsubsecpagefont}{\color{accent}}
\renewcommand{\cftsubsubsecfont}{\small\color{accent}}
\renewcommand{\cftsubsubsecpagefont}{\small\color{accent}}
\renewcommand{\cftsecleader}{\cftdotfill{\cftsecdotsep}}
\renewcommand{\cftsecdotsep}{\cftdotsep}
\setlength{\cftbeforesecskip}{6pt}
\setlength{\cftbeforesubsecskip}{2pt}
% --- Heading font ---
\newfontfamily\headingfont{Roboto}[BoldFont={Roboto Bold}]
% --- Symbol fallback (arrows, etc.) ---
\usepackage{newunicodechar}
\newfontfamily\fallbackfont{Liberation Sans}[Scale=MatchLowercase]
\newunicodechar{→}{{\fallbackfont →}}
\newunicodechar{←}{{\fallbackfont ←}}
\newunicodechar{↔}{{\fallbackfont ↔}}
\newunicodechar{⇒}{{\fallbackfont ⇒}}
\newunicodechar{⇐}{{\fallbackfont ⇐}}
\newunicodechar{✓}{{\fallbackfont ✓}}
\newunicodechar{✗}{{\fallbackfont ✗}}
% --- Modern section headings (tight, bold, dark) ---
\usepackage{titlesec}
% H1: # headings — large, small caps, dark, with rule
\titleformat{\section}
{\LARGE\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=5}\scshape}
{\thesection}{0.5em}{}[\vspace{2pt}{\color{headrulecolor}\titlerule[1pt]}]
\titlespacing*{\section}{0pt}{20pt}{10pt}
% H2: ## headings
\titleformat{\subsection}
{\Large\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=-1}}
{\thesubsection}{0.5em}{}
\titlespacing*{\subsection}{0pt}{16pt}{8pt}
% H3: ### headings
\titleformat{\subsubsection}
{\large\bfseries\color{accent}}
{\thesubsubsection}{0.5em}{}
\titlespacing*{\subsubsection}{0pt}{12pt}{6pt}
% H4: #### headings
\titleformat{\paragraph}[hang]
{\normalsize\bfseries\color{accent}}
{\theparagraph}{0.5em}{}
\titlespacing*{\paragraph}{0pt}{10pt}{4pt}
%%SECNUMDEPTH_PLACEHOLDER%%
% --- Page style (header/footer injected by pdfify) ---
\usepackage{fancyhdr}
\pagestyle{fancy}
\fancyhf{}
\renewcommand{\headrulewidth}{0pt}
\renewcommand{\footrulewidth}{0pt}
\setlength{\headheight}{14pt}
%%HEADER_PLACEHOLDER%%
%%FOOTER_PLACEHOLDER%%
% Make plain style identical to fancy (so title/TOC pages get the same footer)
\fancypagestyle{plain}{\fancyhf{}\renewcommand{\headrulewidth}{0pt}\renewcommand{\footrulewidth}{0pt}%%FOOTER_PLAIN%%}
% --- Blockquote styling (plain > quotes, not callouts) ---
\usepackage{etoolbox}
\renewenvironment{quote}{%
\begin{mdframed}[
backgroundcolor=infobg,
linecolor=infobar,
linewidth=3pt,
topline=false,
bottomline=false,
rightline=false,
innertopmargin=12pt,
innerbottommargin=12pt,
innerleftmargin=12pt,
innerrightmargin=12pt,
skipabove=10pt,
skipbelow=10pt,
roundcorner=0pt
]%
}{%
\end{mdframed}%
}
% --- Table styling ---
\usepackage{booktabs}
\usepackage{colortbl}
\usepackage{longtable}
\usepackage{tabularx}
\arrayrulecolor{codeborder}
% Alternating row shading
\definecolor{tablerowgray}{HTML}{F3F4F6}
\let\oldlongtable\longtable
\let\endoldlongtable\endlongtable
\renewenvironment{longtable}{\rowcolors{2}{white}{tablerowgray}\oldlongtable}{\endoldlongtable}
% Allow line breaks in table cells and shrink monospace to fit
\usepackage{array}
\renewcommand{\arraystretch}{1.4}
\let\oldtexttt\texttt
\renewcommand{\texttt}[1]{{\small\oldtexttt{\seqsplit{#1}}}}
\usepackage{seqsplit}
\setlength{\tabcolsep}{4pt}
% --- Images constrained to page ---
\usepackage{grffile}
\usepackage[export]{adjustbox}
\let\oldincludegraphics\includegraphics
\renewcommand{\includegraphics}[2][]{%
\oldincludegraphics[max width=\textwidth,max height=0.45\textheight,keepaspectratio,#1]{#2}%
}
% --- Figures don't float ---
\usepackage{float}
\floatplacement{figure}{H}
% --- Caption styling ---
\usepackage{caption}
\captionsetup{labelformat=empty,font={small,color=gray},skip=4pt}
% --- Tighter lists ---
\usepackage{enumitem}
\setlist{nosep,leftmargin=1.5em}
% --- Links ---
\usepackage{hyperref}
\hypersetup{
colorlinks=true,
linkcolor=accent,
urlcolor=accent,
citecolor=accent
}
% --- Horizontal rules ---
\renewcommand{\rule}[2]{\textcolor{headrulecolor}{\vrule width \textwidth height 0.5pt}}
LATEX
TOC_LEVEL="${TOC_LEVEL:-3}"
FM_FOOTER="${FM_FOOTER:-}"
FM_HEADER="${FM_HEADER:-}"
FM_AUTHOR="${FM_AUTHOR:-}"
FM_DATE="${FM_DATE:-}"
FM_DATE_LABEL="${FM_DATE_LABEL:-}"
FM_DATE_HASH="${FM_DATE_HASH:-}"
FM_DATE_DIRTY="${FM_DATE_DIRTY:-}"
FILE_NUMBERS="${FILE_NUMBERS:-1}"
FILE_NUMBER_FROM="${FILE_NUMBER_FROM:-2}"
FILE_PAGEBREAK="${FILE_PAGEBREAK:-1}"
# Make a plain-text value safe to place in LaTeX source.
# Arguments: $1 - raw text.
# Outputs:   the text on stdout with & % $ # _ { } prefixed by a backslash and
#            ~ ^ \ replaced by \textasciitilde{}, \textasciicircum{} and
#            \textbackslash{}.
# sed is used rather than bash ${var//…} substitution, which trips over the
# closing brace in the replacement text. Backslashes are parked in a
# placeholder first and expanded last, so the braces of \textbackslash{} are
# not escaped by the brace rules in between.
latex_escape() {
  local raw="$1"
  printf '%s' "$raw" | sed '
    s/\\/@@BSLASH@@/g
    s/&/\\&/g
    s/%/\\%/g
    s/\$/\\$/g
    s/#/\\#/g
    s/_/\\_/g
    s/{/\\{/g
    s/}/\\}/g
    s/~/\\textasciitilde{}/g
    s/\^/\\textasciicircum{}/g
    s/@@BSLASH@@/\\textbackslash{}/g
  '
}
# Inject title banner into preamble.
# When a title is set, \maketitle is redefined to draw a coloured banner holding
# title, subtitle, author and date, and is scheduled via \AtBeginDocument;
# otherwise \maketitle is redefined to do nothing.
# Every user-supplied field (title, subtitle, author, date, date label, hash)
# goes through latex_escape before it is written into the LaTeX source; the
# date fields used to be written raw, so a date such as "2024_Q1" or a label
# containing "&" ended up as active LaTeX characters.
FM_TITLE="${FM_TITLE:-}"
FM_SUBTITLE="${FM_SUBTITLE:-}"
FM_TITLE_TEX="$(latex_escape "$FM_TITLE")"
FM_SUBTITLE_TEX="$(latex_escape "$FM_SUBTITLE")"
FM_AUTHOR_TEX="$(latex_escape "${FM_AUTHOR:-}")"
{
  if [[ -n "$FM_TITLE" ]]; then
    # Fixed opening of the banner (quoted heredoc: emitted literally).
    cat <<'TITLE_STATIC'
\makeatletter
\renewcommand{\maketitle}{%
\thispagestyle{fancy}%
\vspace*{-\topskip}%
\vspace*{-\headsep}%
\vspace*{-\headheight}%
\vspace*{-0.55in}%
\noindent\hspace*{-0.5in}%
\fcolorbox{titlebg}{titlebg}{%
\parbox{\dimexpr\paperwidth-2\fboxsep-2\fboxrule}{%
\hspace*{0.3in}\begin{minipage}{\dimexpr\textwidth}%
\vspace{20pt}%
TITLE_STATIC
    echo " {\\fontsize{28}{34}\\selectfont\\bfseries\\color{black}${FM_TITLE_TEX}}\\\\[6pt]%"
    if [[ -n "$FM_SUBTITLE" ]]; then
      echo " {\\fontsize{14}{18}\\selectfont\\color{black}${FM_SUBTITLE_TEX}}\\\\[8pt]%"
    fi
    if [[ -n "$FM_AUTHOR" ]]; then
      echo " {\\fontsize{11}{14}\\selectfont\\color{black}${FM_AUTHOR_TEX}}\\\\[6pt]%"
    fi
    if [[ -n "$FM_DATE" ]]; then
      DATE_VAL=""
      if [[ -n "$FM_DATE_HASH" ]]; then
        # Cut the date at its last " · " and re-append the hash in monospace.
        DATE_VAL="$(latex_escape "${FM_DATE% · *}") · {\\texttt{$(latex_escape "$FM_DATE_HASH")}}"
      else
        DATE_VAL="$(latex_escape "$FM_DATE")"
      fi
      DIRTY_PART=""
      if [[ -n "${FM_DATE_DIRTY:-}" ]]; then
        DIRTY_PART=" {\\color{gray}\\itshape (dirty)}"
      fi
      if [[ -n "$FM_DATE_LABEL" ]]; then
        echo " {\\fontsize{10}{12}\\selectfont\\color{black}${DATE_VAL} {\\color{gray}--- $(latex_escape "$FM_DATE_LABEL")}${DIRTY_PART}}\\\\[4pt]%"
      else
        echo " {\\fontsize{10}{12}\\selectfont\\color{black}${DATE_VAL}${DIRTY_PART}}\\\\[4pt]%"
      fi
    fi
    # Fixed closing of the banner.
    cat <<'TITLE_END'
\vspace{6pt}%
\end{minipage}%
}%
}%
\par\vspace{20pt}%
}
\makeatother
TITLE_END
    echo '\AtBeginDocument{\maketitle}'
  else
    echo '\renewcommand{\maketitle}{}'
  fi
} >> "$PREAMBLE"
# Inject header/footer into preamble.
# The %%...%% placeholders in the preamble file are substituted with sed.
# Header and footer text comes from the user, so it is LaTeX-escaped first and
# each complete replacement string is then escaped for sed. Previously the raw
# text went straight into the sed expression: "&" expanded to the placeholder
# name, "|" ended the expression early (sed error, script aborts) and "\"
# started a sed escape.
GIT_STAMP="${GIT_STAMP:-}"

# Escape $1 for literal use as the replacement half of sed 's|...|...|':
# backslash, "&" and the "|" delimiter each get a leading backslash.
sed_replacement() {
  printf '%s' "$1" | sed -e 's/[\\&|]/\\&/g'
}

FOOTER_L=""
FOOTER_C=""
FOOTER_R='\fancyfoot[R]{\color{gray}\small Page \thepage\ of \pageref*{LastPage}}'
if [[ -n "$FM_FOOTER" ]]; then
  FOOTER_L="\\fancyfoot[L]{\\color{gray}\\small $(latex_escape "$FM_FOOTER")}"
fi
FOOTER_ALL="$(sed_replacement "${FOOTER_L}${FOOTER_C}${FOOTER_R}")"
sed -i "s|%%FOOTER_PLACEHOLDER%%|\\\\usepackage{lastpage}${FOOTER_ALL}|" "$PREAMBLE"
sed -i "s|%%FOOTER_PLAIN%%|${FOOTER_ALL}|" "$PREAMBLE"

# An empty header simply removes the placeholder.
HEADER_TEX=""
if [[ -n "$FM_HEADER" ]]; then
  HEADER_TEX="$(sed_replacement "\\fancyhead[C]{\\color{gray}\\small $(latex_escape "$FM_HEADER")}")"
fi
sed -i "s|%%HEADER_PLACEHOLDER%%|${HEADER_TEX}|" "$PREAMBLE"
# Inject watermark if set.
# Appends an eso-pic/tikz overlay to the preamble that prints the text rotated
# across the centre of each page. The text is LaTeX-escaped first; it used to be
# inserted raw, so characters such as & % _ # reached LaTeX unescaped.
FM_WATERMARK="${FM_WATERMARK:-}"
if [[ -n "$FM_WATERMARK" ]]; then
  FM_WATERMARK_TEX="$(latex_escape "$FM_WATERMARK")"
  # Unquoted heredoc: "\\" becomes a single backslash and the variable expands.
  cat >> "$PREAMBLE" <<WATERMARK
\\usepackage{eso-pic}
\\usepackage{tikz}
\\AddToShipoutPictureFG{%
\\begin{tikzpicture}[remember picture,overlay]
\\node[rotate=45,opacity=0.12,scale=10,text=red] at (current page.center) {\\textsf{\\textbf{\\MakeUppercase{${FM_WATERMARK_TEX}}}}};
\\end{tikzpicture}%
}
WATERMARK
  detail "Watermark: ${CYAN}${FM_WATERMARK}${RESET}"
fi
# Build TOC flags
# TOC_LEVEL greater than 0 enables pandoc's table of contents at that depth;
# anything else leaves TOC_FLAGS empty, which disables the TOC.
# NOTE(review): an earlier comment here said the depth needs +1 when numbering
# is on because headings shift by -1; the code passes TOC_LEVEL through
# unchanged — confirm which of the two is intended.
TOC_FLAGS=()
if [[ "$TOC_LEVEL" -gt 0 ]]; then
TOC_FLAGS+=(--toc --toc-depth="$TOC_LEVEL")
detail "TOC depth: ${CYAN}${TOC_LEVEL}${RESET}"
else
detail "TOC: ${DIM}disabled${RESET}"
fi
# Pass the author to pandoc as document metadata only when one is set.
AUTHOR_FLAGS=()
if [[ -n "$FM_AUTHOR" ]]; then
AUTHOR_FLAGS+=(-M "author=$FM_AUTHOR")
fi
# Numbered sections
# With FILE_NUMBERS=1, pandoc gets --number-sections and the preamble receives
# LaTeX overrides chosen by FILE_NUMBER_FROM. The -ge 2 and -ge 3 branches are
# cumulative: for 3 and above both heredocs are appended, and the later
# \renewcommand lines override the earlier ones.
NUMBER_FLAGS=()
if [[ "$FILE_NUMBERS" == "1" ]]; then
NUMBER_FLAGS+=(--number-sections)
# numberfrom controls which heading level starts getting numbers
# pandoc: section=1, subsection=2, subsubsection=3
# Unquoted heredoc: "\\" is written to the preamble as a single backslash.
cat >> "$PREAMBLE" <<SECNUM
\\setcounter{secnumdepth}{4}
SECNUM
if [[ "$FILE_NUMBER_FROM" -ge 2 ]]; then
# H1 (\section) unnumbered, H2 numbered as 1, 2, 3
# Quoted heredoc: the LaTeX below is written verbatim.
cat >> "$PREAMBLE" <<'SECNUM2'
\makeatletter
\renewcommand{\thesection}{}
\renewcommand{\thesubsection}{\arabic{subsection}}
\renewcommand{\thesubsubsection}{\thesubsection.\arabic{subsubsection}}
% Remove section number from titleformat without changing style
\titleformat{\section}
{\LARGE\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=5}\scshape}
{}{0em}{}[\vspace{2pt}{\color{headrulecolor}\titlerule[1pt]}]
\makeatother
SECNUM2
fi
if [[ "$FILE_NUMBER_FROM" -ge 3 ]]; then
# Additionally blank the subsection number, so numbering starts at \subsubsection.
cat >> "$PREAMBLE" <<'SECNUM3'
\renewcommand{\thesubsection}{}
\renewcommand{\thesubsubsection}{\arabic{subsubsection}}
\titleformat{\subsection}
{\Large\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=-1}}
{}{0em}{}
SECNUM3
fi
fi
# Remove placeholder
# (runs whether or not numbering is enabled, so the marker never reaches LaTeX)
sed -i 's|%%SECNUMDEPTH_PLACEHOLDER%%||' "$PREAMBLE"
# Render the PDF with pandoc (XeLaTeX engine), using the assembled preamble and
# the TOC/author/numbering option arrays built above.
pandoc "$TEMP_MD" \
  -o "$OUTPUT_FILE" \
  --pdf-engine=xelatex \
  --lua-filter="$BRACKET_FILTER" \
  --resource-path=".:$WORKDIR" \
  --columns=72 \
  -V geometry:"margin=0.5in,includehead,includefoot" \
  -V fontsize=10pt \
  -V mainfont="Roboto" \
  -V monofont="Roboto Mono" \
  "${TOC_FLAGS[@]}" \
  "${AUTHOR_FLAGS[@]}" \
  "${NUMBER_FLAGS[@]}" \
  --highlight-style=tango \
  -H "$PREAMBLE" \
  --standalone

# Remove intermediate files; names that were never set expand to "" and are
# ignored by rm -f.
rm -f "$TEMP_MD" "$CALLOUT_MD" "$PREAMBLE" "${BRACKET_FILTER:-}" "${STRIPPED:-}" /tmp/mermaid-*.mmd /tmp/mermaid-*.png

# Rough page count: number of lines in the PDF that carry a "/Type /Page"
# object, excluding the "/Pages" tree node the old pattern also matched.
# grep -c prints 0 *and* exits non-zero when nothing matches, so the former
# "|| echo ?" fallback produced the two-line value "0" + "?". The count is now
# kept only when it is a positive integer and shown as "?" otherwise.
# grep -a reads the binary file directly, so `strings` (not among the packages
# the Dockerfile installs explicitly) is no longer needed.
PAGES=$(LC_ALL=C grep -a -c -E '/Type[[:space:]]*/Page([^s]|$)' "$OUTPUT_FILE" 2>/dev/null || true)
if [[ ! "$PAGES" =~ ^[1-9][0-9]*$ ]]; then
  PAGES="?"
fi
SIZE=$(du -h "$OUTPUT_FILE" | cut -f1 | tr -d ' ')
success "PDF generated: ${CYAN}${SIZE}${RESET}, ~${CYAN}${PAGES}${RESET} pages"
INNER_SCRIPT
# The generated conversion script is executed inside the container, so it must
# be executable.
chmod +x "$CONVERT_SCRIPT"
# --- Run Docker ---
echo ""
info "Launching Docker container..."
detail "Mounting: ${CYAN}${INPUT_DIR}${RESET} → /work ${DIM}(read-only)${RESET}"
detail "Output: ${CYAN}${OUTPUT_DIR}${RESET} → /output"
echo ""
CONVERT_BASENAME="$(basename "$CONVERT_SCRIPT")"
# Container invocation:
#   - the input directory is mounted read-only at /work, the output directory
#     read-write at /output;
#   - every per-file setting is passed as an environment variable (-e);
#   - /tmp is a tmpfs mounted with exec;
#   - the arguments after the image name are the conversion script (addressed
#     under /work by its basename), the input file name and the output path
#     under /output.
# If the container exits non-zero, an error is printed and this function
# returns 1 so the caller can count the failure.
docker run --rm \
-v "$INPUT_DIR:/work:ro" \
-v "$OUTPUT_DIR:/output" \
-e "TOC_LEVEL=$FILE_TOC_LEVEL" \
-e "FM_FOOTER=$FM_FOOTER" \
-e "FM_HEADER=$FM_HEADER" \
-e "FM_AUTHOR=$FM_AUTHOR" \
-e "FM_TITLE=$FM_TITLE" \
-e "FM_SUBTITLE=$FM_SUBTITLE" \
-e "FM_DATE=$FM_DATE" \
-e "FM_DATE_LABEL=${FM_DATE_LABEL:-}" \
-e "FM_DATE_HASH=${FM_DATE_HASH:-}" \
-e "FM_DATE_DIRTY=${FM_DATE_DIRTY:-}" \
-e "GIT_STAMP=${GIT_STAMP:-}" \
-e "FILE_NUMBERS=$FILE_NUMBERS" \
-e "FILE_NUMBER_FROM=$FILE_NUMBER_FROM" \
-e "HIDE_FIRST_H1=$HIDE_FIRST_H1" \
-e "FM_WATERMARK=$FM_WATERMARK" \
-e "FILE_PAGEBREAK=$FILE_PAGEBREAK" \
--tmpfs /tmp:exec \
"$IMAGE_NAME" "/work/$CONVERT_BASENAME" "$INPUT_FILE" "/output/$OUTPUT_FILE" \
|| {
echo ""
echo -e " ${RED}${BOLD}Error producing PDF.${RESET} Docker/pandoc exited with a non-zero status."
echo ""
return 1
}
# Move preview file to /tmp and clean up
# (PREVIEW_FINAL is non-empty only in preview mode; OUTPUT is repointed so the
# message and the optional open below refer to the moved file.)
if [[ -n "$PREVIEW_FINAL" ]]; then
mv "$OUTPUT" "$PREVIEW_FINAL"
OUTPUT="$PREVIEW_FINAL"
fi
echo ""
echo -e " ${GREEN}${BOLD}PDF created:${RESET} ${CYAN}${OUTPUT}${RESET}"
echo ""
# Open if requested
if [[ $OPEN -eq 1 ]]; then
open_pdf "$OUTPUT"
fi
}
# --- Process each input file ---
# Convert every path in POSITIONAL with convert_file (OUT_FILE is passed as the
# output override) and print a summary banner.
# Globals: POSITIONAL (read), OUT_FILE (read)
# Outputs: whatever convert_file prints, then one header banner
# Returns: the status of the final header call; conversion failures are counted
#          and reported in the banner, not through the return status.
run_all() {
  # input_file is declared local so the loop variable no longer leaks into the
  # global scope after the function returns.
  local FAILED=0 input_file
  for input_file in "${POSITIONAL[@]}"; do
    if ! convert_file "$input_file" "$OUT_FILE"; then
      FAILED=$((FAILED + 1))
    fi
  done
  if [[ $FAILED -eq 0 ]]; then
    header "Complete! (${#POSITIONAL[@]} file(s))"
  else
    header "${FAILED} of ${#POSITIONAL[@]} file(s) failed"
  fi
}
run_all

# --- Watch mode ---
# Poll the input files every 2 seconds and rebuild everything when any file's
# content hash changes. Hashes are kept in an indexed array parallel to the
# resolved paths (indexed arrays work on bash 3), replacing the earlier checksum
# temp file. That removes three problems:
#   * the lookup used the path as a grep regular expression; for a path with
#     regex metacharacters (e.g. "notes[1].md") grep found nothing and, under
#     `set -e` + pipefail, the whole script exited;
#   * hashing a file that was briefly missing (editors that save by rename)
#     also aborted the script;
#   * the EXIT trap for the temp file replaced the trap convert_file installs
#     for its conversion script, and was replaced again on the next rebuild.
if [[ $WATCH -eq 1 ]]; then
  info "Watching for changes... ${DIM}(Ctrl+C to stop)${RESET}"
  echo ""

  # Print the SHA-256 of file $1, or nothing when it cannot be read right now.
  watch_hash() {
    [[ -f "$1" ]] || return 0
    _sha256 2>/dev/null < "$1" | cut -d' ' -f1 || true
  }

  # Resolve absolute paths once and record the initial hashes.
  WATCH_PATHS=()
  WATCH_HASHES=()
  for f in "${POSITIONAL[@]}"; do
    fpath="$(cd "$(dirname "$f")" && pwd)/$(basename "$f")"
    WATCH_PATHS+=("$fpath")
    WATCH_HASHES+=("$(watch_hash "$fpath")")
  done

  while true; do
    sleep 2
    CHANGED=0
    for i in "${!WATCH_PATHS[@]}"; do
      NEW_HASH="$(watch_hash "${WATCH_PATHS[$i]}")"
      # Empty hash: the file was unreadable on this tick; check again next time.
      [[ -n "$NEW_HASH" ]] || continue
      if [[ "$NEW_HASH" != "${WATCH_HASHES[$i]}" ]]; then
        CHANGED=1
        WATCH_HASHES[$i]="$NEW_HASH"
      fi
    done
    if [[ $CHANGED -eq 1 ]]; then
      echo ""
      info "Change detected — rebuilding..."
      echo ""
      run_all
    fi
  done
fi

# Check for updates (runs after success, fast timeout)
# Not reached in watch mode: the loop above only ends when the script is stopped.
check_for_update
| 1 | #!/usr/bin/env bash |
| 2 | set -euo pipefail |
| 3 | |
| 4 | # pdfify — Convert Markdown to beautiful PDF via Docker |
| 5 | # Supports: images, mermaid diagrams, tables, code blocks, Obsidian callouts |
| 6 | # Usage: ./pdfify <file.md> [file2.md ...] [options] |
| 7 | |
| 8 | VERSION="1.2.0" |
| 9 | IMAGE_NAME="pdfify" |
| 10 | GIST_ID="23f4514a1f0da1347d3f89926c23b68f" |
| 11 | GIST_RAW="https://gist.githubusercontent.com/jclement/${GIST_ID}/raw/pdfify.sh" |
| 12 | SELF="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")" |
| 13 | |
| 14 | # --- Colors --- |
| 15 | RED='\033[0;31m' |
| 16 | GREEN='\033[0;32m' |
| 17 | YELLOW='\033[0;33m' |
| 18 | BLUE='\033[0;34m' |
| 19 | MAGENTA='\033[0;35m' |
| 20 | CYAN='\033[0;36m' |
| 21 | BOLD='\033[1m' |
| 22 | DIM='\033[2m' |
| 23 | RESET='\033[0m' |
| 24 | |
# --- Portable SHA-256 (macOS has shasum, Linux often has sha256sum) ---
# Arguments: files to hash; with none, the tool reads stdin.
# Tries shasum first (its error output is discarded) and falls back to
# sha256sum when that attempt fails.
_sha256() {
  if ! shasum -a 256 "$@" 2>/dev/null; then
    sha256sum "$@"
  fi
}
| 27 | |
# Console output helpers. Each joins its arguments with "$*" and prints through
# echo -e, so the colour variables (and any backslash escapes in the message)
# are interpreted.

# Bold status line prefixed with "::".
info() {
  local msg="$*"
  echo -e "${BLUE}::${RESET} ${BOLD}${msg}${RESET}"
}

# Line prefixed with a green check mark.
success() {
  local msg="$*"
  echo -e "${GREEN}✓${RESET} ${msg}"
}

# Line prefixed with a yellow warning sign.
warn() {
  local msg="$*"
  echo -e "${YELLOW}⚠${RESET} ${msg}"
}

# Secondary line prefixed with a dim arrow.
detail() {
  local msg="$*"
  echo -e " ${DIM}→${RESET} ${msg}"
}

# Banner: blank line, rule, bold title, rule, blank line.
header() {
  local title="$*"
  local rule="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  echo -e "\n${MAGENTA}${rule}${RESET}"
  echo -e "${MAGENTA} ${BOLD}${title}${RESET}"
  echo -e "${MAGENTA}${rule}${RESET}\n"
}
| 33 | |
# --- Self-update ---
# Download the current pdfify from the gist and replace this script with it.
# Globals: GIST_ID, GIST_RAW, SELF (read)
# Exits:   0 after a successful replacement; 1 when the download fails or the
#          downloaded file is empty / does not start with a shebang.
#          This function never returns to its caller.
do_update() {
  info "Updating pdfify from gist..."
  local raw_url tmp
  # Ask the gist API for the current raw URL. Under `set -euo pipefail` a failed
  # lookup (no network, rate limit, no "raw_url" line) used to abort the script
  # right here, so the GIST_RAW fallback on the next line was never reached.
  raw_url=$(curl -fsSL "https://api.github.com/gists/${GIST_ID}" 2>/dev/null \
    | grep '"raw_url"' \
    | head -1 \
    | sed 's/.*"raw_url": *"//;s/".*//') || raw_url=""
  [[ -n "$raw_url" ]] || raw_url="$GIST_RAW"

  tmp=$(mktemp)
  if ! curl -fsSL "$raw_url" -o "$tmp" 2>/dev/null; then
    rm -f "$tmp"
    echo -e "${RED}Error:${RESET} Failed to download update"
    exit 1
  fi
  # Sanity check: non-empty and first line is a shebang.
  if [[ ! -s "$tmp" ]] || ! head -1 "$tmp" | grep -q '^#!/'; then
    rm -f "$tmp"
    echo -e "${RED}Error:${RESET} Downloaded file doesn't look like a script"
    exit 1
  fi
  # mktemp creates the file with mode 0600; a plain "chmod +x" would leave it
  # unreadable for other users, who then could not run the script.
  chmod 755 "$tmp"
  mv "$tmp" "$SELF"
  success "Updated to latest version"
  detail "${CYAN}${SELF}${RESET}"
  exit 0
}
| 59 | |
# Print a hint when the script published in the gist differs from this file.
# Best effort: a failed lookup or download, or an empty raw URL, returns 0
# without printing anything. Both curl calls use short timeouts.
# Globals: GIST_ID, SELF (read)
check_for_update() {
  local remote_hash local_hash raw_url
  if ! raw_url=$(curl -fsSL --connect-timeout 2 --max-time 3 "https://api.github.com/gists/${GIST_ID}" 2>/dev/null \
    | grep '"raw_url"' \
    | head -1 \
    | sed 's/.*"raw_url": *"//;s/".*//'); then
    return 0
  fi
  if [[ -z "$raw_url" ]]; then
    return 0
  fi
  if ! remote_hash=$(curl -fsSL --connect-timeout 2 --max-time 5 "$raw_url" 2>/dev/null | _sha256 | cut -d' ' -f1); then
    return 0
  fi
  local_hash=$(_sha256 < "$SELF" | cut -d' ' -f1)
  # Nothing to report without a remote hash, or when both copies match.
  [[ -n "$remote_hash" ]] || return 0
  [[ "$remote_hash" != "$local_hash" ]] || return 0
  echo -e "${YELLOW}⚠${RESET} ${DIM}A newer version of pdfify is available. Run ${CYAN}pdfify --update${DIM} to upgrade.${RESET}"
}
| 70 | |
# --- Args (CLI overrides frontmatter; "" means "use frontmatter default") ---
# Options that take a value accept both "--key value" and "--key=value".
# Changes from the previous parser:
#   * a value-taking option given as the last argument is now an error (its
#     missing value used to be ignored silently);
#   * an unknown "--option" is rejected up front instead of being queued as an
#     input file, which only failed later, after the Docker image step;
#   * --clean uses if/else rather than "A && B || C", so the "not found"
#     message can no longer be printed because the success message failed.
REBUILD=0
WATCH=0
OPEN=0
PREVIEW=0
OUT_FILE=""
NEXT_KEY=""
POSITIONAL=()

# CLI overrides — empty string means "not set, defer to frontmatter"
CLI_TOC_LEVEL=""
CLI_NUMBERS=""
CLI_NUMBER_FROM=""
CLI_TITLE=""
CLI_SUBTITLE=""
CLI_AUTHOR=""
CLI_HEADER=""
CLI_FOOTER=""
CLI_DATE=""
CLI_WATERMARK=""

for arg in "$@"; do
  # The previous argument was a value-taking option: this one is its value.
  if [[ -n "$NEXT_KEY" ]]; then
    case "$NEXT_KEY" in
      toc-level) CLI_TOC_LEVEL="$arg" ;;
      number-from) CLI_NUMBER_FROM="$arg" ;;
      out) OUT_FILE="$arg" ;;
      title) CLI_TITLE="$arg" ;;
      subtitle) CLI_SUBTITLE="$arg" ;;
      author) CLI_AUTHOR="$arg" ;;
      header) CLI_HEADER="$arg" ;;
      footer) CLI_FOOTER="$arg" ;;
      date) CLI_DATE="$arg" ;;
      watermark) CLI_WATERMARK="$arg" ;;
    esac
    NEXT_KEY=""
    continue
  fi
  case "$arg" in
    --rebuild) REBUILD=1 ;;
    --update) do_update ;;
    --watch) WATCH=1 ;;
    --open) OPEN=1 ;;
    --preview) PREVIEW=1; OPEN=1 ;;
    --no-numbers) CLI_NUMBERS="false" ;;
    --numbers) CLI_NUMBERS="true" ;;
    --clean)
      echo -e "${BLUE}::${RESET} ${BOLD}Removing Docker image ${CYAN}${IMAGE_NAME}${RESET}..."
      if docker rmi "$IMAGE_NAME" >/dev/null 2>&1; then
        echo -e "${GREEN}✓${RESET} Image removed"
      else
        echo -e "${DIM}Image not found${RESET}"
      fi
      exit 0
      ;;
    # "--key value" form: remember the key, the next argument is its value.
    --toc-level | --number-from | --out | --title | --subtitle | --author | --header | --footer | --date | --watermark)
      NEXT_KEY="${arg#--}"
      ;;
    --toc-level=*) CLI_TOC_LEVEL="${arg#*=}" ;;
    --number-from=*) CLI_NUMBER_FROM="${arg#*=}" ;;
    --out=*) OUT_FILE="${arg#*=}" ;;
    --title=*) CLI_TITLE="${arg#*=}" ;;
    --subtitle=*) CLI_SUBTITLE="${arg#*=}" ;;
    --author=*) CLI_AUTHOR="${arg#*=}" ;;
    --header=*) CLI_HEADER="${arg#*=}" ;;
    --footer=*) CLI_FOOTER="${arg#*=}" ;;
    --date=*) CLI_DATE="${arg#*=}" ;;
    --watermark=*) CLI_WATERMARK="${arg#*=}" ;;
    --version)
      echo "pdfify v${VERSION}"
      exit 0
      ;;
    --help | -h)
      echo -e "${BOLD}pdfify${RESET} v${VERSION} — Markdown to PDF"
      echo ""
      echo -e "${BOLD}Usage:${RESET} pdfify ${CYAN}<file.md> [file2.md ...]${RESET} [options]"
      echo ""
      echo -e "${BOLD}Options:${RESET}"
      echo -e " ${DIM}--out FILE${RESET} Output file (single input only)"
      echo -e " ${DIM}--toc-level N${RESET} TOC depth: 0=none, 1=H1, 2=H2, 3=H3 (default: 3)"
      echo -e " ${DIM}--numbers${RESET} Enable numbered headings (default)"
      echo -e " ${DIM}--no-numbers${RESET} Disable numbered headings"
      echo -e " ${DIM}--number-from N${RESET} Start numbering at heading level N (default: 2)"
      echo -e " ${DIM}--open${RESET} Open PDF after generation"
      echo -e " ${DIM}--preview${RESET} Render to /tmp and open (no permanent file)"
      echo -e " ${DIM}--watch${RESET} Watch for changes and regenerate"
      echo -e " ${DIM}--rebuild${RESET} Force rebuild the Docker image"
      echo -e " ${DIM}--clean${RESET} Remove the Docker image"
      echo -e " ${DIM}--update${RESET} Update pdfify to latest version from gist"
      echo -e " ${DIM}--version${RESET} Show version"
      echo ""
      echo -e "${BOLD}Overrides${RESET} (CLI trumps frontmatter):"
      echo -e " ${DIM}--title TEXT${RESET} ${DIM}--subtitle TEXT${RESET}"
      echo -e " ${DIM}--author TEXT${RESET} ${DIM}--header TEXT${RESET}"
      echo -e " ${DIM}--footer TEXT${RESET} ${DIM}--date TEXT${RESET}"
      echo -e " ${DIM}--watermark TEXT${RESET}"
      echo ""
      echo -e "${BOLD}Frontmatter:${RESET}"
      echo -e " title, subtitle, author, header, footer, toc-level, date,"
      echo -e " numbersections (true/false), numberfrom (1-4), watermark,"
      echo -e " pagebreak (true/false — page break before each top-level heading)"
      exit 0
      ;;
    --*)
      echo -e "${RED}Error:${RESET} unknown option ${arg}"
      echo -e " Run ${CYAN}pdfify --help${RESET} for all options"
      exit 1
      ;;
    *) POSITIONAL+=("$arg") ;;
  esac
done

# A value-taking option was the last argument.
if [[ -n "$NEXT_KEY" ]]; then
  echo -e "${RED}Error:${RESET} --${NEXT_KEY} requires a value"
  exit 1
fi

if [[ ${#POSITIONAL[@]} -lt 1 ]]; then
  echo -e "${BOLD}Usage:${RESET} pdfify ${CYAN}<file.md> [file2.md ...]${RESET} [options]"
  echo -e " Run ${CYAN}pdfify --help${RESET} for all options"
  exit 1
fi

if [[ -n "$OUT_FILE" && ${#POSITIONAL[@]} -gt 1 ]]; then
  echo -e "${RED}Error:${RESET} --out cannot be used with multiple input files"
  exit 1
fi
| 184 | |
# --- Open helper ---
# Hand a PDF to the first available opener: `open`, otherwise `xdg-open`.
# Does nothing when neither command exists.
# Arguments: $1 - path of the PDF
open_pdf() {
  local pdf="$1" opener
  for opener in open xdg-open; do
    if command -v "$opener" >/dev/null 2>&1; then
      "$opener" "$pdf"
      return
    fi
  done
}
| 194 | |
| 195 | header "pdfify v${VERSION}" |
| 196 | |
# --- Embedded Dockerfile ---
# Image contents: Node 20 base, pandoc, TeX Live with XeLaTeX, Chromium, fonts
# (Roboto from Debian packages, Roboto Mono downloaded from GitHub), mermaid-cli
# and its JSON config files. ENTRYPOINT is bash, so the container is started
# with a script path as its first argument.
#
# The PUPPETEER_* variables are declared *before* "npm install": an ENV line
# only affects the instructions after it, so with the previous order the skip
# flag was not set while mermaid-cli's puppeteer dependency was being installed
# and had no effect on that step. PUPPETEER_SKIP_DOWNLOAD is the current name of
# the switch; the older PUPPETEER_SKIP_CHROMIUM_DOWNLOAD is kept alongside it.
DOCKERFILE=$(cat <<'DOCKERFILE_END'
FROM node:20-slim
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update -qq && \
    apt-get install -y --no-install-recommends \
      pandoc \
      texlive-latex-recommended \
      texlive-latex-extra \
      texlive-fonts-recommended \
      texlive-fonts-extra \
      texlive-xetex \
      lmodern \
      librsvg2-bin \
      chromium \
      ca-certificates \
      fonts-liberation \
      fonts-roboto \
      fonts-roboto-unhinted \
      fonts-noto-color-emoji \
      wget \
      fontconfig \
    && rm -rf /var/lib/apt/lists/*
RUN mkdir -p /usr/share/fonts/truetype/roboto-mono && \
    for style in Regular Bold Italic BoldItalic Medium MediumItalic Light LightItalic; do \
      wget -q "https://github.com/googlefonts/RobotoMono/raw/main/fonts/ttf/RobotoMono-${style}.ttf" \
        -O "/usr/share/fonts/truetype/roboto-mono/RobotoMono-${style}.ttf" 2>/dev/null || true; \
    done && \
    fc-cache -f
ENV PUPPETEER_SKIP_DOWNLOAD=true
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
ENV CHROME_PATH=/usr/bin/chromium
RUN npm install -g @mermaid-js/mermaid-cli
RUN echo '{"maxTextSize": 90000, "flowchart": {"useMaxWidth": true}, "theme": "base", "themeVariables": {"primaryColor": "#3B82F6", "primaryBorderColor": "#1E40AF", "primaryTextColor": "#1E293B", "lineColor": "#475569", "xyChart": {"backgroundColor": "transparent", "plotColorPalette": "#2563EB,#DC2626,#16A34A,#D97706,#9333EA,#0891B2"}}}' > /opt/mermaid-config.json
RUN echo '{"args": ["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--disable-gpu"]}' > /opt/puppeteer-config.json
WORKDIR /work
ENTRYPOINT ["/bin/bash"]
DOCKERFILE_END
)
| 236 | |
# --- Build Docker image ---
# Reuse the image if it exists (after removing it first when --rebuild was
# given); otherwise build it from the embedded Dockerfile with the classic
# builder, whose "Step N/M" lines are condensed into a short progress display.
echo ""
if [[ $REBUILD -eq 1 ]]; then
  info "Removing existing Docker image ${CYAN}${IMAGE_NAME}${RESET}..."
  docker rmi "$IMAGE_NAME" >/dev/null 2>&1 || true
  success "Image removed"
fi

if docker image inspect "$IMAGE_NAME" >/dev/null 2>&1; then
  info "Docker image ${GREEN}${IMAGE_NAME}${RESET} found ${DIM}(cached)${RESET}"
  success "Reusing existing image"
else
  info "Building Docker image ${CYAN}${IMAGE_NAME}${RESET}..."
  detail "Installing: pandoc, XeLaTeX, mermaid-cli, Chromium, fonts"
  detail "This takes 2-3 minutes on first run (cached after)"
  echo ""
  BUILD_CTX=$(mktemp -d)
  # The pipeline's status is deliberately ignored: with `set -e` + pipefail a
  # failed build used to end the script on this line, so the temp directory was
  # left behind and the full-output re-run below could never happen. Success is
  # decided by the image inspect that follows.
  echo "$DOCKERFILE" | DOCKER_BUILDKIT=0 docker build -t "$IMAGE_NAME" -f - "$BUILD_CTX" 2>&1 | while IFS= read -r line; do
    if [[ "$line" =~ ^Step\ ([0-9]+)/([0-9]+) ]]; then
      echo -e " ${CYAN}[${BASH_REMATCH[1]}/${BASH_REMATCH[2]}]${RESET} ${DIM}${line#*: }${RESET}"
    elif [[ "$line" == *"Successfully tagged"* ]]; then
      echo -e " ${GREEN}${line}${RESET}"
    elif [[ "$line" == *"ERROR"* || "$line" == *"error"* ]]; then
      echo -e " ${RED}${line}${RESET}"
    fi
  done || true
  rm -rf "$BUILD_CTX"
  if ! docker image inspect "$IMAGE_NAME" >/dev/null 2>&1; then
    echo -e "\n${RED}Docker build failed. Re-running with full output:${RESET}\n"
    BUILD_CTX=$(mktemp -d)
    # Expected to fail again; its unfiltered output is the point. The status is
    # ignored so the cleanup and the explicit exit 1 still run.
    echo "$DOCKERFILE" | DOCKER_BUILDKIT=0 docker build -t "$IMAGE_NAME" -f - "$BUILD_CTX" || true
    rm -rf "$BUILD_CTX"
    exit 1
  fi
  success "Docker image built"
fi
| 273 | |
| 274 | # === Per-file conversion === |
| 275 | convert_file() { |
| 276 | local INPUT_PATH="$1" |
| 277 | local OUTPUT_OVERRIDE="$2" |
| 278 | |
| 279 | # --- Resolve paths --- |
| 280 | local INPUT INPUT_DIR INPUT_FILE OUTPUT OUTPUT_DIR OUTPUT_FILE |
| 281 | INPUT="$(cd "$(dirname "$INPUT_PATH")" && pwd)/$(basename "$INPUT_PATH")" |
| 282 | if [[ ! -f "$INPUT" ]]; then |
| 283 | echo -e "${RED}Error:${RESET} $INPUT_PATH not found" |
| 284 | return 1 |
| 285 | fi |
| 286 | INPUT_DIR="$(dirname "$INPUT")" |
| 287 | INPUT_FILE="$(basename "$INPUT")" |
| 288 | OUTPUT="${OUTPUT_OVERRIDE:-${INPUT%.md}.pdf}" |
| 289 | OUTPUT_DIR="$(cd "$(dirname "$OUTPUT")" 2>/dev/null && pwd || (mkdir -p "$(dirname "$OUTPUT")" && cd "$(dirname "$OUTPUT")" && pwd))" |
| 290 | OUTPUT="${OUTPUT_DIR}/$(basename "$OUTPUT")" |
| 291 | OUTPUT_FILE="$(basename "$OUTPUT")" |
| 292 | |
| 293 | # Preview mode: write temp file in input dir (Docker-mountable), move to /tmp after |
| 294 | local PREVIEW_FINAL="" |
| 295 | if [[ $PREVIEW -eq 1 ]]; then |
| 296 | local base="${INPUT_FILE%.md}" |
| 297 | PREVIEW_FINAL="/tmp/pdfify-preview-${base}.pdf" |
| 298 | OUTPUT_FILE=".pdfify-preview-${base}.pdf" |
| 299 | OUTPUT="${OUTPUT_DIR}/${OUTPUT_FILE}" |
| 300 | fi |
| 301 | |
| 302 | info "Input: ${CYAN}${INPUT}${RESET}" |
| 303 | if [[ -n "$PREVIEW_FINAL" ]]; then |
| 304 | info "Output: ${CYAN}${PREVIEW_FINAL}${RESET} ${DIM}(preview)${RESET}" |
| 305 | else |
| 306 | info "Output: ${CYAN}${OUTPUT}${RESET}" |
| 307 | fi |
| 308 | |
| 309 | # --- Parse YAML frontmatter --- |
| 310 | local FM_TITLE="" FM_SUBTITLE="" FM_AUTHOR="" FM_FOOTER="" FM_HEADER="" |
| 311 | local FM_TOC_LEVEL="" FM_DATE="" FM_NUMBERSECTIONS="" FM_NUMBERFROM="" FM_WATERMARK="" |
| 312 | local FM_DATE_HASH="" FM_DATE_DIRTY="" FM_DATE_LABEL="" |
| 313 | |
| 314 | if head -1 "$INPUT" | grep -q '^---'; then |
| 315 | local FM_BLOCK |
| 316 | FM_BLOCK=$(awk 'NR==1 && /^---/{found=1; next} found && /^---/{exit} found{print}' "$INPUT") |
| 317 | extract_fm() { echo "$FM_BLOCK" | sed -n "s/^$1:[[:space:]]*//p" | sed 's/^["'"'"']\(.*\)["'"'"']$/\1/'; } |
| 318 | FM_TITLE=$(extract_fm "title") |
| 319 | FM_AUTHOR=$(extract_fm "author") |
| 320 | FM_SUBTITLE=$(extract_fm "subtitle") |
| 321 | FM_FOOTER=$(extract_fm "footer") |
| 322 | FM_HEADER=$(extract_fm "header") |
| 323 | FM_TOC_LEVEL=$(extract_fm "toc-level") |
| 324 | FM_DATE=$(extract_fm "date") |
| 325 | FM_NUMBERSECTIONS=$(extract_fm "numbersections") |
| 326 | FM_NUMBERFROM=$(extract_fm "numberfrom") |
| 327 | FM_WATERMARK=$(extract_fm "watermark") |
| 328 | FM_PAGEBREAK=$(extract_fm "pagebreak") |
| 329 | fi |
| 330 | |
| 331 | # --- CLI overrides frontmatter --- |
| 332 | [[ -n "$CLI_TITLE" ]] && FM_TITLE="$CLI_TITLE" |
| 333 | [[ -n "$CLI_SUBTITLE" ]] && FM_SUBTITLE="$CLI_SUBTITLE" |
| 334 | [[ -n "$CLI_AUTHOR" ]] && FM_AUTHOR="$CLI_AUTHOR" |
| 335 | [[ -n "$CLI_FOOTER" ]] && FM_FOOTER="$CLI_FOOTER" |
| 336 | [[ -n "$CLI_HEADER" ]] && FM_HEADER="$CLI_HEADER" |
| 337 | [[ -n "$CLI_DATE" ]] && FM_DATE="$CLI_DATE" |
| 338 | [[ -n "$CLI_WATERMARK" ]] && FM_WATERMARK="$CLI_WATERMARK" |
| 339 | [[ -n "$CLI_TOC_LEVEL" ]] && FM_TOC_LEVEL="$CLI_TOC_LEVEL" |
| 340 | [[ -n "$CLI_NUMBER_FROM" ]] && FM_NUMBERFROM="$CLI_NUMBER_FROM" |
| 341 | [[ -n "$CLI_NUMBERS" ]] && FM_NUMBERSECTIONS="$CLI_NUMBERS" |
| 342 | |
| 343 | # --- Auto-detect document structure --- |
| 344 | # Count H1 headings (outside code blocks) |
| 345 | local H1_COUNT=0 IN_CODE_SCAN=0 FIRST_H1_TEXT="" |
| 346 | while IFS= read -r scanline || [[ -n "$scanline" ]]; do |
| 347 | [[ "$scanline" =~ ^\`\`\` ]] && { if [[ $IN_CODE_SCAN -eq 0 ]]; then IN_CODE_SCAN=1; else IN_CODE_SCAN=0; fi; continue; } |
| 348 | if [[ $IN_CODE_SCAN -eq 0 && "$scanline" =~ ^#\ ]]; then |
| 349 | H1_COUNT=$((H1_COUNT + 1)) |
| 350 | [[ $H1_COUNT -eq 1 ]] && FIRST_H1_TEXT="${scanline#\# }" |
| 351 | fi |
| 352 | done < "$INPUT" |
| 353 | |
| 354 | local FILE_TOC_LEVEL="${FM_TOC_LEVEL:-3}" |
| 355 | local FILE_NUMBERS=1 |
| 356 | [[ "$FM_NUMBERSECTIONS" == "false" ]] && FILE_NUMBERS=0 |
| 357 | local FILE_PAGEBREAK=1 |
| 358 | [[ "$FM_PAGEBREAK" == "false" ]] && FILE_PAGEBREAK=0 |
| 359 | |
| 360 | # Auto-determine numberfrom based on structure (if not explicitly set) |
| 361 | local FILE_NUMBER_FROM="${FM_NUMBERFROM:-}" |
| 362 | local HIDE_FIRST_H1=0 |
| 363 | if [[ -z "$FILE_NUMBER_FROM" ]]; then |
| 364 | if [[ $H1_COUNT -eq 1 ]]; then |
| 365 | # Single H1 = document title; number from H2, hide H1 in body |
| 366 | FILE_NUMBER_FROM=2 |
| 367 | HIDE_FIRST_H1=1 |
| 368 | # Use H1 text as title if no title set |
| 369 | [[ -z "$FM_TITLE" ]] && FM_TITLE="$FIRST_H1_TEXT" |
| 370 | detail "Auto: ${DIM}single H1 detected → using as title, numbering from H2${RESET}" |
| 371 | else |
| 372 | # Multiple H1s = sections; number from H1 |
| 373 | FILE_NUMBER_FROM=1 |
| 374 | detail "Auto: ${DIM}${H1_COUNT} H1s detected → numbering from H1${RESET}" |
| 375 | fi |
| 376 | fi |
| 377 | # Default date: current date/time |
| 378 | # Set to "none" in frontmatter or --date to suppress |
| 379 | FM_DATE_HASH="${FM_DATE_HASH:-}" |
| 380 | FM_DATE_DIRTY="${FM_DATE_DIRTY:-}" |
| 381 | if [[ "$FM_DATE" == "none" || "$FM_DATE" == "false" ]]; then |
| 382 | FM_DATE="" |
| 383 | elif [[ -z "$FM_DATE" && -z "$CLI_DATE" ]]; then |
| 384 | FM_DATE="$(date +"%Y-%m-%d %H:%M")" |
| 385 | fi |
| 386 | |
| 387 | echo "" |
| 388 | [[ -n "$FM_TITLE" ]] && detail "Title: ${CYAN}${FM_TITLE}${RESET}" |
| 389 | [[ -n "$FM_SUBTITLE" ]] && detail "Subtitle: ${CYAN}${FM_SUBTITLE}${RESET}" |
| 390 | [[ -n "$FM_AUTHOR" ]] && detail "Author: ${CYAN}${FM_AUTHOR}${RESET}" |
| 391 | [[ -n "$FM_HEADER" ]] && detail "Header: ${CYAN}${FM_HEADER}${RESET}" |
| 392 | [[ -n "$FM_FOOTER" ]] && detail "Footer: ${CYAN}${FM_FOOTER}${RESET}" |
| 393 | detail "Date: ${CYAN}${FM_DATE}${RESET}" |
| 394 | detail "TOC: ${CYAN}level ${FILE_TOC_LEVEL}${RESET}" |
| 395 | detail "Numbered: ${CYAN}$([ $FILE_NUMBERS -eq 1 ] && echo "yes (from H${FILE_NUMBER_FROM})" || echo no)${RESET}" |
| 396 | [[ -n "$FM_WATERMARK" ]] && detail "Watermark: ${CYAN}${FM_WATERMARK}${RESET}" |
| 397 | |
| 398 | # --- Git hash for source file --- |
| 399 | local GIT_STAMP="" |
| 400 | |
| 401 | # --- Discover images referenced in the markdown --- |
| 402 | echo "" |
| 403 | info "Scanning ${CYAN}${INPUT_FILE}${RESET} for assets..." |
| 404 | |
| 405 | IMAGES=() |
| 406 | while IFS= read -r img; do |
| 407 | [[ -z "$img" ]] && continue |
| 408 | [[ "$img" =~ ^https?:// ]] && continue |
| 409 | if [[ -f "$INPUT_DIR/$img" ]]; then |
| 410 | IMAGES+=("$img") |
| 411 | success "Image: ${CYAN}${img}${RESET} ${DIM}($(du -h "$INPUT_DIR/$img" | cut -f1 | tr -d ' '))${RESET}" |
| 412 | else |
| 413 | warn "Image: ${YELLOW}${img}${RESET} ${RED}(not found)${RESET}" |
| 414 | fi |
| 415 | done < <(sed -n 's/.*!\[[^]]*\](\([^)]*\)).*/\1/p' "$INPUT"; sed -n 's/.*src="\([^"]*\)".*/\1/p' "$INPUT") |
| 416 | |
| 417 | MERMAID_COUNT=$(grep -c '```mermaid' "$INPUT" || true) |
| 418 | if [[ $MERMAID_COUNT -gt 0 ]]; then |
| 419 | success "Mermaid diagrams: ${CYAN}${MERMAID_COUNT}${RESET}" |
| 420 | fi |
| 421 | |
| 422 | CALLOUT_COUNT=$(grep -c '> \[!' "$INPUT" || true) |
| 423 | if [[ $CALLOUT_COUNT -gt 0 ]]; then |
| 424 | success "Callouts: ${CYAN}${CALLOUT_COUNT}${RESET}" |
| 425 | fi |
| 426 | |
| 427 | TABLE_COUNT=$(grep -c '^|' "$INPUT" || true) |
| 428 | CODE_COUNT=$(grep -c '```' "$INPUT" || true) |
| 429 | CODE_COUNT=$(( (CODE_COUNT - MERMAID_COUNT * 2) / 2 )) |
| 430 | [[ $TABLE_COUNT -gt 0 ]] && detail "Tables: ${TABLE_COUNT} rows" |
| 431 | [[ $CODE_COUNT -gt 0 ]] && detail "Code blocks: ~${CODE_COUNT}" |
| 432 | |
| 433 | echo "" |
| 434 | info "Found ${GREEN}${#IMAGES[@]}${RESET} image(s), ${GREEN}${MERMAID_COUNT}${RESET} mermaid diagram(s), ${GREEN}${CALLOUT_COUNT}${RESET} callout(s)" |
| 435 | |
| 436 | # --- Write the conversion script to a temp file (mounted into Docker) --- |
| 437 | CONVERT_SCRIPT="${INPUT_DIR}/.pdfify-convert-$$.sh" |
| 438 | trap 'rm -f "$CONVERT_SCRIPT"' EXIT |
| 439 | cat > "$CONVERT_SCRIPT" <<'INNER_SCRIPT' |
| 440 | #!/bin/bash |
| 441 | set -euo pipefail |
| 442 | |
| 443 | RED='\033[0;31m' |
| 444 | GREEN='\033[0;32m' |
| 445 | YELLOW='\033[0;33m' |
| 446 | BLUE='\033[0;34m' |
| 447 | CYAN='\033[0;36m' |
| 448 | BOLD='\033[1m' |
| 449 | DIM='\033[2m' |
| 450 | RESET='\033[0m' |
| 451 | |
| 452 | info() { echo -e "${BLUE}::${RESET} ${BOLD}$*${RESET}"; } |
| 453 | success() { echo -e "${GREEN}✓${RESET} $*"; } |
| 454 | detail() { echo -e " ${DIM}→${RESET} $*"; } |
| 455 | |
| 456 | INPUT_FILE="$1" |
| 457 | OUTPUT_FILE="$2" |
| 458 | WORKDIR="/work" |
| 459 | |
| 460 | cd "$WORKDIR" |
| 461 | |
| 462 | # --- Step 0: Strip first H1 if it's being used as document title --- |
| 463 | HIDE_FIRST_H1="${HIDE_FIRST_H1:-0}" |
| 464 | EFFECTIVE_INPUT="$INPUT_FILE" |
| 465 | if [[ "$HIDE_FIRST_H1" == "1" ]]; then |
| 466 | STRIPPED=$(mktemp /tmp/pdfify-stripped-XXXXXX.md) |
| 467 | FOUND_H1=0 |
| 468 | IN_CODE_BLK=0 |
| 469 | IN_FMATTER=0 |
| 470 | while IFS= read -r line || [[ -n "$line" ]]; do |
| 471 | [[ "$line" =~ ^\`\`\` ]] && { if [[ $IN_CODE_BLK -eq 0 ]]; then IN_CODE_BLK=1; else IN_CODE_BLK=0; fi; } |
| 472 | if [[ "$line" == "---" && $IN_CODE_BLK -eq 0 ]]; then |
| 473 | if [[ $IN_FMATTER -eq 0 && $FOUND_H1 -eq 0 ]]; then IN_FMATTER=1; else IN_FMATTER=0; fi |
| 474 | fi |
| 475 | # Skip the first H1 (and any blank line immediately after) |
| 476 | if [[ $FOUND_H1 -eq 0 && $IN_CODE_BLK -eq 0 && $IN_FMATTER -eq 0 && "$line" =~ ^#\ ]]; then |
| 477 | FOUND_H1=1 |
| 478 | continue |
| 479 | fi |
| 480 | # Skip blank line right after removed H1 |
| 481 | if [[ $FOUND_H1 -eq 1 && -z "$line" ]]; then |
| 482 | FOUND_H1=2 |
| 483 | continue |
| 484 | fi |
| 485 | [[ $FOUND_H1 -eq 1 ]] && FOUND_H1=2 |
| 486 | echo "$line" >> "$STRIPPED" |
| 487 | done < "$INPUT_FILE" |
| 488 | EFFECTIVE_INPUT="$(basename "$STRIPPED")" |
| 489 | detail "Stripped first H1 (promoted to title)" |
| 490 | fi |
| 491 | |
| 492 | # --- Step 1: Pre-process Obsidian callouts --- |
| 493 | info "Pre-processing callouts..." |
| 494 | |
| 495 | CALLOUT_MD=$(mktemp /tmp/pdfify-callout-XXXXXX.md) |
| 496 | IN_CALLOUT=0 |
| 497 | CALLOUT_TYPE="" |
| 498 | CALLOUT_TITLE="" |
| 499 | CALLOUT_BUF="" |
| 500 | CALLOUT_COUNT=0 |
| 501 | |
# Write the currently buffered callout (if any) to $CALLOUT_MD and reset the
# callout state.
#
# The callout is emitted as a LaTeX environment whose \begin/\end lines are
# wrapped in pandoc raw-LaTeX fences, with the buffered body in between as
# ordinary markdown.
#
# Globals read:    IN_CALLOUT, CALLOUT_TYPE, CALLOUT_TITLE, CALLOUT_BUF, CALLOUT_MD
# Globals written: CALLOUT_COUNT (incremented for every callout emitted);
#                  IN_CALLOUT, CALLOUT_TYPE, CALLOUT_TITLE, CALLOUT_BUF (always reset)
flush_callout() {
  if [[ $IN_CALLOUT -eq 1 && -n "$CALLOUT_TYPE" ]]; then
    CALLOUT_COUNT=$((CALLOUT_COUNT + 1))

    # Map the (case-insensitive) callout type to a LaTeX environment name;
    # unknown types fall back to the info style.
    local latex_type
    case "${CALLOUT_TYPE,,}" in
      info|note)         latex_type="calloutinfo" ;;
      tip|hint)          latex_type="callouttip" ;;
      warning|caution)   latex_type="calloutwarning" ;;
      danger|error|bug)  latex_type="calloutdanger" ;;
      example)           latex_type="calloutexample" ;;
      quote|cite)        latex_type="calloutquote" ;;
      *)                 latex_type="calloutinfo" ;;
    esac

    # The title lands inside a raw-LaTeX block, so it must be escaped here.
    # Backslashes are parked behind a sentinel first so the braces of
    # \textbackslash{} are not themselves escaped.
    local title_tex
    title_tex=$(printf '%s' "$CALLOUT_TITLE" | sed \
      -e 's/\\/@@BSLASH@@/g' \
      -e 's/&/\\&/g' \
      -e 's/%/\\%/g' \
      -e 's/\$/\\$/g' \
      -e 's/#/\\#/g' \
      -e 's/_/\\_/g' \
      -e 's/{/\\{/g' \
      -e 's/}/\\}/g' \
      -e 's/~/\\textasciitilde{}/g' \
      -e 's/\^/\\textasciicircum{}/g' \
      -e 's/@@BSLASH@@/\\textbackslash{}/g')

    # printf '%s' rather than echo: a body or title such as "-n" must be
    # written literally, not taken as an echo option.
    {
      printf '\n```{=latex}\n'
      printf '\\begin{%s}{%s}\n' "$latex_type" "$title_tex"
      printf '```\n\n'
      printf '%s\n' "$CALLOUT_BUF"
      printf '\n```{=latex}\n'
      printf '\\end{%s}\n' "$latex_type"
      printf '```\n\n'
    } >> "$CALLOUT_MD"
  fi

  IN_CALLOUT=0
  CALLOUT_TYPE=""
  CALLOUT_TITLE=""
  CALLOUT_BUF=""
}
| 532 | |
# Walk the (possibly H1-stripped) input line by line. Obsidian callouts
# ("> [!type] Title" followed by "> body" lines) are buffered and handed to
# flush_callout; every other line is copied to $CALLOUT_MD unchanged.
#
# - Callout markers inside fenced code blocks are left alone, so documentation
#   that shows callout syntax is not rewritten.
# - An optional Obsidian fold marker ("[!note]-" / "[!note]+") is accepted and
#   dropped so it does not end up in the title.
CALLOUT_OPEN_RE='^> *\[!([a-zA-Z]+)\][+-]? *(.*)'
CALLOUT_BODY_RE='^> ?(.*)'
IN_FENCE=0

while IFS= read -r line || [[ -n "$line" ]]; do
  if [[ $IN_FENCE -eq 0 && "$line" =~ $CALLOUT_OPEN_RE ]]; then
    # Capture the match before calling anything else that might clobber it
    NEXT_TYPE="${BASH_REMATCH[1]}"
    NEXT_TITLE="${BASH_REMATCH[2]}"
    flush_callout
    IN_CALLOUT=1
    CALLOUT_TYPE="$NEXT_TYPE"
    # No explicit title: use the capitalised type name
    CALLOUT_TITLE="${NEXT_TITLE:-${NEXT_TYPE^}}"
    continue
  fi

  if [[ $IN_CALLOUT -eq 1 ]]; then
    if [[ "$line" =~ $CALLOUT_BODY_RE ]]; then
      CALLOUT_BUF+="${BASH_REMATCH[1]}"$'\n'
      continue
    fi
    # First non-quoted line ends the callout
    flush_callout
  fi

  # Track fenced code blocks in the ordinary (non-callout) text
  if [[ "$line" =~ ^\`\`\` ]]; then
    IN_FENCE=$((1 - IN_FENCE))
  fi

  printf '%s\n' "$line" >> "$CALLOUT_MD"
done < "${STRIPPED:-$INPUT_FILE}"
flush_callout

if [[ $CALLOUT_COUNT -gt 0 ]]; then
  success "Converted $CALLOUT_COUNT callout(s)"
fi
| 559 | |
# --- Step 1b+1c: Inject page breaks (after TOC, before each top-level section) ---
# Reads $CALLOUT_MD, writes $BREAK_INJECTED, then makes $BREAK_INJECTED the new
# $CALLOUT_MD for the following steps.
#
# - When a TOC is requested (TOC_LEVEL > 0), a \newpage is inserted before the
#   first non-blank content line that follows the frontmatter.
# - When FILE_PAGEBREAK is 1, a \newpage is inserted before every heading at the
#   FILE_NUMBER_FROM level except the first one.
# - Lines inside fenced code blocks and inside the frontmatter are copied as-is.
BREAK_INJECTED=$(mktemp /tmp/pdfify-breaks-XXXXXX.md)
H1_COUNT=0
IN_FM=0
IN_CODE=0
DONE_TOC_BREAK=0
LINE_NO=0

# Heading marker for a top-level section: numberfrom=1 → "#", numberfrom=2 → "##".
# Loop-invariant, so it is built once rather than once per input line.
BREAK_HASHES=$(printf '#%.0s' $(seq 1 "$FILE_NUMBER_FROM"))

while IFS= read -r line || [[ -n "$line" ]]; do
  LINE_NO=$((LINE_NO + 1))

  # Track code blocks (``` opens/closes)
  if [[ "$line" =~ ^\`\`\` ]]; then
    IN_CODE=$((1 - IN_CODE))
    printf '%s\n' "$line" >> "$BREAK_INJECTED"
    continue
  fi

  # Track frontmatter. It can only open on the very first line of the file;
  # a "---" anywhere else is a horizontal rule and is treated as content.
  if [[ "$line" == "---" && $IN_CODE -eq 0 ]]; then
    if [[ $IN_FM -eq 1 ]]; then
      IN_FM=0
      printf '%s\n' "$line" >> "$BREAK_INJECTED"
      continue
    elif [[ $LINE_NO -eq 1 ]]; then
      IN_FM=1
      printf '%s\n' "$line" >> "$BREAK_INJECTED"
      continue
    fi
  fi

  if [[ $IN_CODE -eq 0 && $IN_FM -eq 0 ]]; then
    # Before first content after frontmatter: inject TOC page break
    if [[ $DONE_TOC_BREAK -eq 0 && "$TOC_LEVEL" -gt 0 && -n "$line" ]]; then
      printf '\n```{=latex}\n\\newpage\n```\n\n' >> "$BREAK_INJECTED"
      DONE_TOC_BREAK=1
    fi

    # Page break before each top-level section (except the first).
    # The pattern requires a space right after the hashes, so deeper
    # headings (one more "#") can never match.
    if [[ "$line" == "${BREAK_HASHES} "* ]]; then
      H1_COUNT=$((H1_COUNT + 1))
      if [[ $H1_COUNT -gt 1 && $FILE_PAGEBREAK -eq 1 ]]; then
        printf '\n```{=latex}\n\\newpage\n```\n\n' >> "$BREAK_INJECTED"
      fi
    fi
  fi

  printf '%s\n' "$line" >> "$BREAK_INJECTED"
done < "$CALLOUT_MD"
rm -f "$CALLOUT_MD"
CALLOUT_MD="$BREAK_INJECTED"
| 615 | |
# --- Step 2: Pre-render Mermaid blocks to PNG ---
# Copies $CALLOUT_MD to $TEMP_MD, replacing every ```mermaid fenced block with
# an image reference to a PNG rendered by mmdc. If rendering fails, the diagram
# source is kept as a plain code block instead.
info "Pre-rendering Mermaid diagrams..."

TEMP_MD=$(mktemp /tmp/pdfify-XXXXXX.md)
MERMAID_COUNT=0
IN_MERMAID=0
MERMAID_BUF=""

while IFS= read -r line || [[ -n "$line" ]]; do
  # Opening fence: start collecting diagram source
  if [[ "$line" =~ ^\`\`\`mermaid ]]; then
    IN_MERMAID=1
    MERMAID_BUF=""
    continue
  fi

  # Ordinary line outside any diagram: copy through
  if [[ $IN_MERMAID -eq 0 ]]; then
    printf '%s\n' "$line" >> "$TEMP_MD"
    continue
  fi

  # Inside a diagram and not yet at the closing fence: keep buffering
  if [[ ! "$line" =~ ^\`\`\` ]]; then
    MERMAID_BUF+="${line}"$'\n'
    continue
  fi

  # Closing fence: render what was collected
  IN_MERMAID=0
  MERMAID_COUNT=$((MERMAID_COUNT + 1))
  MERMAID_FILE="/tmp/mermaid-${MERMAID_COUNT}.mmd"
  MERMAID_PNG="/tmp/mermaid-${MERMAID_COUNT}.png"

  printf '%s\n' "$MERMAID_BUF" > "$MERMAID_FILE"

  detail "Rendering diagram ${CYAN}#${MERMAID_COUNT}${RESET}..."
  if ! mmdc -i "$MERMAID_FILE" \
      -o "$MERMAID_PNG" \
      -w 1600 \
      -b transparent \
      -c /opt/mermaid-config.json \
      -p /opt/puppeteer-config.json \
      2>/dev/null; then
    echo -e " ${YELLOW}⚠${RESET} Diagram $MERMAID_COUNT failed — inserting as code block"
    printf '```\n%s\n```\n' "$MERMAID_BUF" >> "$TEMP_MD"
    continue
  fi

  SIZE=$(du -h "$MERMAID_PNG" 2>/dev/null | cut -f1 | tr -d ' ')
  success "Diagram #${MERMAID_COUNT} rendered ${DIM}(${SIZE})${RESET}"

  # Reference the rendered PNG. The trailing backslash keeps the image from
  # being the only thing in its paragraph.
  # NOTE(review): presumably this is meant to stop pandoc turning the image
  # into a captioned figure — confirm against the pandoc version in the image.
  printf '\n![](%s)\\\n\n' "$MERMAID_PNG" >> "$TEMP_MD"
done < "$CALLOUT_MD"

# A mermaid fence that is never closed would otherwise vanish from the output;
# keep its source visible as a code block.
if [[ $IN_MERMAID -eq 1 ]]; then
  warn "Unterminated mermaid block — inserting as code block"
  printf '```\n%s\n```\n' "$MERMAID_BUF" >> "$TEMP_MD"
  IN_MERMAID=0
fi
| 669 | |
| 670 | # --- Strip YAML frontmatter so pandoc doesn't generate its own title --- |
| 671 | # pdfify already parses frontmatter above; letting pandoc see it causes a |
| 672 | # duplicate title (pandoc's \maketitle + pdfify's custom title banner). |
| 673 | if head -1 "$TEMP_MD" | grep -q '^---'; then |
| 674 | STRIPPED_FM=$(mktemp /tmp/pdfify-nofm-XXXXXX.md) |
| 675 | awk 'NR==1 && /^---/{skip=1; next} skip && /^---/{skip=0; next} !skip' "$TEMP_MD" > "$STRIPPED_FM" |
| 676 | mv "$STRIPPED_FM" "$TEMP_MD" |
| 677 | fi |
| 678 | |
| 679 | # --- Lua filter: protect brackets in headings for titlesec --- |
| 680 | # Square brackets in headings break titlesec (\SQSPL@scan error) because LaTeX |
| 681 | # interprets [ as the start of an optional argument. |
| 682 | BRACKET_FILTER=$(mktemp /tmp/pdfify-bracket-filter-XXXXXX.lua) |
| 683 | cat > "$BRACKET_FILTER" <<'LUAFILTER' |
| 684 | -- Protect square brackets in headings to prevent titlesec \SQSPL@scan errors. |
| 685 | -- Brackets in headings make titlesec think they are optional arguments. |
| 686 | -- We replace [ and ] with \lbrack/\rbrack in all inline types. |
| 687 | |
| 688 | function Header(el) |
| 689 | if FORMAT ~= "latex" and FORMAT ~= "pdf" then return nil end |
| 690 | |
| 691 | el = el:walk { |
| 692 | Str = function(s) |
| 693 | if s.text:find("[%[%]]") then |
| 694 | local t = s.text:gsub("%[", "\\lbrack{}"):gsub("%]", "\\rbrack{}") |
| 695 | return pandoc.RawInline("latex", t) |
| 696 | end |
| 697 | end, |
| 698 | Code = function(c) |
| 699 | -- All code in headings must use \oldtexttt to bypass seqsplit |
| 700 | -- (seqsplit in titlesec moving arguments causes \SQSPL@scan errors) |
| 701 | local t = c.text |
| 702 | t = t:gsub("\\", "\\textbackslash ") |
| 703 | t = t:gsub("%%", "\\%%") |
| 704 | t = t:gsub("%#", "\\#") |
| 705 | t = t:gsub("%$", "\\$") |
| 706 | t = t:gsub("%&", "\\&") |
| 707 | t = t:gsub("_", "\\_") |
| 708 | t = t:gsub("%{", "\\{") |
| 709 | t = t:gsub("%}", "\\}") |
| 710 | t = t:gsub("~", "\\textasciitilde{}") |
| 711 | t = t:gsub("%^", "\\textasciicircum{}") |
| 712 | t = t:gsub("%[", "\\lbrack{}"):gsub("%]", "\\rbrack{}") |
| 713 | return pandoc.RawInline("latex", "\\oldtexttt{" .. t .. "}") |
| 714 | end |
| 715 | } |
| 716 | return el |
| 717 | end |
| 718 | LUAFILTER |
| 719 | |
| 720 | echo "" |
| 721 | info "Generating PDF with Pandoc + XeLaTeX..." |
| 722 | detail "Engine: xelatex" |
| 723 | detail "Font: Roboto / Roboto Mono" |
| 724 | detail "Margins: 0.5in, Font size: 10pt" |
| 725 | echo "" |
| 726 | |
| 727 | # Write LaTeX preamble for modern styling |
| 728 | PREAMBLE=$(mktemp /tmp/pdfify-preamble-XXXXXX.tex) |
| 729 | cat > "$PREAMBLE" <<'LATEX' |
| 730 | % --- Modern color scheme --- |
| 731 | \usepackage{xcolor} |
| 732 | \definecolor{accent}{HTML}{374151} |
| 733 | \definecolor{accentdark}{HTML}{111827} |
| 734 | \definecolor{codebg}{HTML}{F8F9FA} |
| 735 | \definecolor{codeborder}{HTML}{E2E8F0} |
| 736 | \definecolor{headrulecolor}{HTML}{E2E8F0} |
| 737 | |
| 738 | % --- Callout colors --- |
| 739 | \definecolor{infobg}{HTML}{EFF6FF} |
| 740 | \definecolor{infobar}{HTML}{3B82F6} |
| 741 | \definecolor{infofg}{HTML}{1E40AF} |
| 742 | \definecolor{tipbg}{HTML}{F0FDF4} |
| 743 | \definecolor{tipbar}{HTML}{22C55E} |
| 744 | \definecolor{tipfg}{HTML}{166534} |
| 745 | \definecolor{warningbg}{HTML}{FFFBEB} |
| 746 | \definecolor{warningbar}{HTML}{F59E0B} |
| 747 | \definecolor{warningfg}{HTML}{92400E} |
| 748 | \definecolor{dangerbg}{HTML}{FEF2F2} |
| 749 | \definecolor{dangerbar}{HTML}{EF4444} |
| 750 | \definecolor{dangerfg}{HTML}{991B1B} |
| 751 | \definecolor{examplebg}{HTML}{F5F3FF} |
| 752 | \definecolor{examplebar}{HTML}{8B5CF6} |
| 753 | \definecolor{examplefg}{HTML}{5B21B6} |
| 754 | \definecolor{quotecallbg}{HTML}{F8F9FA} |
| 755 | \definecolor{quotecallbar}{HTML}{6B7280} |
| 756 | \definecolor{quotecallfg}{HTML}{374151} |
| 757 | |
| 758 | % --- Code block wrapping and styling --- |
| 759 | \usepackage{fvextra} |
| 760 | \DefineVerbatimEnvironment{Highlighting}{Verbatim}{ |
| 761 | breaklines, |
| 762 | breakanywhere, |
| 763 | commandchars=\\\{\}, |
| 764 | fontsize=\small |
| 765 | } |
| 766 | |
| 767 | % Background on code blocks via mdframed |
| 768 | \usepackage[framemethod=tikz]{mdframed} |
| 769 | |
| 770 | % Override pandoc's Shaded environment (define first if pandoc didn't) |
| 771 | \makeatletter |
| 772 | \@ifundefined{Shaded}{\newenvironment{Shaded}{}{}}{} |
| 773 | \makeatother |
| 774 | \renewenvironment{Shaded}{% |
| 775 | \begin{mdframed}[ |
| 776 | backgroundcolor=codebg, |
| 777 | hidealllines=true, |
| 778 | roundcorner=4pt, |
| 779 | innertopmargin=8pt, |
| 780 | innerbottommargin=8pt, |
| 781 | innerleftmargin=10pt, |
| 782 | innerrightmargin=10pt, |
| 783 | skipabove=10pt, |
| 784 | skipbelow=10pt |
| 785 | ] |
| 786 | }{% |
| 787 | \end{mdframed} |
| 788 | } |
| 789 | |
| 790 | % --- Callout environments --- |
| 791 | \newenvironment{calloutbase}[3]{% |
| 792 | \begin{mdframed}[ |
| 793 | backgroundcolor=#1, |
| 794 | linecolor=#2, |
| 795 | linewidth=3pt, |
| 796 | topline=false, |
| 797 | bottomline=false, |
| 798 | rightline=false, |
| 799 | innertopmargin=12pt, |
| 800 | innerbottommargin=12pt, |
| 801 | innerleftmargin=12pt, |
| 802 | innerrightmargin=12pt, |
| 803 | skipabove=12pt, |
| 804 | skipbelow=12pt, |
| 805 | roundcorner=0pt |
| 806 | ] |
| 807 | \textbf{\color{#2}#3}\par\smallskip\setlength{\parindent}{0pt} |
| 808 | }{% |
| 809 | \end{mdframed} |
| 810 | } |
| 811 | |
| 812 | \newenvironment{calloutinfo}[1]{\begin{calloutbase}{infobg}{infobar}{#1}}{\end{calloutbase}} |
| 813 | \newenvironment{callouttip}[1]{\begin{calloutbase}{tipbg}{tipbar}{#1}}{\end{calloutbase}} |
| 814 | \newenvironment{calloutwarning}[1]{\begin{calloutbase}{warningbg}{warningbar}{#1}}{\end{calloutbase}} |
| 815 | \newenvironment{calloutdanger}[1]{\begin{calloutbase}{dangerbg}{dangerbar}{#1}}{\end{calloutbase}} |
| 816 | \newenvironment{calloutexample}[1]{\begin{calloutbase}{examplebg}{examplebar}{#1}}{\end{calloutbase}} |
| 817 | \newenvironment{calloutquote}[1]{\begin{calloutbase}{quotecallbg}{quotecallbar}{#1}}{\end{calloutbase}} |
| 818 | |
| 819 | % --- PDF bookmarks (sidebar navigation in PDF viewers) --- |
| 820 | \usepackage{bookmark} |
| 821 | \bookmarksetup{ |
| 822 | numbered=false, |
| 823 | open, |
| 824 | openlevel=2 |
| 825 | } |
| 826 | |
| 827 | % --- Title banner --- |
| 828 | \definecolor{titlebg}{HTML}{E5E7EB} |
| 829 | |
| 830 | % --- Page break after TOC --- |
| 831 | \let\oldtableofcontents\tableofcontents |
| 832 | \renewcommand{\tableofcontents}{\oldtableofcontents\clearpage} |
| 833 | |
| 834 | % --- TOC styling --- |
| 835 | \usepackage{tocloft} |
| 836 | \setlength{\cftbeforetoctitleskip}{0.5em} |
| 837 | \renewcommand{\cfttoctitlefont}{\LARGE\bfseries\color{accentdark}\scshape} |
| 838 | \renewcommand{\cftaftertoctitle}{\par\vspace{2pt}{\color{headrulecolor}\hrule height 1pt}\vspace{10pt}} |
| 839 | \renewcommand{\cftsecfont}{\bfseries\color{accentdark}} |
| 840 | \renewcommand{\cftsecpagefont}{\bfseries\color{accentdark}} |
| 841 | \renewcommand{\cftsubsecfont}{\color{accent}} |
| 842 | \renewcommand{\cftsubsecpagefont}{\color{accent}} |
| 843 | \renewcommand{\cftsubsubsecfont}{\small\color{accent}} |
| 844 | \renewcommand{\cftsubsubsecpagefont}{\small\color{accent}} |
| 845 | \renewcommand{\cftsecleader}{\cftdotfill{\cftsecdotsep}} |
| 846 | \renewcommand{\cftsecdotsep}{\cftdotsep} |
| 847 | \setlength{\cftbeforesecskip}{6pt} |
| 848 | \setlength{\cftbeforesubsecskip}{2pt} |
| 849 | |
| 850 | % --- Heading font --- |
| 851 | \newfontfamily\headingfont{Roboto}[BoldFont={Roboto Bold}] |
| 852 | |
| 853 | % --- Symbol fallback (arrows, etc.) --- |
| 854 | \usepackage{newunicodechar} |
| 855 | \newfontfamily\fallbackfont{Liberation Sans}[Scale=MatchLowercase] |
| 856 | \newunicodechar{→}{{\fallbackfont →}} |
| 857 | \newunicodechar{←}{{\fallbackfont ←}} |
| 858 | \newunicodechar{↔}{{\fallbackfont ↔}} |
| 859 | \newunicodechar{⇒}{{\fallbackfont ⇒}} |
| 860 | \newunicodechar{⇐}{{\fallbackfont ⇐}} |
| 861 | \newunicodechar{✓}{{\fallbackfont ✓}} |
| 862 | \newunicodechar{✗}{{\fallbackfont ✗}} |
| 863 | |
| 864 | % --- Modern section headings (tight, bold, dark) --- |
| 865 | \usepackage{titlesec} |
| 866 | |
| 867 | % H1: # headings — large, small caps, dark, with rule |
| 868 | \titleformat{\section} |
| 869 | {\LARGE\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=5}\scshape} |
| 870 | {\thesection}{0.5em}{}[\vspace{2pt}{\color{headrulecolor}\titlerule[1pt]}] |
| 871 | \titlespacing*{\section}{0pt}{20pt}{10pt} |
| 872 | |
| 873 | % H2: ## headings |
| 874 | \titleformat{\subsection} |
| 875 | {\Large\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=-1}} |
| 876 | {\thesubsection}{0.5em}{} |
| 877 | \titlespacing*{\subsection}{0pt}{16pt}{8pt} |
| 878 | |
| 879 | % H3: ### headings |
| 880 | \titleformat{\subsubsection} |
| 881 | {\large\bfseries\color{accent}} |
| 882 | {\thesubsubsection}{0.5em}{} |
| 883 | \titlespacing*{\subsubsection}{0pt}{12pt}{6pt} |
| 884 | |
| 885 | % H4: #### headings |
| 886 | \titleformat{\paragraph}[hang] |
| 887 | {\normalsize\bfseries\color{accent}} |
| 888 | {\theparagraph}{0.5em}{} |
| 889 | \titlespacing*{\paragraph}{0pt}{10pt}{4pt} |
| 890 | |
| 891 | %%SECNUMDEPTH_PLACEHOLDER%% |
| 892 | |
| 893 | % --- Page style (header/footer injected by pdfify) --- |
| 894 | \usepackage{fancyhdr} |
| 895 | \pagestyle{fancy} |
| 896 | \fancyhf{} |
| 897 | \renewcommand{\headrulewidth}{0pt} |
| 898 | \renewcommand{\footrulewidth}{0pt} |
| 899 | \setlength{\headheight}{14pt} |
| 900 | %%HEADER_PLACEHOLDER%% |
| 901 | %%FOOTER_PLACEHOLDER%% |
| 902 | % Make plain style identical to fancy (so title/TOC pages get the same footer) |
| 903 | \fancypagestyle{plain}{\fancyhf{}\renewcommand{\headrulewidth}{0pt}\renewcommand{\footrulewidth}{0pt}%%FOOTER_PLAIN%%} |
| 904 | |
| 905 | % --- Blockquote styling (plain > quotes, not callouts) --- |
| 906 | \usepackage{etoolbox} |
| 907 | \renewenvironment{quote}{% |
| 908 | \begin{mdframed}[ |
| 909 | backgroundcolor=infobg, |
| 910 | linecolor=infobar, |
| 911 | linewidth=3pt, |
| 912 | topline=false, |
| 913 | bottomline=false, |
| 914 | rightline=false, |
| 915 | innertopmargin=12pt, |
| 916 | innerbottommargin=12pt, |
| 917 | innerleftmargin=12pt, |
| 918 | innerrightmargin=12pt, |
| 919 | skipabove=10pt, |
| 920 | skipbelow=10pt, |
| 921 | roundcorner=0pt |
| 922 | ]% |
| 923 | }{% |
| 924 | \end{mdframed}% |
| 925 | } |
| 926 | |
| 927 | % --- Table styling --- |
| 928 | \usepackage{booktabs} |
| 929 | \usepackage{colortbl} |
| 930 | \usepackage{longtable} |
| 931 | \usepackage{tabularx} |
| 932 | \arrayrulecolor{codeborder} |
| 933 | |
| 934 | % Alternating row shading |
| 935 | \definecolor{tablerowgray}{HTML}{F3F4F6} |
| 936 | \let\oldlongtable\longtable |
| 937 | \let\endoldlongtable\endlongtable |
| 938 | \renewenvironment{longtable}{\rowcolors{2}{white}{tablerowgray}\oldlongtable}{\endoldlongtable} |
| 939 | |
| 940 | % Allow line breaks in table cells and shrink monospace to fit |
| 941 | \usepackage{array} |
| 942 | \renewcommand{\arraystretch}{1.4} |
| 943 | \let\oldtexttt\texttt |
| 944 | \renewcommand{\texttt}[1]{{\small\oldtexttt{\seqsplit{#1}}}} |
| 945 | \usepackage{seqsplit} |
| 946 | \setlength{\tabcolsep}{4pt} |
| 947 | |
| 948 | % --- Images constrained to page --- |
| 949 | \usepackage{grffile} |
| 950 | \usepackage[export]{adjustbox} |
| 951 | \let\oldincludegraphics\includegraphics |
| 952 | \renewcommand{\includegraphics}[2][]{% |
| 953 | \oldincludegraphics[max width=\textwidth,max height=0.45\textheight,keepaspectratio,#1]{#2}% |
| 954 | } |
| 955 | |
| 956 | % --- Figures don't float --- |
| 957 | \usepackage{float} |
| 958 | \floatplacement{figure}{H} |
| 959 | |
| 960 | % --- Caption styling --- |
| 961 | \usepackage{caption} |
| 962 | \captionsetup{labelformat=empty,font={small,color=gray},skip=4pt} |
| 963 | |
| 964 | % --- Tighter lists --- |
| 965 | \usepackage{enumitem} |
| 966 | \setlist{nosep,leftmargin=1.5em} |
| 967 | |
| 968 | % --- Links --- |
| 969 | \usepackage{hyperref} |
| 970 | \hypersetup{ |
| 971 | colorlinks=true, |
| 972 | linkcolor=accent, |
| 973 | urlcolor=accent, |
| 974 | citecolor=accent |
| 975 | } |
| 976 | |
| 977 | % --- Horizontal rules --- |
| 978 | \renewcommand{\rule}[2]{\textcolor{headrulecolor}{\vrule width \textwidth height 0.5pt}} |
| 979 | LATEX |
| 980 | |
| 981 | TOC_LEVEL="${TOC_LEVEL:-3}" |
| 982 | FM_FOOTER="${FM_FOOTER:-}" |
| 983 | FM_HEADER="${FM_HEADER:-}" |
| 984 | FM_AUTHOR="${FM_AUTHOR:-}" |
| 985 | FM_DATE="${FM_DATE:-}" |
| 986 | FM_DATE_LABEL="${FM_DATE_LABEL:-}" |
| 987 | FM_DATE_HASH="${FM_DATE_HASH:-}" |
| 988 | FM_DATE_DIRTY="${FM_DATE_DIRTY:-}" |
| 989 | FILE_NUMBERS="${FILE_NUMBERS:-1}" |
| 990 | FILE_NUMBER_FROM="${FILE_NUMBER_FROM:-2}" |
| 991 | FILE_PAGEBREAK="${FILE_PAGEBREAK:-1}" |
| 992 | |
# latex_escape TEXT
# Print TEXT on stdout with LaTeX special characters escaped, without adding
# a trailing newline.
#
# The substitutions are done by sed rather than bash parameter expansion to
# avoid brace-parsing trouble with "}" in replacement text. Backslashes are
# swapped for a sentinel first and turned into \textbackslash{} last, so the
# braces of that macro are not caught by the brace-escaping rules.
latex_escape() {
  local -a rules=(
    's/\\/@@BSLASH@@/g'
    's/&/\\&/g'
    's/%/\\%/g'
    's/\$/\\$/g'
    's/#/\\#/g'
    's/_/\\_/g'
    's/{/\\{/g'
    's/}/\\}/g'
    's/~/\\textasciitilde{}/g'
    's/\^/\\textasciicircum{}/g'
    's/@@BSLASH@@/\\textbackslash{}/g'
  )
  local -a sed_args=()
  local rule
  for rule in "${rules[@]}"; do
    sed_args+=(-e "$rule")
  done
  printf '%s' "$1" | sed "${sed_args[@]}"
}
| 1009 | |
| 1010 | # Inject title banner into preamble |
| 1011 | FM_TITLE="${FM_TITLE:-}" |
| 1012 | FM_TITLE_TEX="$(latex_escape "$FM_TITLE")" |
| 1013 | FM_SUBTITLE_TEX="$(latex_escape "${FM_SUBTITLE:-}")" |
| 1014 | FM_AUTHOR_TEX="$(latex_escape "${FM_AUTHOR:-}")" |
| 1015 | |
| 1016 | { |
| 1017 | if [[ -n "$FM_TITLE" ]]; then |
| 1018 | cat <<'TITLE_STATIC' |
| 1019 | \makeatletter |
| 1020 | \renewcommand{\maketitle}{% |
| 1021 | \thispagestyle{fancy}% |
| 1022 | \vspace*{-\topskip}% |
| 1023 | \vspace*{-\headsep}% |
| 1024 | \vspace*{-\headheight}% |
| 1025 | \vspace*{-0.55in}% |
| 1026 | \noindent\hspace*{-0.5in}% |
| 1027 | \fcolorbox{titlebg}{titlebg}{% |
| 1028 | \parbox{\dimexpr\paperwidth-2\fboxsep-2\fboxrule}{% |
| 1029 | \hspace*{0.3in}\begin{minipage}{\dimexpr\textwidth}% |
| 1030 | \vspace{20pt}% |
| 1031 | TITLE_STATIC |
| 1032 | |
| 1033 | echo " {\\fontsize{28}{34}\\selectfont\\bfseries\\color{black}${FM_TITLE_TEX}}\\\\[6pt]%" |
| 1034 | |
| 1035 | FM_SUBTITLE="${FM_SUBTITLE:-}" |
| 1036 | if [[ -n "$FM_SUBTITLE" ]]; then |
| 1037 | echo " {\\fontsize{14}{18}\\selectfont\\color{black}${FM_SUBTITLE_TEX}}\\\\[8pt]%" |
| 1038 | fi |
| 1039 | |
| 1040 | if [[ -n "$FM_AUTHOR" ]]; then |
| 1041 | echo " {\\fontsize{11}{14}\\selectfont\\color{black}${FM_AUTHOR_TEX}}\\\\[6pt]%" |
| 1042 | fi |
| 1043 | |
| 1044 | if [[ -n "$FM_DATE" ]]; then |
| 1045 | DATE_VAL="" |
| 1046 | if [[ -n "$FM_DATE_HASH" ]]; then |
| 1047 | DATE_VAL="${FM_DATE% · *} · {\\texttt{${FM_DATE_HASH}}}" |
| 1048 | else |
| 1049 | DATE_VAL="${FM_DATE}" |
| 1050 | fi |
| 1051 | DIRTY_PART="" |
| 1052 | if [[ -n "${FM_DATE_DIRTY:-}" ]]; then |
| 1053 | DIRTY_PART=" {\\color{gray}\\itshape (dirty)}" |
| 1054 | fi |
| 1055 | if [[ -n "$FM_DATE_LABEL" ]]; then |
| 1056 | echo " {\\fontsize{10}{12}\\selectfont\\color{black}${DATE_VAL} {\\color{gray}--- ${FM_DATE_LABEL}}${DIRTY_PART}}\\\\[4pt]%" |
| 1057 | else |
| 1058 | echo " {\\fontsize{10}{12}\\selectfont\\color{black}${DATE_VAL}${DIRTY_PART}}\\\\[4pt]%" |
| 1059 | fi |
| 1060 | fi |
| 1061 | |
| 1062 | cat <<'TITLE_END' |
| 1063 | \vspace{6pt}% |
| 1064 | \end{minipage}% |
| 1065 | }% |
| 1066 | }% |
| 1067 | \par\vspace{20pt}% |
| 1068 | } |
| 1069 | \makeatother |
| 1070 | TITLE_END |
| 1071 | echo '\AtBeginDocument{\maketitle}' |
| 1072 | else |
| 1073 | echo '\renewcommand{\maketitle}{}' |
| 1074 | fi |
| 1075 | } >> "$PREAMBLE" |
| 1076 | |
# Inject header/footer into preamble
# Replaces the %%FOOTER_PLACEHOLDER%%, %%FOOTER_PLAIN%% and
# %%HEADER_PLACEHOLDER%% markers in $PREAMBLE with fancyhdr commands built
# from FM_FOOTER / FM_HEADER. The right footer always carries "Page N of M".
GIT_STAMP="${GIT_STAMP:-}"

# sed_escape_replacement TEXT
# Print TEXT so it can be spliced into the replacement half of a
# sed "s|…|…|" command and come out verbatim: backslash, "&" (whole match)
# and the "|" delimiter are backslash-escaped, and newlines (which would end
# the sed command) are folded to spaces.
sed_escape_replacement() {
  printf '%s' "$1" | tr '\n' ' ' | sed -e 's/[\\&|]/\\&/g'
}

FOOTER_L=""
FOOTER_C=""
# Backslashes are doubled here because these strings pass through a sed
# replacement, which halves them again.
FOOTER_R="\\\\fancyfoot[R]{\\\\color{gray}\\\\small Page \\\\thepage\\\\ of \\\\pageref*{LastPage}}"

if [[ -n "$FM_FOOTER" ]]; then
  FOOTER_L="\\\\fancyfoot[L]{\\\\color{gray}\\\\small $(sed_escape_replacement "$FM_FOOTER")}"
fi

sed -i "s|%%FOOTER_PLACEHOLDER%%|\\\\usepackage{lastpage}${FOOTER_L}${FOOTER_C}${FOOTER_R}|" "$PREAMBLE"
sed -i "s|%%FOOTER_PLAIN%%|${FOOTER_L}${FOOTER_C}${FOOTER_R}|" "$PREAMBLE"

if [[ -n "$FM_HEADER" ]]; then
  sed -i "s|%%HEADER_PLACEHOLDER%%|\\\\fancyhead[C]{\\\\color{gray}\\\\small $(sed_escape_replacement "$FM_HEADER")}|" "$PREAMBLE"
else
  sed -i "s|%%HEADER_PLACEHOLDER%%||" "$PREAMBLE"
fi
| 1094 | |
| 1095 | # Inject watermark if set |
| 1096 | FM_WATERMARK="${FM_WATERMARK:-}" |
| 1097 | if [[ -n "$FM_WATERMARK" ]]; then |
| 1098 | cat >> "$PREAMBLE" <<WATERMARK |
| 1099 | \\usepackage{eso-pic} |
| 1100 | \\usepackage{tikz} |
| 1101 | \\AddToShipoutPictureFG{% |
| 1102 | \\begin{tikzpicture}[remember picture,overlay] |
| 1103 | \\node[rotate=45,opacity=0.12,scale=10,text=red] at (current page.center) {\\textsf{\\textbf{\\MakeUppercase{${FM_WATERMARK}}}}}; |
| 1104 | \\end{tikzpicture}% |
| 1105 | } |
| 1106 | WATERMARK |
| 1107 | detail "Watermark: ${CYAN}${FM_WATERMARK}${RESET}" |
| 1108 | fi |
| 1109 | |
| 1110 | # Build TOC flags |
| 1111 | # When numbering is on, headings shift by -1, so TOC depth needs +1 to compensate |
| 1112 | TOC_FLAGS=() |
| 1113 | if [[ "$TOC_LEVEL" -gt 0 ]]; then |
| 1114 | TOC_FLAGS+=(--toc --toc-depth="$TOC_LEVEL") |
| 1115 | detail "TOC depth: ${CYAN}${TOC_LEVEL}${RESET}" |
| 1116 | else |
| 1117 | detail "TOC: ${DIM}disabled${RESET}" |
| 1118 | fi |
| 1119 | |
| 1120 | AUTHOR_FLAGS=() |
| 1121 | if [[ -n "$FM_AUTHOR" ]]; then |
| 1122 | AUTHOR_FLAGS+=(-M "author=$FM_AUTHOR") |
| 1123 | fi |
| 1124 | |
| 1125 | # Numbered sections |
| 1126 | NUMBER_FLAGS=() |
| 1127 | if [[ "$FILE_NUMBERS" == "1" ]]; then |
| 1128 | NUMBER_FLAGS+=(--number-sections) |
| 1129 | |
| 1130 | # numberfrom controls which heading level starts getting numbers |
| 1131 | # pandoc: section=1, subsection=2, subsubsection=3 |
| 1132 | cat >> "$PREAMBLE" <<SECNUM |
| 1133 | \\setcounter{secnumdepth}{4} |
| 1134 | SECNUM |
| 1135 | |
| 1136 | if [[ "$FILE_NUMBER_FROM" -ge 2 ]]; then |
| 1137 | # H1 (\section) unnumbered, H2 numbered as 1, 2, 3 |
| 1138 | cat >> "$PREAMBLE" <<'SECNUM2' |
| 1139 | \makeatletter |
| 1140 | \renewcommand{\thesection}{} |
| 1141 | \renewcommand{\thesubsection}{\arabic{subsection}} |
| 1142 | \renewcommand{\thesubsubsection}{\thesubsection.\arabic{subsubsection}} |
| 1143 | % Remove section number from titleformat without changing style |
| 1144 | \titleformat{\section} |
| 1145 | {\LARGE\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=5}\scshape} |
| 1146 | {}{0em}{}[\vspace{2pt}{\color{headrulecolor}\titlerule[1pt]}] |
| 1147 | \makeatother |
| 1148 | SECNUM2 |
| 1149 | fi |
| 1150 | |
| 1151 | if [[ "$FILE_NUMBER_FROM" -ge 3 ]]; then |
| 1152 | cat >> "$PREAMBLE" <<'SECNUM3' |
| 1153 | \renewcommand{\thesubsection}{} |
| 1154 | \renewcommand{\thesubsubsection}{\arabic{subsubsection}} |
| 1155 | \titleformat{\subsection} |
| 1156 | {\Large\headingfont\bfseries\color{accentdark}\addfontfeatures{LetterSpace=-1}} |
| 1157 | {}{0em}{} |
| 1158 | SECNUM3 |
| 1159 | fi |
| 1160 | fi |
| 1161 | |
| 1162 | # Remove placeholder |
| 1163 | sed -i 's|%%SECNUMDEPTH_PLACEHOLDER%%||' "$PREAMBLE" |
| 1164 | |
| 1165 | pandoc "$TEMP_MD" \ |
| 1166 | -o "$OUTPUT_FILE" \ |
| 1167 | --pdf-engine=xelatex \ |
| 1168 | --lua-filter="$BRACKET_FILTER" \ |
| 1169 | --resource-path=".:$WORKDIR" \ |
| 1170 | --columns=72 \ |
| 1171 | -V geometry:"margin=0.5in,includehead,includefoot" \ |
| 1172 | -V fontsize=10pt \ |
| 1173 | -V mainfont="Roboto" \ |
| 1174 | -V monofont="Roboto Mono" \ |
| 1175 | "${TOC_FLAGS[@]}" \ |
| 1176 | "${AUTHOR_FLAGS[@]}" \ |
| 1177 | "${NUMBER_FLAGS[@]}" \ |
| 1178 | --highlight-style=tango \ |
| 1179 | -H "$PREAMBLE" \ |
| 1180 | --standalone |
| 1181 | |
| 1182 | rm -f "$TEMP_MD" "$CALLOUT_MD" "$PREAMBLE" "${BRACKET_FILTER:-}" "${STRIPPED:-}" /tmp/mermaid-*.mmd /tmp/mermaid-*.png |
| 1183 | |
| 1184 | PAGES=$(strings "$OUTPUT_FILE" 2>/dev/null | grep -c '/Type /Page' || echo "?") |
| 1185 | SIZE=$(du -h "$OUTPUT_FILE" | cut -f1 | tr -d ' ') |
| 1186 | success "PDF generated: ${CYAN}${SIZE}${RESET}, ~${CYAN}${PAGES}${RESET} pages" |
| 1187 | INNER_SCRIPT |
| 1188 | |
| 1189 | chmod +x "$CONVERT_SCRIPT" |
| 1190 | |
| 1191 | # --- Run Docker --- |
| 1192 | echo "" |
| 1193 | info "Launching Docker container..." |
| 1194 | detail "Mounting: ${CYAN}${INPUT_DIR}${RESET} → /work ${DIM}(read-only)${RESET}" |
| 1195 | detail "Output: ${CYAN}${OUTPUT_DIR}${RESET} → /output" |
| 1196 | echo "" |
| 1197 | |
| 1198 | CONVERT_BASENAME="$(basename "$CONVERT_SCRIPT")" |
| 1199 | docker run --rm \ |
| 1200 | -v "$INPUT_DIR:/work:ro" \ |
| 1201 | -v "$OUTPUT_DIR:/output" \ |
| 1202 | -e "TOC_LEVEL=$FILE_TOC_LEVEL" \ |
| 1203 | -e "FM_FOOTER=$FM_FOOTER" \ |
| 1204 | -e "FM_HEADER=$FM_HEADER" \ |
| 1205 | -e "FM_AUTHOR=$FM_AUTHOR" \ |
| 1206 | -e "FM_TITLE=$FM_TITLE" \ |
| 1207 | -e "FM_SUBTITLE=$FM_SUBTITLE" \ |
| 1208 | -e "FM_DATE=$FM_DATE" \ |
| 1209 | -e "FM_DATE_LABEL=${FM_DATE_LABEL:-}" \ |
| 1210 | -e "FM_DATE_HASH=${FM_DATE_HASH:-}" \ |
| 1211 | -e "FM_DATE_DIRTY=${FM_DATE_DIRTY:-}" \ |
| 1212 | -e "GIT_STAMP=${GIT_STAMP:-}" \ |
| 1213 | -e "FILE_NUMBERS=$FILE_NUMBERS" \ |
| 1214 | -e "FILE_NUMBER_FROM=$FILE_NUMBER_FROM" \ |
| 1215 | -e "HIDE_FIRST_H1=$HIDE_FIRST_H1" \ |
| 1216 | -e "FM_WATERMARK=$FM_WATERMARK" \ |
| 1217 | -e "FILE_PAGEBREAK=$FILE_PAGEBREAK" \ |
| 1218 | --tmpfs /tmp:exec \ |
| 1219 | "$IMAGE_NAME" "/work/$CONVERT_BASENAME" "$INPUT_FILE" "/output/$OUTPUT_FILE" \ |
| 1220 | || { |
| 1221 | echo "" |
| 1222 | echo -e " ${RED}${BOLD}Error producing PDF.${RESET} Docker/pandoc exited with a non-zero status." |
| 1223 | echo "" |
| 1224 | return 1 |
| 1225 | } |
| 1226 | |
| 1227 | # Move preview file to /tmp and clean up |
| 1228 | if [[ -n "$PREVIEW_FINAL" ]]; then |
| 1229 | mv "$OUTPUT" "$PREVIEW_FINAL" |
| 1230 | OUTPUT="$PREVIEW_FINAL" |
| 1231 | fi |
| 1232 | |
| 1233 | echo "" |
| 1234 | echo -e " ${GREEN}${BOLD}PDF created:${RESET} ${CYAN}${OUTPUT}${RESET}" |
| 1235 | echo "" |
| 1236 | |
| 1237 | # Open if requested |
| 1238 | if [[ $OPEN -eq 1 ]]; then |
| 1239 | open_pdf "$OUTPUT" |
| 1240 | fi |
| 1241 | } |
| 1242 | |
# --- Process each input file ---
# Converts every path in POSITIONAL via convert_file (passing OUT_FILE along),
# keeps going after a failure, and finishes with a summary banner that reports
# either full success or how many of the files failed.
run_all() {
  local failures=0
  local total=${#POSITIONAL[@]}

  for input_file in "${POSITIONAL[@]}"; do
    if ! convert_file "$input_file" "$OUT_FILE"; then
      failures=$((failures + 1))
    fi
  done

  if [[ $failures -ne 0 ]]; then
    header "${failures} of ${total} file(s) failed"
  else
    header "Complete! (${total} file(s))"
  fi
}
| 1256 | |
| 1257 | run_all |
| 1258 | |
# --- Watch mode ---
# Polls every input file every 2 seconds and re-runs run_all whenever the
# SHA-256 of any of them changes. Runs until interrupted.
if [[ $WATCH -eq 1 ]]; then
  info "Watching for changes... ${DIM}(Ctrl+C to stop)${RESET}"
  echo ""

  # Print the SHA-256 of file $1, or nothing if it cannot be read right now
  # (e.g. an editor is replacing it via rename).
  _watch_hash() {
    [[ -r "$1" ]] || return 0
    _sha256 < "$1" | cut -d' ' -f1
  }

  # Checksums are kept in an indexed array parallel to POSITIONAL. Indexed
  # arrays work on bash 3 (no associative array needed), and paths are never
  # interpreted as patterns, so names containing ".", "[" etc. are safe.
  WATCH_HASHES=()
  for f in "${POSITIONAL[@]}"; do
    WATCH_HASHES+=("$(_watch_hash "$f" || true)")
  done

  while true; do
    sleep 2
    CHANGED=0
    for i in "${!POSITIONAL[@]}"; do
      NEW_HASH=$(_watch_hash "${POSITIONAL[$i]}") || NEW_HASH=""
      # No hash means the file was unreadable during this poll; keep the old
      # checksum and look again next time instead of aborting the watcher.
      [[ -n "$NEW_HASH" ]] || continue
      if [[ "$NEW_HASH" != "${WATCH_HASHES[$i]}" ]]; then
        CHANGED=1
        WATCH_HASHES[$i]="$NEW_HASH"
      fi
    done
    if [[ $CHANGED -eq 1 ]]; then
      echo ""
      info "Change detected — rebuilding..."
      echo ""
      run_all
    fi
  done
fi
| 1295 | |
| 1296 | # Check for updates (runs after success, fast timeout) |
| 1297 | check_for_update |
| 1298 |