#!/usr/bin/env bash
#
# capacheck.sh
# Author: Robert Miller, St. John's, Newfoundland and Labrador
#
# Purpose:
#   Validate JPEG images for CAPA-style submission requirements.
#
# Notes:
#   Designed for macOS with ExifTool and ImageMagick installed.
#   Compatible with Linux and other UNIX-based systems like OpenBSD.
#   Update IDENTIFY and EXIFTOOL paths, if required.
#
# On macOS:
# Install Homebrew (https://brew.sh/), then run:
#   brew install exiftool
#   brew install imagemagick
#
# Installation:
# Save this script to a directory in your system PATH (e.g., ~/bin or /usr/local/bin).
# Example:
#   mkdir -p ~/bin
#   mv capacheck.sh ~/bin/
#   chmod +x ~/bin/capacheck.sh
#
# To verify your PATH:
#   echo $PATH
#
# Ensure the script location is listed. If not, add it:
#   export PATH="$HOME/bin:$PATH"
#
# To make this permanent (macOS with bash):
#   echo 'export PATH="$HOME/bin:$PATH"' >> ~/.bash_profile
#
# Reload your shell:
#   source ~/.bash_profile
#
# Confirm script is accessible:
#   which capacheck.sh
#   capacheck.sh --help
#
#
# Checks:
#   - width
#   - height
#   - colour space
#   - file size
#   - metadata presence:
#       - camera details
#       - software/editing details
#
# Tools required:
#   /usr/local/bin/identify
#   /usr/local/bin/exiftool
#
# Usage examples:
#   ./capacheck.sh
#   ./capacheck.sh "*.jpg"
#   ./capacheck.sh --failed *.jpg
#   ./capacheck.sh --all --fields name,dimensions,colorspace,size_human,meta_camera,meta_software,status *.jpg
#   ./capacheck.sh --report
#   ./capacheck.sh --report save
#   ./capacheck.sh --report both
#   ./capacheck.sh --term-width 140 --report screen *.jpg
#

set -u
set -o pipefail

# Print the path to use for an external tool.
#   $1 - configured path (kept when it is executable, and as last resort)
#   $2 - command name to look up on PATH when $1 is not executable
# Homebrew on Apple Silicon installs into /opt/homebrew/bin and most Linux
# distributions use /usr/bin, so a fixed /usr/local/bin path alone is not
# enough to run out of the box.
resolve_tool_path() {
  if [ -x "$1" ]; then
    printf '%s\n' "$1"
  else
    command -v "$2" 2>/dev/null || printf '%s\n' "$1"
  fi
}

# IDENTIFY / EXIFTOOL may be pre-set in the environment to force a location.
IDENTIFY="$(resolve_tool_path "${IDENTIFY:-/usr/local/bin/identify}" identify)"
EXIFTOOL="$(resolve_tool_path "${EXIFTOOL:-/usr/local/bin/exiftool}" exiftool)"

# ------------------------------------------------------------------
# Defaults - edit as needed
# ------------------------------------------------------------------
MAX_WIDTH=1400                 # pixels
MAX_HEIGHT=1050                # pixels
MAX_FILE_BYTES=1800000         # bytes
REQUIRED_COLORSPACE="sRGB"     # compared case-insensitively

# 1 = check is enforced, 0 = check is skipped
REQUIRE_CAMERA_METADATA=1
REQUIRE_SOFTWARE_METADATA=1
REQUIRE_ONE_EDGE_AT_MAX=1

SHOW_MODE="summary"   # summary | all | failed
FIELDS="name,dimensions,colorspace,size_human,meta_camera,meta_software,status"
REPORT_MODE=""        # "" | screen | save | both
PROMPT_FIRST=1        # 1 = ask for confirmation before scanning

DEFAULT_TERM_WIDTH=120   # used when no terminal width can be detected
TERM_WIDTH=""            # empty = auto-detect
MAX_COL_WIDTH=30         # upper bound for one table column

# Run-time state, filled in while the script executes.
TMP_FILES=""
TMP_RESULTS=""
FIRST_DIR=""
TOTAL=0
PASSED=0
FAILED=0
MAX_FILE_MB=""

# ------------------------------------------------------------------
# Helper functions
# ------------------------------------------------------------------
# Recompute MAX_FILE_MB (decimal megabytes, two decimal places) from
# MAX_FILE_BYTES.
# Globals: MAX_FILE_BYTES (read), MAX_FILE_MB (written)
recalc_size_values() {
  # The value is handed to awk with -v instead of being spliced into the
  # program text, so a user-supplied --max-bytes can never run as awk code.
  MAX_FILE_MB=$(awk -v bytes="$MAX_FILE_BYTES" 'BEGIN { printf "%.2f", bytes / 1000000 }')
}

# Print "yes" when $1 is numerically 1, otherwise "no" (no trailing newline).
yes_no() {
  local answer="no"
  [ "$1" -eq 1 ] && answer="yes"
  printf "%s" "$answer"
}

# Succeed when the path given as $1 exists and is executable.
have_cmd() {
  if [ -x "$1" ]; then
    return 0
  fi
  return 1
}

# Print the command-line help (options, field names, examples) to stdout.
usage() {
  # Resolve the invoked script name once and reuse it in every line.
  local prog
  prog="$(basename "$0")"

  cat <<EOF
Usage:
  $prog [options] [files or globs]

Options:
  --all                    Show all results
  --failed                 Show only failed images
  --summary                Show summary-style output (default)
  --fields LIST            Comma-separated output fields
  --report [screen|save|both]
                           Generate detailed report
                           screen = view in less
                           save   = write report file
                           both   = screen + save
                           Default if omitted: screen
  --no-prompt              Do not prompt before scanning
  --max-width N            Set max width
  --max-height N           Set max height
  --max-bytes N            Set max file size in bytes
  --colorspace NAME        Set required colour space
  --no-edge-max            Do not require one edge to equal max dimension
  --no-camera-meta         Do not require camera metadata
  --no-software-meta       Do not require software metadata
  --term-width N           Set display width for output formatting
  --help | -h | -?         Show this help

Fields:
  name
  width
  height
  dimensions
  colorspace
  size
  size_human
  meta_camera
  meta_software
  pass_width
  pass_height
  pass_edge
  pass_colorspace
  pass_size
  pass_camera
  pass_software
  status

Examples:
  $prog
  $prog "*.jpg"
  $prog --failed *.jpg
  $prog --all --fields name,dimensions,colorspace,size_human,status *.jpg
  $prog --report
  $prog --report save
  $prog --report both
  $prog --term-width 140 --report screen *.jpg
EOF
}

# Print the byte count given as $1 in binary units: whole bytes below 1 KiB,
# one decimal for KiB, two decimals for MiB (no trailing newline).
human_bytes() {
  awk -v b="$1" '
    BEGIN {
      kib = 1024
      mib = kib * kib
      if (b < kib) {
        printf "%d B", b
      } else if (b < mib) {
        printf "%.1f KiB", b / kib
      } else {
        printf "%.2f MiB", b / mib
      }
    }'
}

# Print "PASS" when $1 is numerically 1, otherwise "FAIL" (no trailing newline).
bool_word() {
  local verdict="FAIL"
  [ "$1" -eq 1 ] && verdict="PASS"
  printf "%s" "$verdict"
}

# Print one regular file per line for the given inputs.
#   no arguments : *.jpg / *.jpeg (case-insensitive) in the current
#                  directory, sorted
#   arguments    : each argument is taken literally when it exists (only
#                  regular files are printed); otherwise it is treated as a
#                  glob pattern and expanded with compgen -G
# Outputs: file paths on stdout
expand_input_patterns() {
  # Locals keep the loop variables from leaking into the caller's scope.
  local arg match

  if [ "$#" -eq 0 ]; then
    find . -maxdepth 1 -type f \( -iname "*.jpg" -o -iname "*.jpeg" \) | sort
    return
  fi

  for arg in "$@"; do
    if [ -e "$arg" ]; then
      # Existing path: print it only when it is a regular file.
      [ -f "$arg" ] && printf "%s\n" "$arg"
      continue
    fi

    # Not an existing path: let bash expand it as a pattern.
    compgen -G "$arg" 2>/dev/null | while IFS= read -r match; do
      [ -f "$match" ] && printf "%s\n" "$match"
    done
  done
}

# Print the first line of ExifTool's value for one tag of one file.
#   $1 - file to inspect
#   $2 - tag name without the leading dash (e.g. Make)
# Globals: EXIFTOOL (read)
# Prints nothing when the tag is absent; ExifTool's stderr is discarded.
get_exiftool_value() {
  # Local on purpose: the scan loop uses a global named "file" as its
  # iteration variable, which this function must not overwrite.
  local file="$1"
  local tag="$2"
  "$EXIFTOOL" -s3 "-$tag" "$file" 2>/dev/null | head -n 1
}

# Print one named field of a tab-separated result row.
#   $1 - the row (17 tab-separated columns, in the order listed below)
#   $2 - field name
# Prints an empty line for an unknown field name. Without that guard the
# lookup would resolve to $0 and dump the entire row into one column.
field_value() {
  local row="$1"
  local key="$2"
  printf "%s\n" "$row" | awk -F '\t' -v k="$key" '
    BEGIN {
      # Column order must match the header written to the results file.
      n = split("name width height dimensions colorspace size size_human " \
                "meta_camera meta_software pass_width pass_height pass_edge " \
                "pass_colorspace pass_size pass_camera pass_software status",
                names, " ")
      for (i = 1; i <= n; i++) {
        idx[names[i]] = i
      }
    }
    {
      if (k in idx) {
        print $(idx[k])
      } else {
        print ""
      }
    }'
}

# Decide the display width used for table formatting.
# Globals: TERM_WIDTH (read/written), COLUMNS (read), DEFAULT_TERM_WIDTH (read)
# Order of preference: explicit TERM_WIDTH, $COLUMNS, `stty size`,
# `tput cols`, then DEFAULT_TERM_WIDTH.
detect_term_width() {
  local cols=""

  # Respect an explicit user setting, but only when it is a positive integer.
  # Anything else would break the arithmetic that later derives the column
  # width, so it is discarded and auto-detection takes over.
  if [ -n "${TERM_WIDTH:-}" ]; then
    if [[ "$TERM_WIDTH" =~ ^[0-9]+$ ]] && [ "$TERM_WIDTH" -gt 0 ]; then
      # 10# forces base 10 so a value such as 080 is not read as octal.
      TERM_WIDTH=$((10#$TERM_WIDTH))
      return
    fi
    echo "Warning: ignoring invalid display width '$TERM_WIDTH'" >&2
    TERM_WIDTH=""
  fi

  # 1. Shell-provided width
  if [ -n "${COLUMNS:-}" ] && [[ "${COLUMNS}" =~ ^[0-9]+$ ]] && [ "$COLUMNS" -gt 0 ]; then
    TERM_WIDTH="$COLUMNS"
  fi

  # 2. TTY width
  if [ -z "${TERM_WIDTH:-}" ]; then
    cols="$(stty size 2>/dev/null | awk '{print $2}')"
    if [[ "$cols" =~ ^[0-9]+$ ]] && [ "$cols" -gt 0 ]; then
      TERM_WIDTH="$cols"
    fi
  fi

  # 3. tput as lower-priority fallback only
  if [ -z "${TERM_WIDTH:-}" ]; then
    if command -v tput >/dev/null 2>&1; then
      cols="$(tput cols 2>/dev/null)"
      if [[ "$cols" =~ ^[0-9]+$ ]] && [ "$cols" -gt 0 ]; then
        TERM_WIDTH="$cols"
      fi
    fi
  fi

  # 4. Final fallback: nothing detected, or the detected width is below 40
  if [ -z "${TERM_WIDTH:-}" ] || ! [[ "$TERM_WIDTH" =~ ^[0-9]+$ ]] || [ "$TERM_WIDTH" -lt 40 ]; then
    TERM_WIDTH="$DEFAULT_TERM_WIDTH"
  fi
}

# Print the width of one table column.
#   $1 - number of columns to fit
# Globals: TERM_WIDTH (read), MAX_COL_WIDTH (read)
# The result is (TERM_WIDTH - 1) / columns, capped at MAX_COL_WIDTH and never
# below 12.
calc_col_width() {
  local field_count="${1:-}"
  local usable_width calc_width

  # An empty or non-numeric count (e.g. from --fields "") is treated as 1 so
  # the division below can never divide by zero.
  case "$field_count" in
    '' | *[!0-9]*) field_count=1 ;;
  esac
  field_count=$((10#$field_count))
  if [ "$field_count" -lt 1 ]; then
    field_count=1
  fi

  usable_width=$((TERM_WIDTH - 1))
  calc_width=$((usable_width / field_count))

  if [ "$calc_width" -gt "$MAX_COL_WIDTH" ]; then
    calc_width="$MAX_COL_WIDTH"
  fi

  if [ "$calc_width" -lt 12 ]; then
    calc_width=12
  fi

  printf "%s" "$calc_width"
}

# Print a value left-aligned in a fixed-width cell (no trailing newline).
#   $1 - text to print
#   $2 - cell width
# Text longer than the cell is cut and ends in "..." (or is cut hard when the
# cell is 3 characters wide or less).
print_padded() {
  # Locals matter here: the scan loop keeps the image width in a global
  # named "width", which this function must not overwrite.
  local text="$1"
  local cell_width="$2"
  local clipped

  clipped="$(printf "%s" "$text" | awk -v w="$cell_width" '
    {
      if (length($0) > w) {
        if (w > 3) {
          print substr($0,1,w-3) "..."
        } else {
          print substr($0,1,w)
        }
      } else {
        print
      }
    }')"

  printf "%-*s" "$cell_width" "$clipped"
}

# Write the full plain-text report to stdout: title, active requirements,
# the scanned file list, one detail section per result row, and the totals.
# Globals (read): MAX_*, REQUIRED_COLORSPACE, REQUIRE_*, TERM_WIDTH,
#                 TMP_FILES, TMP_RESULTS, SHOW_MODE, TOTAL, PASSED, FAILED
make_report_text() {
  # Title and requirement thresholds.
  printf '%s\n' \
    "CAPA JPEG Validation Script" \
    "Author : Robert Miller" \
    "Purpose: Validate JPEG files for dimensions, colour space, file size, and metadata" \
    "" \
    "Requirements:" \
    "  Max width            : $MAX_WIDTH px" \
    "  Max height           : $MAX_HEIGHT px" \
    "  Max file size (MB)   : ${MAX_FILE_MB} MB" \
    "  Max file size (MiB)  : $(human_bytes "$MAX_FILE_BYTES") ($MAX_FILE_BYTES bytes)" \
    "  Required colour space: $REQUIRED_COLORSPACE" \
    "  Require camera meta  : $(yes_no "$REQUIRE_CAMERA_METADATA")" \
    "  Require software meta: $(yes_no "$REQUIRE_SOFTWARE_METADATA")" \
    "  Require one edge max : $(yes_no "$REQUIRE_ONE_EDGE_AT_MAX")" \
    "  Display width        : $TERM_WIDTH" \
    "" \
    "Files scanned:"
  cat "$TMP_FILES"
  printf '%s\n' "" "Detailed results:" ""

  # One section per data row; the first line of the results file is the
  # column header and is skipped.
  tail -n +2 "$TMP_RESULTS" | while IFS= read -r row; do
    row_status="$(field_value "$row" "status")"

    # In "failed" mode only failing rows are reported.
    if [ "$SHOW_MODE" = "failed" ] && [ "$row_status" != "FAIL" ]; then
      continue
    fi

    printf '%s\n' \
      "File             : $(field_value "$row" "name")" \
      "Status           : $row_status" \
      "Width            : $(field_value "$row" "width")" \
      "Height           : $(field_value "$row" "height")" \
      "Dimensions       : $(field_value "$row" "dimensions")" \
      "Colour space     : $(field_value "$row" "colorspace")" \
      "File size (bytes): $(field_value "$row" "size")" \
      "File size        : $(field_value "$row" "size_human")" \
      "Camera metadata  : $(field_value "$row" "meta_camera")" \
      "Software metadata: $(field_value "$row" "meta_software")" \
      "Pass width       : $(field_value "$row" "pass_width")" \
      "Pass height      : $(field_value "$row" "pass_height")" \
      "Pass edge max    : $(field_value "$row" "pass_edge")" \
      "Pass colourspace : $(field_value "$row" "pass_colorspace")" \
      "Pass file size   : $(field_value "$row" "pass_size")" \
      "Pass camera meta : $(field_value "$row" "pass_camera")" \
      "Pass software    : $(field_value "$row" "pass_software")" \
      ""
  done

  printf '%s\n' \
    "Summary:" \
    "  Total files : $TOTAL" \
    "  Passed      : $PASSED" \
    "  Failed      : $FAILED"
}

# Show the report on screen: paged through less when less is available,
# otherwise written straight to stdout.
show_report_screen() {
  if ! command -v less >/dev/null 2>&1; then
    make_report_text
    return
  fi

  make_report_text | less
}

# Write the report to a timestamped file next to the first scanned image.
# Globals: FIRST_DIR (read; set to "." when empty), REPORT_FILE (written)
# Outputs: the report path on stdout, or an error on stderr
# Returns: 0 on success, 1 when the report file could not be written
save_report_file() {
  local ts
  ts="$(date +"%Y-%m-%d_%H-%M-%S")"
  [ -z "${FIRST_DIR:-}" ] && FIRST_DIR="."
  REPORT_FILE="$FIRST_DIR/capacheck_report_$ts.txt"

  # Only announce the file when it was really written (the directory may be
  # read-only or may have disappeared).
  if ! make_report_text > "$REPORT_FILE"; then
    echo "Error: could not write report to $REPORT_FILE" >&2
    return 1
  fi

  echo
  echo "Report written to:"
  echo "  $REPORT_FILE"
}

# ------------------------------------------------------------------
# Parse arguments
# ------------------------------------------------------------------
INPUTS=""
recalc_size_values

# Abort with usage and exit status 2 unless the value is a positive decimal
# integer.
#   $1 - option name (used in the error message)
#   $2 - value supplied by the user
require_positive_int() {
  if ! [[ "$2" =~ ^[0-9]+$ ]] || [ "$((10#$2))" -le 0 ]; then
    echo "Invalid value for $1: '$2' (expected a positive integer)" >&2
    usage
    exit 2
  fi
}

# Parse the command line into the configuration globals.
# Non-option arguments are appended to INPUTS, one per line. Everything after
# a literal "--" is treated as an input, even when it starts with a dash.
# Exits with status 2 on a missing/invalid value or an unknown option.
parse_args() {
  while [ "$#" -gt 0 ]; do
    case "$1" in
      --all)
        SHOW_MODE="all"
        shift
        ;;
      --failed)
        SHOW_MODE="failed"
        shift
        ;;
      --summary)
        SHOW_MODE="summary"
        shift
        ;;
      --fields)
        [ "$#" -lt 2 ] && { echo "Missing value for --fields" >&2; usage; exit 2; }
        FIELDS="$2"
        shift 2
        ;;
      --report)
        # The mode is optional; anything else after --report is left in
        # place and parsed as the next argument.
        REPORT_MODE="screen"
        if [ "$#" -ge 2 ]; then
          case "$2" in
            screen|save|both)
              REPORT_MODE="$2"
              shift 2
              ;;
            *)
              shift
              ;;
          esac
        else
          shift
        fi
        ;;
      --no-prompt)
        PROMPT_FIRST=0
        shift
        ;;
      --max-width)
        [ "$#" -lt 2 ] && { echo "Missing value for --max-width" >&2; usage; exit 2; }
        require_positive_int "--max-width" "$2"
        # 10# forces base 10 so values with leading zeros are not octal.
        MAX_WIDTH=$((10#$2))
        shift 2
        ;;
      --max-height)
        [ "$#" -lt 2 ] && { echo "Missing value for --max-height" >&2; usage; exit 2; }
        require_positive_int "--max-height" "$2"
        MAX_HEIGHT=$((10#$2))
        shift 2
        ;;
      --max-bytes)
        [ "$#" -lt 2 ] && { echo "Missing value for --max-bytes" >&2; usage; exit 2; }
        require_positive_int "--max-bytes" "$2"
        MAX_FILE_BYTES=$((10#$2))
        recalc_size_values
        shift 2
        ;;
      --colorspace)
        [ "$#" -lt 2 ] && { echo "Missing value for --colorspace" >&2; usage; exit 2; }
        REQUIRED_COLORSPACE="$2"
        shift 2
        ;;
      --no-edge-max)
        REQUIRE_ONE_EDGE_AT_MAX=0
        shift
        ;;
      --no-camera-meta)
        REQUIRE_CAMERA_METADATA=0
        shift
        ;;
      --no-software-meta)
        REQUIRE_SOFTWARE_METADATA=0
        shift
        ;;
      --term-width)
        [ "$#" -lt 2 ] && { echo "Missing value for --term-width" >&2; usage; exit 2; }
        require_positive_int "--term-width" "$2"
        TERM_WIDTH=$((10#$2))
        shift 2
        ;;
      # The "?" is escaped: unquoted it is a glob that matches any
      # single-character option, not just a literal "-?".
      --help|-h|-\?)
        usage
        exit 0
        ;;
      --)
        shift
        while [ "$#" -gt 0 ]; do
          INPUTS="${INPUTS}"$'\n'"$1"
          shift
        done
        ;;
      -?*)
        # A mistyped option is reported instead of being silently treated
        # as a file pattern.
        echo "Unknown option: $1" >&2
        usage
        exit 2
        ;;
      *)
        INPUTS="${INPUTS}"$'\n'"$1"
        shift
        ;;
    esac
  done
}

parse_args "$@"
# All arguments are consumed; leave the positional parameters empty.
set --

detect_term_width

# ------------------------------------------------------------------
# Validate environment
# ------------------------------------------------------------------
# Both external tools must be executable at their configured paths.
have_cmd "$IDENTIFY" || {
  echo "Error: identify not found at $IDENTIFY" >&2
  exit 1
}

have_cmd "$EXIFTOOL" || {
  echo "Error: exiftool not found at $EXIFTOOL" >&2
  exit 1
}

# Start-up banner: script identity, notes, and the requirements in effect.
cat <<EOF
CAPA JPEG Validation Script
Author : Robert Miller
Purpose: Validate JPEG files for dimensions, colour space, file size, and metadata

Notes:
   Designed for macOS with ExifTool and ImageMagick installed.
   Compatible with Linux and other UNIX-based systems (e.g. Canada's OpenBSD).
   Update IDENTIFY and EXIFTOOL paths if required for your environment.

Requirements:
  Max width            : $MAX_WIDTH px
  Max height           : $MAX_HEIGHT px
  Max file size (MB)   : ${MAX_FILE_MB} MB
  Max file size (MiB)  : $(human_bytes "$MAX_FILE_BYTES") ($MAX_FILE_BYTES bytes)
  Required colour space: $REQUIRED_COLORSPACE
  Require camera meta  : $(yes_no "$REQUIRE_CAMERA_METADATA")
  Require software meta: $(yes_no "$REQUIRE_SOFTWARE_METADATA")
  Require one edge max : $(yes_no "$REQUIRE_ONE_EDGE_AT_MAX")
  Display width        : $TERM_WIDTH

EOF

# Scratch files: the list of files to scan and the tab-separated results.
TMP_FILES="$(mktemp)" || {
  echo "Error: could not create temporary file" >&2
  exit 1
}
TMP_RESULTS="$(mktemp)" || {
  rm -f -- "$TMP_FILES"
  echo "Error: could not create temporary file" >&2
  exit 1
}
trap 'rm -f -- "$TMP_FILES" "$TMP_RESULTS"' EXIT

if [ -n "$INPUTS" ]; then
  # INPUTS holds one argument per line; split on newlines only.
  # Globbing is switched off during the split so a name containing glob
  # characters (e.g. "photo[1].jpg") is passed on unchanged;
  # expand_input_patterns performs the pattern expansion itself.
  OLDIFS="$IFS"
  IFS='
'
  set -f
  set -- $INPUTS
  set +f
  IFS="$OLDIFS"
  # awk drops duplicate paths while keeping first-seen order.
  expand_input_patterns "$@" | awk '!seen[$0]++' > "$TMP_FILES"
else
  expand_input_patterns | awk '!seen[$0]++' > "$TMP_FILES"
fi

if [ ! -s "$TMP_FILES" ]; then
  echo "No JPEG files found."
  exit 1
fi

echo "JPEG files found:"
nl -w2 -s'. ' "$TMP_FILES"
echo

if [ "$PROMPT_FIRST" -eq 1 ]; then
  printf "Continue scan? [y/N]: "
  # -r keeps backslashes literal; an EOF on stdin leaves the answer empty,
  # which falls through to "Aborted.".
  read -r ans
  case "$ans" in
    y|Y|yes|YES) ;;
    *) echo "Aborted."; exit 0 ;;
  esac
fi

# Header row of the results file; field_value relies on this column order.
printf "name\twidth\theight\tdimensions\tcolorspace\tsize\tsize_human\tmeta_camera\tmeta_software\tpass_width\tpass_height\tpass_edge\tpass_colorspace\tpass_size\tpass_camera\tpass_software\tstatus\n" > "$TMP_RESULTS"

FIRST_DIR=""
TOTAL=0
PASSED=0
FAILED=0

# Loop-invariant: the required colour space, lower-cased once.
lc_required="$(printf "%s" "$REQUIRED_COLORSPACE" | tr '[:upper:]' '[:lower:]')"

# The file list is read on descriptor 3 so that the tools started inside the
# loop cannot consume it through stdin.
while IFS= read -r file <&3; do
  [ -z "$FIRST_DIR" ] && FIRST_DIR="$(dirname "$file")"

  # One identify call yields width, height and colour space. Only the first
  # output line is used, so a multi-frame file reports its first frame.
  img_info="$("$IDENTIFY" -quiet -format '%w|%h|%[colorspace]\n' "$file" 2>/dev/null | head -n 1)"
  IFS='|' read -r width height colorspace <<< "$img_info"

  # File size via wc -c: unlike "stat -f%z" (BSD/macOS only) this works on
  # Linux as well. BSD wc pads its output with blanks, hence the clean-up.
  size_known=1
  size_bytes="$({ wc -c < "$file"; } 2>/dev/null)" || size_known=0
  size_bytes="${size_bytes//[!0-9]/}"
  if [ -z "$size_bytes" ]; then
    size_bytes=0
    size_known=0
  fi
  size_human="$(human_bytes "$size_bytes")"

  make="$(get_exiftool_value "$file" "Make")"
  model="$(get_exiftool_value "$file" "Model")"
  software="$(get_exiftool_value "$file" "Software")"

  meta_camera=""
  if [ -n "$make" ] && [ -n "$model" ]; then
    meta_camera="$make / $model"
  elif [ -n "$make" ]; then
    meta_camera="$make"
  elif [ -n "$model" ]; then
    meta_camera="$model"
  fi

  dimensions="${width}x${height}"

  # identify prints nothing for an unreadable or corrupt image; the numeric
  # tests below are skipped in that case instead of raising
  # "integer expression expected".
  dims_known=1
  case "$width" in
    '' | *[!0-9]*) dims_known=0 ;;
  esac
  case "$height" in
    '' | *[!0-9]*) dims_known=0 ;;
  esac

  pass_width=0
  pass_height=0
  pass_edge=1
  pass_colorspace=0
  pass_size=0
  pass_camera=1
  pass_software=1

  if [ "$dims_known" -eq 1 ]; then
    [ "$width" -le "$MAX_WIDTH" ] && pass_width=1
    [ "$height" -le "$MAX_HEIGHT" ] && pass_height=1
  fi
  if [ "$size_known" -eq 1 ] && [ "$size_bytes" -le "$MAX_FILE_BYTES" ]; then
    pass_size=1
  fi

  lc_colorspace="$(printf "%s" "$colorspace" | tr '[:upper:]' '[:lower:]')"
  [ "$lc_colorspace" = "$lc_required" ] && pass_colorspace=1

  # Optional rule: at least one edge must be exactly at its maximum.
  if [ "$REQUIRE_ONE_EDGE_AT_MAX" -eq 1 ]; then
    if [ "$dims_known" -eq 1 ] &&
       { [ "$width" -eq "$MAX_WIDTH" ] || [ "$height" -eq "$MAX_HEIGHT" ]; }; then
      pass_edge=1
    else
      pass_edge=0
    fi
  fi

  if [ "$REQUIRE_CAMERA_METADATA" -eq 1 ] && [ -z "$meta_camera" ]; then
    pass_camera=0
  fi

  if [ "$REQUIRE_SOFTWARE_METADATA" -eq 1 ] && [ -z "$software" ]; then
    pass_software=0
  fi

  # A file passes only when every individual check passed.
  status="PASS"
  if [ "$pass_width" -ne 1 ] || [ "$pass_height" -ne 1 ] || \
     [ "$pass_edge" -ne 1 ] || [ "$pass_colorspace" -ne 1 ] || \
     [ "$pass_size" -ne 1 ] || [ "$pass_camera" -ne 1 ] || \
     [ "$pass_software" -ne 1 ]; then
    status="FAIL"
  fi

  printf "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" \
    "$file" \
    "$width" \
    "$height" \
    "$dimensions" \
    "$colorspace" \
    "$size_bytes" \
    "$size_human" \
    "$meta_camera" \
    "$software" \
    "$(bool_word "$pass_width")" \
    "$(bool_word "$pass_height")" \
    "$(bool_word "$pass_edge")" \
    "$(bool_word "$pass_colorspace")" \
    "$(bool_word "$pass_size")" \
    "$(bool_word "$pass_camera")" \
    "$(bool_word "$pass_software")" \
    "$status" >> "$TMP_RESULTS"

  TOTAL=$((TOTAL + 1))
  if [ "$status" = "PASS" ]; then
    PASSED=$((PASSED + 1))
  else
    FAILED=$((FAILED + 1))
  fi
done 3< "$TMP_FILES"

# ------------------------------------------------------------------
# Print results
# ------------------------------------------------------------------
# Field names that exist in the results file.
KNOWN_FIELDS=" name width height dimensions colorspace size size_human meta_camera meta_software pass_width pass_height pass_edge pass_colorspace pass_size pass_camera pass_software status "

# Split FIELDS on commas into an array. Reading into an array avoids the
# pathname expansion an unquoted "set -- $FIELDS" would apply to a value such
# as "*".
IFS=',' read -r -a REQUESTED_FIELDS <<< "$FIELDS"

FIELD_NAMES=()
FIELD_COUNT=0
# The ${arr[@]+...} form keeps "set -u" quiet for an empty array on older
# bash releases.
for f in ${REQUESTED_FIELDS[@]+"${REQUESTED_FIELDS[@]}"}; do
  f="${f//[[:space:]]/}"
  [ -n "$f" ] || continue
  case "$KNOWN_FIELDS" in
    *" $f "*)
      FIELD_NAMES+=("$f")
      FIELD_COUNT=$((FIELD_COUNT + 1))
      ;;
    *)
      echo "Warning: ignoring unknown field '$f'" >&2
      ;;
  esac
done

# Without any usable field the table would be empty and the column-width
# calculation would divide by zero, so fall back to the default columns.
if [ "$FIELD_COUNT" -eq 0 ]; then
  echo "Warning: no valid fields requested; using the default field list" >&2
  FIELD_NAMES=(name dimensions colorspace size_human meta_camera meta_software status)
  FIELD_COUNT=7
fi

FIELD_LIST="${FIELD_NAMES[*]}"
COL_WIDTH="$(calc_col_width "$FIELD_COUNT")"

# Print one result row as fixed-width columns, in the requested field order.
#   $1 - tab-separated result row
print_row() {
  local row="$1"
  local f
  for f in "${FIELD_NAMES[@]}"; do
    print_padded "$(field_value "$row" "$f")" "$COL_WIDTH"
  done
  printf "\n"
}

# Column titles.
for f in "${FIELD_NAMES[@]}"; do
  print_padded "$f" "$COL_WIDTH"
done
printf "\n"

# Underline: one run of dashes per column, built once.
separator="$(printf '%*s' "$COL_WIDTH" '' | tr ' ' '-')"
for f in "${FIELD_NAMES[@]}"; do
  print_padded "$separator" "$COL_WIDTH"
done
printf "\n"

# Data rows; the first line of the results file is the header and is skipped.
tail -n +2 "$TMP_RESULTS" | while IFS= read -r row; do
  row_status="$(field_value "$row" "status")"

  case "$SHOW_MODE" in
    failed)
      [ "$row_status" != "FAIL" ] && continue
      ;;
    all|summary)
      ;;
  esac

  print_row "$row"
done

echo
echo "Summary:"
echo "  Total files : $TOTAL"
echo "  Passed      : $PASSED"
echo "  Failed      : $FAILED"

# ------------------------------------------------------------------
# Optional report
# ------------------------------------------------------------------
# Produce the optional detailed report; an empty mode means none was requested.
case "$REPORT_MODE" in
  "")
    ;;
  screen)
    show_report_screen
    ;;
  save)
    save_report_file
    ;;
  both)
    # Write the file first, then page the same report on screen.
    save_report_file
    show_report_screen
    ;;
  *)
    echo "Error: invalid report mode '$REPORT_MODE'" >&2
    exit 2
    ;;
esac

exit 0