Image compressors on Kodak

Image compressors on Kodak

Side-by-side rate-distortion and encoding-throughput plots, sourced from the JSON result files whose paths are hardcoded below. RD metrics (bpp, PSNR, SSIM, DISTS) come from rate_distortion_<id>.json, measured at native Kodak resolution; encode throughput (MPx/s) comes from encode_<id>.json, measured on 512x512 center crops. The two are joined on the swept dimension (quality value or channel count), so each operating point shares an encoder configuration but not a sample geometry; sweep values with no matching entry in the encode file are dropped.

Pick which codecs to plot by commenting / uncommenting lines in the CODECS list below.

Hide code cell source

import json
from pathlib import Path
import matplotlib.pyplot as plt
plt.rcParams["font.family"] = "serif"

Hide code cell source

# Codec key -> path of that codec's rate-distortion result JSON.
# load_codec() reads the file and takes the per-sweep-value "mean" metrics
# (bpp, PSNR_dB, SSIM, DISTS_dB) from it.
# The paths are relative, so they resolve against the current working
# directory of the process running this notebook.
# Every key here is expected to have a matching key in EC_PATHS, because
# load_codec() looks the same name up in both tables.
RD_PATHS = {
    "frappe":          "results/frappe/rate_distortion_1777315303.json",
    "jpeg":            "results/jpeg/rate_distortion_1777321251.json",
    "avif_default":    "results/avif/rate_distortion_1777321273.json",
    "avif_speed0":     "results/avif/rate_distortion_1777567112.json",
    "avif_speed10":    "results/avif/rate_distortion_speed10_1777561281.json",
    "jxl":             "results/jxl/rate_distortion_1777567114.json",
    "ldm_sr":          "results/ldm_sr_openimages/rate_distortion_1777418424.json",
    "vq_diff_ithq":    "results/vq_diffusion_ithq/rate_distortion_1777418464.json",
    "kandinsky_2_1":   "results/kandinsky_2_1/rate_distortion_1777418494.json",
    "stable_cascade":  "results/stable_cascade/rate_distortion_1777418663.json",
    "cheng2020":       "results/cheng2020/rate_distortion_1777567118.json",
    "mbt2018":         "results/mbt2018/rate_distortion_1777567120.json",
    "walloc":          "results/walloc/rate_distortion_1777562174.json",
    "liveaction":      "results/liveaction/rate_distortion_1777567116.json",
    "mcucoder_native": "results/mcucoder/rate_distortion_1777674947_native.json",
    "mcucoder_224":    "results/mcucoder/rate_distortion_1777679959_224.json",
}

# Codec key -> path of that codec's encode-throughput result JSON.
# load_codec() reads results[<sweep value>]["throughput"]["median_MPx_per_s"]
# from it. Keys mirror RD_PATHS.
# NOTE(review): the "encode_PENDING_.json" entries look like placeholders for
# encode runs that have not been recorded yet; calling load_codec() on those
# keys will presumably fail unless a file with that literal name exists.
# NOTE(review): "mcucoder_native" and "mcucoder_224" point at the same encode
# file, although their RD files differ -- confirm this is intentional.
EC_PATHS = {
    "frappe":          "results/frappe/encode_1777342722.json",
    "jpeg":            "results/jpeg/encode_1777321267.json",
    "avif_default":    "results/avif/encode_1777321337.json",
    "avif_speed0":     "results/avif/encode_1777572007.json",
    "avif_speed10":    "results/avif/encode_speed10_1777561318.json",
    "jxl":             "results/jxl/encode_1777587942.json",
    "ldm_sr":          "results/ldm_sr_openimages/encode_PENDING_.json",
    "vq_diff_ithq":    "results/vq_diffusion_ithq/encode_PENDING_.json",
    "kandinsky_2_1":   "results/kandinsky_2_1/encode_1777587591.json",
    "stable_cascade":  "results/stable_cascade/encode_PENDING_.json",
    "cheng2020":       "results/cheng2020/encode_1777582109.json",
    "mbt2018":         "results/mbt2018/encode_1777585754.json",
    "walloc":          "results/walloc/encode_1777571761.json",
    "liveaction":      "results/liveaction/encode_1777571794.json",
    "mcucoder_native": "results/mcucoder/encode_1777680434.json",
    "mcucoder_224":    "results/mcucoder/encode_1777680434.json",
}

# Metric key -> full axis label (with units); plot_panel() uses these for the
# x and y axis labels. The keys match the per-point dict keys produced by
# load_codec().
AXIS_LABEL = {
    "bpp":            "Rate [bits per pixel]",
    "throughput_MPx": "Encoding throughput [MPx/s]",
    "PSNR_dB":        "PSNR [dB]",
    "SSIM":           "SSIM",
    "DISTS_dB":       "DISTS [dB]",
}
# Metric key -> short name; plot_panel() combines two of these into the
# "<y> vs <x> (Kodak)" panel title.
TITLE_LABEL = {
    "bpp":            "Rate",
    "throughput_MPx": "Throughput",
    "PSNR_dB":        "PSNR",
    "SSIM":           "SSIM",
    "DISTS_dB":       "DISTS",
}
# Metrics whose axis plot_panel() draws on a log scale, whether they appear
# on the x or the y axis.
LOG_KEYS = ("bpp", "throughput_MPx")

Hide code cell source

def load_codec(name, rd_paths=None, ec_paths=None):
    """Load one codec's operating points by joining its RD and encode JSONs.

    The rate-distortion file supplies the list of swept values (under
    "channel_counts" when that key is present, otherwise "quality_values")
    and the per-value mean metrics. The encode file supplies the median
    encode throughput. Both "results" tables are keyed by the string form
    of the swept value.

    Args:
        name: Codec key to look up in both path tables.
        rd_paths: Optional mapping of codec key -> rate-distortion JSON path.
            Defaults to the module-level RD_PATHS.
        ec_paths: Optional mapping of codec key -> encode JSON path.
            Defaults to the module-level EC_PATHS.

    Returns:
        A list of dicts, one per swept value present in both files, in the
        order given by the rate-distortion file. Each dict has the keys
        "sweep", "bpp", "PSNR_dB", "SSIM", "DISTS_dB" and "throughput_MPx".
        Swept values with no entry in the encode results are skipped, so the
        list may be shorter than the sweep, or empty.

    Raises:
        KeyError: If `name` is missing from a path table, or an expected
            field is missing from either JSON document.
        OSError: If either file cannot be read (for example, it does not
            exist).
    """
    rd_paths = RD_PATHS if rd_paths is None else rd_paths
    ec_paths = EC_PATHS if ec_paths is None else ec_paths

    # JSON is UTF-8 on disk; decode explicitly instead of relying on the
    # platform's locale encoding.
    rd = json.loads(Path(rd_paths[name]).read_text(encoding="utf-8"))
    ec = json.loads(Path(ec_paths[name]).read_text(encoding="utf-8"))

    swept_key = "channel_counts" if "channel_counts" in rd else "quality_values"
    rd_results = rd["results"]
    ec_results = ec["results"]

    points = []
    for k in rd[swept_key]:
        s = str(k)  # both result tables are keyed by the stringified value
        if s not in ec_results:
            # Inner join: keep only values measured in both files.
            continue
        rm = rd_results[s]["mean"]
        tp = ec_results[s]["throughput"]
        points.append({
            "sweep": k,
            "bpp": rm["bpp"],
            "PSNR_dB": rm["PSNR_dB"],
            "SSIM": rm["SSIM"],
            "DISTS_dB": rm["DISTS_dB"],
            "throughput_MPx": tp["median_MPx_per_s"],
        })
    return points

def col(pts, key):
    """Return the `key` field of every point in `pts`, preserving order."""
    values = []
    for point in pts:
        values.append(point[key])
    return values

Hide code cell source

def plot_panel(group, y_key, x_key, legend=True):
    """Draw and show a single small figure of `y_key` against `x_key`.

    One line is drawn per codec entry in `group`. Each entry is a dict with
    a "name" (legend label) and "data" (list of point dicts), plus optional
    "marker", "linestyle" and "color" overrides.

    Note the argument order: the y metric comes before the x metric.
    Any axis whose metric is listed in LOG_KEYS is log-scaled.

    Args:
        group: Iterable of codec entries to draw.
        y_key: Metric plotted on the vertical axis.
        x_key: Metric plotted on the horizontal axis.
        legend: Whether to draw the legend.
    """
    x_is_log = x_key in LOG_KEYS
    y_is_log = y_key in LOG_KEYS

    _, ax = plt.subplots(figsize=(4.5, 4), dpi=180)
    if x_is_log:
        ax.set_xscale("log")
    if y_is_log:
        ax.set_yscale("log")

    for codec in group:
        series = codec["data"]
        xs = col(series, x_key)
        ys = col(series, y_key)
        ax.plot(
            xs,
            ys,
            marker=codec.get("marker", "."),
            linestyle=codec.get("linestyle", "-"),
            color=codec.get("color"),
            label=codec["name"],
        )

    ax.set_xlabel(AXIS_LABEL[x_key])
    ax.set_ylabel(AXIS_LABEL[y_key])
    ax.set_title(f"{TITLE_LABEL[y_key]} vs {TITLE_LABEL[x_key]} (Kodak)")
    if legend:
        ax.legend(loc="best")

    # Minor grid lines are only drawn when at least one axis is logarithmic.
    grid_lines = "both" if (x_is_log or y_is_log) else "major"
    ax.grid(True, which=grid_lines, alpha=0.4)

    plt.tight_layout()
    plt.show()

Hide code cell source

# Codecs drawn on every panel. Comment / uncomment entries to control which
# codecs appear on the plots.
# Each entry is a dict consumed by plot_panel():
#   "name"      - legend label
#   "data"      - operating points, loaded eagerly by load_codec() while this
#                 list is being built (so the JSON files of every active
#                 entry are read here)
#   "color", "linestyle", "marker" - matplotlib line styling
# NOTE(review): some commented-out entries reuse a colour of an active one
# ("green" for MCUCoder vs LiVeAction, "tab:purple" for LDM-SR vs FRAPPE);
# pick a different colour when enabling them together.
CODECS = [
    {"name": "JPEG",             "data": load_codec("jpeg"),           "color": "gray",   "linestyle": "-", "marker": "."},
    # {"name": "JPEG-XL",          "data": load_codec("jxl"),            "color": "tab:brown",  "linestyle": "-", "marker": "."},
    {"name": "AVIF",   "data": load_codec("avif_default"),   "color": "black", "linestyle": "-", "marker": "."},
    # {"name": "AVIF (speed=0)",   "data": load_codec("avif_speed0"),    "color": "tab:red",    "linestyle": "-", "marker": "."},
    # {"name": "AVIF (speed=10)",  "data": load_codec("avif_speed10"),   "color": "tab:pink",   "linestyle": "-", "marker": "."},
    {"name": "mbt2018",          "data": load_codec("mbt2018"),        "color": "red",       "linestyle": "-", "marker": "."},
    {"name": "cheng2020-anchor", "data": load_codec("cheng2020"),      "color": "firebrick",       "linestyle": "-", "marker": "."},
    # {"name": "LDM-SR",            "data": load_codec("ldm_sr"),         "color": "tab:purple", "linestyle": "-", "marker": "."},
    # {"name": "VQ-Diffusion ITHQ", "data": load_codec("vq_diff_ithq"),   "color": "tab:gray",   "linestyle": "-", "marker": "."},
    {"name": "Kandinsky 2.1",     "data": load_codec("kandinsky_2_1"),  "color": "orange",  "linestyle": "-", "marker": "."},
    # {"name": "Stable Cascade",    "data": load_codec("stable_cascade"), "color": "tab:cyan",   "linestyle": "-", "marker": "."},
    {"name": "WaLLoC",           "data": load_codec("walloc"),         "color": "blue",    "linestyle": "-", "marker": "."},
    {"name": "LiVeAction",       "data": load_codec("liveaction"),     "color": "green",      "linestyle": "-", "marker": "."},
    # {"name": "MCUCoder",         "data": load_codec("mcucoder_native"), "color": "green",    "linestyle": "-", "marker": "."},
    # {"name": "MCUCoder (224x224)", "data": load_codec("mcucoder_224"), "color": "tab:green",  "linestyle": "-", "marker": "."},
    {"name": "FRAPPE",           "data": load_codec("frappe"),         "color": "tab:purple",  "linestyle": "-", "marker": "."},
]

# Sanity summary: point count plus bpp and throughput ranges for each codec.
for c in CODECS:
    pts = c["data"]
    if not pts:
        # load_codec() drops sweep values absent from the encode JSON, so a
        # codec can end up with no points; min()/max() would raise on that.
        print(f"{c['name']:>20}: 0 points (no sweep value present in both JSONs)")
        continue
    # Build each column once instead of once per min()/max() call.
    bpp = col(pts, "bpp")
    tput = col(pts, "throughput_MPx")
    print(f"{c['name']:>20}: {len(pts)} points,"
          f" bpp [{min(bpp):.4f}, {max(bpp):.4f}],"
          f" throughput [{min(tput):.3f}, {max(tput):.3f}] MPx/s")
                JPEG: 21 points, bpp [0.1731, 6.7867], throughput [212.354, 543.966] MPx/s
                AVIF: 21 points, bpp [0.0681, 5.8218], throughput [1.970, 6.037] MPx/s
             mbt2018: 8 points, bpp [0.1102, 1.5837], throughput [0.156, 0.174] MPx/s
    cheng2020-anchor: 6 points, bpp [0.1174, 0.8041], throughput [0.046, 0.094] MPx/s
       Kandinsky 2.1: 3 points, bpp [0.0625, 0.2500], throughput [0.207, 0.757] MPx/s
              WaLLoC: 10 points, bpp [0.0097, 0.6171], throughput [14.188, 74.832] MPx/s
          LiVeAction: 9 points, bpp [0.0341, 0.6921], throughput [1.542, 1.932] MPx/s
              FRAPPE: 21 points, bpp [0.0042, 0.9418], throughput [59.575, 914.938] MPx/s

Hide code cell source

# Rate-distortion panel: PSNR over bitrate, legend left at its default (shown).
plot_panel(CODECS, y_key="PSNR_dB", x_key="bpp")
_images/994e41e5cf7103d3592fc04f2f08d35dda241dd95bceaebb7871b2d937d09392.webp

Hide code cell source

# Speed/quality panel: encode throughput (y) over PSNR (x), no legend.
plot_panel(CODECS, y_key="throughput_MPx", x_key="PSNR_dB", legend=False)
_images/f201ea7d6ef6cb940fd5299951d68fc5e96addaff580603ca9738d4708eb2a10.webp

Hide code cell source

# Rate-distortion panel: SSIM over bitrate, no legend.
plot_panel(CODECS, y_key="SSIM", x_key="bpp", legend=False)
_images/b99e6df86d4d4f8d4a1a29ab9785cac1612a52b6ab16bf24d276b79c45a1b981.webp

Hide code cell source

# Rate-distortion panel: DISTS over bitrate, no legend.
plot_panel(CODECS, y_key="DISTS_dB", x_key="bpp", legend=False)
_images/e2937b9e14cf9e29183dbf13c67896278d49cad1cc453c396d8516483f0667c1.webp