From 8accafbe593a0a8a3628504d9126cb1ff439b812 Mon Sep 17 00:00:00 2001
From: Deivid Soto <hello@torrentclaw.com>
Date: Mon, 1 Jun 2026 08:29:10 +0200
Subject: [PATCH 1/2] fix(stream): derive H.264 level from frame macroblocks,
 not height

Anamorphic 2.39:1 scaled to 1080 height = ~2586x1080 = 11016 MBs, busting
level 4.1's 8192-MB MaxFS -> nvenc "InitializeEncoder failed: Invalid Level"
(libx264: "frame MB size > level limit") -> 0 segments, session stalls. Most
4K rips are 2.39:1, so HLS playback was silently broken for them.

H264LevelForFrame(w,h) derives the level from the real macroblock count
(max of MB-tier and height-tier). hls.go computes output width and uses it.
16:9 is unchanged; anamorphic output is bumped to a higher level when the
frame needs it (5.0 at 1080 height). Discovered and verified during the
trickplay smoke test.
---
 internal/engine/hls.go          | 20 ++++++++----
 internal/engine/hwaccel.go      | 57 +++++++++++++++++++++++++++++++++
 internal/engine/hwaccel_test.go | 42 ++++++++++++++++++++----
 3 files changed, 107 insertions(+), 12 deletions(-)

diff --git a/internal/engine/hls.go b/internal/engine/hls.go
index 8e0868a..75cf991 100644
--- a/internal/engine/hls.go
+++ b/internal/engine/hls.go
@@ -22,6 +22,7 @@ import (
 	"fmt"
 	"io"
 	"log"
+	"math"
 	"net/http"
 	"os"
 	"os/exec"
@@ -1184,11 +1185,14 @@ func buildHLSFFmpegArgsAt(cfg HLSSessionConfig, probe *StreamProbe, tmpDir strin
 		// per session start, polluting logs even though encode succeeds.
 		args = append(args, "-vaapi_device", "/dev/dri/renderD128")
 	}
-	// Derive H.264 level from the actual output height. A fixed "4.0" caps the
-	// encoder at 1080p — anything taller (1440p, 4K source on quality=original)
-	// fails libx264 with "frame MB size > level limit" and emits unplayable
-	// segments. The output height matches qcap.MaxHeight when the source is
-	// downscaled, otherwise probe.Height (already populated by ffprobe).
+	// Derive H.264 level from the actual output FRAME (width × height), not just
+	// height. A fixed "4.0" caps the encoder at 1080p; deriving by height alone
+	// still under-levels anamorphic content — a 2.39:1 source scaled to 1080
+	// height is ~2586×1080 = 11016 MBs, busting level 4.1's 8192-MB cap, which
+	// fails the encode ("Invalid Level" on nvenc, "frame MB size > level limit"
+	// on libx264) and stalls the session. The output height matches qcap.MaxHeight
+	// when the source is downscaled, otherwise probe.Height; the output width is
+	// the source width scaled by the same factor (the filter chain preserves AR).
 	qcap := resolveQualityCap(cfg.Quality)
 	outputHeight := qcap.MaxHeight
 	if outputHeight == 0 {
@@ -1197,7 +1201,11 @@ func buildHLSFFmpegArgsAt(cfg HLSSessionConfig, probe *StreamProbe, tmpDir strin
 	if outputHeight == 0 || (probe.Height > 0 && probe.Height < outputHeight) {
 		outputHeight = probe.Height
 	}
-	args = append(args, "-profile:v", "main", "-level:v", H264LevelForHeight(outputHeight))
+	outputWidth := probe.Width
+	if probe.Height > 0 && outputHeight != probe.Height {
+		outputWidth = int(math.Round(float64(probe.Width) * float64(outputHeight) / float64(probe.Height)))
+	}
+	args = append(args, "-profile:v", "main", "-level:v", H264LevelForFrame(outputWidth, outputHeight))
 
 	// Bitrate must match the level libx264 actually picks for outputHeight,
 	// not the qcap target for the user's requested label. If a user asks for
diff --git a/internal/engine/hwaccel.go b/internal/engine/hwaccel.go
index d7d1bd4..5b5907a 100644
--- a/internal/engine/hwaccel.go
+++ b/internal/engine/hwaccel.go
@@ -271,3 +271,60 @@ func H264LevelForHeight(height int) string {
 		return "6.0"
 	}
 }
+
+// h264LevelRank ranks level strings for comparison; a level absent from the map reads as rank 0.
+var h264LevelRank = map[string]int{
+	"3.0": 30, "3.1": 31, "3.2": 32,
+	"4.0": 40, "4.1": 41, "4.2": 42,
+	"5.0": 50, "5.1": 51, "6.0": 60,
+}
+
+// levelForMacroblocks maps a frame size in macroblocks to the lowest H.264
+// level whose MaxFS (Table A-1 of the H.264 spec) is at least `mbs`. Height
+// alone is a good proxy only for 16:9: a 2.39:1 source scaled to 1080 height
+// is ~2586×1080 = 11016 MBs, over level 4.1's 8192-MB MaxFS. An under-levelled
+// encode fails — libx264 with "frame MB size > level limit", h264_nvenc with
+// "InitializeEncoder failed: invalid param (8): Invalid Level" — and emits
+// zero packets, stalling the HLS session at "preparando sesión" ("preparing
+// session").
+func levelForMacroblocks(mbs int) string {
+	// Ascending MaxFS ceilings; the first one that fits wins.
+	tiers := [...]struct {
+		maxFS int
+		level string
+	}{
+		{1620, "3.0"},
+		{3600, "3.1"},
+		{5120, "3.2"},
+		{8192, "4.1"}, // 4.0 shares MaxFS 8192; 4.1 is picked for headroom
+		{8704, "4.2"},
+		{22080, "5.0"},
+		{36864, "5.1"},
+	}
+	for _, tier := range tiers {
+		if mbs <= tier.maxFS {
+			return tier.level
+		}
+	}
+	return "6.0"
+}
+
+// H264LevelForFrame returns the lowest H.264 level that satisfies BOTH the
+// height-derived tier from H264LevelForHeight and the MaxFS tier for the
+// frame's actual macroblock count, so an ultra-wide (anamorphic) frame is
+// never under-levelled. With a non-positive width or height, or a height tier
+// that h264LevelRank cannot rank, the height tier is returned unchanged
+// rather than letting a zero rank lose to the macroblock tier.
+func H264LevelForFrame(width, height int) string {
+	byHeight := H264LevelForHeight(height)
+	if width <= 0 || height <= 0 {
+		return byHeight
+	}
+	// Macroblocks are 16×16; partial blocks at the edge still count (ceil).
+	mbs := ((width + 15) / 16) * ((height + 15) / 16)
+	byMB := levelForMacroblocks(mbs)
+	if heightRank, ok := h264LevelRank[byHeight]; ok && h264LevelRank[byMB] > heightRank {
+		return byMB
+	}
+	return byHeight
+}
diff --git a/internal/engine/hwaccel_test.go b/internal/engine/hwaccel_test.go
index cf3bec2..35bb08a 100644
--- a/internal/engine/hwaccel_test.go
+++ b/internal/engine/hwaccel_test.go
@@ -81,12 +81,12 @@ func TestResolveEncoderProfileHonoursConfiguredPreset(t *testing.T) {
 		configured string
 		wantPreset string
 	}{
-		{HWAccelNone, "ultrafast", "ultrafast"},  // libx264 honours
-		{HWAccelNone, "medium", "medium"},        // libx264 honours
-		{HWAccelNVENC, "p1", "p3"},               // NVENC ignores, sticks to p3
-		{HWAccelNVENC, "veryfast", "p3"},         // NVENC ignores libx264 vocab
-		{HWAccelQSV, "veryslow", "veryfast"},     // QSV ignores, sticks to veryfast
-		{HWAccelVideoToolbox, "veryfast", ""},    // VideoToolbox has no preset
+		{HWAccelNone, "ultrafast", "ultrafast"}, // libx264 honours
+		{HWAccelNone, "medium", "medium"},       // libx264 honours
+		{HWAccelNVENC, "p1", "p3"},              // NVENC ignores, sticks to p3
+		{HWAccelNVENC, "veryfast", "p3"},        // NVENC ignores libx264 vocab
+		{HWAccelQSV, "veryslow", "veryfast"},    // QSV ignores, sticks to veryfast
+		{HWAccelVideoToolbox, "veryfast", ""},   // VideoToolbox has no preset
 	}
 	for _, tc := range cases {
 		got := ResolveEncoderProfile(tc.hw, tc.configured)
@@ -154,3 +154,33 @@ func TestHWAccelDiagnosticLogLineSoftwareButEncodersFound(t *testing.T) {
 	}
 }
 
+func TestH264LevelForFrame(t *testing.T) {
+	type frameCase struct {
+		label string
+		w, h  int
+		level string
+	}
+	table := []frameCase{
+		// Plain 16:9 frames keep the level the height-only helper assigns.
+		{"720p 16:9", 1280, 720, "4.0"},
+		{"1080p 16:9", 1920, 1080, "4.1"},
+		{"1440p 16:9", 2560, 1440, "5.0"},
+		{"2160p 16:9", 3840, 2160, "5.1"},
+		// 2.39:1 at 1080 height: 11016 MBs exceeds 4.1's 8192 MaxFS → 5.0.
+		{"1080h anamorphic 2.39:1", 2586, 1080, "5.0"},
+		// 2.4:1 at 720 height: 4860 MBs fits under the 4.0 height tier.
+		{"720h anamorphic 2.4:1", 1728, 720, "4.0"},
+		// 4K anamorphic at source size (3840×1604): 24240 MBs → 5.1.
+		{"4K anamorphic source", 3840, 1604, "5.1"},
+		// Unknown width (0) falls back to the height-only tier.
+		{"width unknown", 0, 1080, "4.1"},
+	}
+	for _, tc := range table {
+		t.Run(tc.label, func(t *testing.T) {
+			got := H264LevelForFrame(tc.w, tc.h)
+			if got != tc.level {
+				t.Errorf("H264LevelForFrame(%d,%d) = %q, want %q", tc.w, tc.h, got, tc.level)
+			}
+		})
+	}
+}

From c4ddd44a1a43be15918ed656d71d0589e145f2c0 Mon Sep 17 00:00:00 2001
From: Deivid Soto <hello@torrentclaw.com>
Date: Mon, 1 Jun 2026 19:36:41 +0200
Subject: [PATCH 2/2] feat(docker): glibc base with nvenc ffmpeg + par2/7z
 extractors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Alpine/musl can't run NVIDIA's glibc userspace (nvidia-smi, libnvidia-encode,
the static nvenc ffmpeg), so HW transcode was impossible — every 4K/anamorphic
HLS encode fell back to software or failed. Switch the runtime stage to
debian:bookworm-slim + a static BtbN ffmpeg built with nvenc, add par2
(Usenet segment repair) + 7z (RAR/7z extraction), and set
NVIDIA_DRIVER_CAPABILITIES=video,compute,utility so a plain --gpus all (or the
compose device reservation) lights up nvenc with no extra flags. Falls back to
libx264 automatically when no GPU is attached. Build stage cross-compiles
(--platform=BUILDPLATFORM) so multi-arch stays fast; downloads forced over IPv4.
---
 Dockerfile         | 67 ++++++++++++++++++++++++++++++++++++----------
 docker-compose.yml | 14 +++++++++-
 2 files changed, 66 insertions(+), 15 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 64ea4e2..7bb1416 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,8 @@
 # ---- Build stage ----
-FROM golang:1.25-alpine AS builder
+# Pin the builder to the host's native arch and cross-compile (CGO is off, so
+# Go cross-compiles trivially). During multi-arch buildx this keeps `go build`
+# at native speed instead of compiling under QEMU emulation for the foreign arch.
+FROM --platform=$BUILDPLATFORM golang:1.25-alpine AS builder
 
 RUN apk add --no-cache git ca-certificates
 
@@ -13,34 +16,63 @@ RUN go mod download
 COPY . .
 
 ARG VERSION=dev
-RUN CGO_ENABLED=0 go build -ldflags="-s -w -X github.com/torrentclaw/unarr/internal/cmd.Version=${VERSION}" -trimpath -o /unarr ./cmd/unarr/
+ARG TARGETOS
+ARG TARGETARCH
+RUN CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} go build -ldflags="-s -w -X github.com/torrentclaw/unarr/internal/cmd.Version=${VERSION}" -trimpath -o /unarr ./cmd/unarr/
 
 # ---- Runtime stage ----
-FROM alpine:3.22
+# glibc base (not Alpine/musl). NVIDIA's userspace — nvidia-smi and the
+# libnvidia-encode / libcuda libs that `--gpus all` injects, plus the static
+# BtbN ffmpeg that links nvenc — are all glibc ELF. On musl they fail with
+# "no such file or directory" (missing glibc loader), so HW transcode is
+# impossible on Alpine. bookworm-slim is the smallest base that runs the full
+# NVIDIA stack while still falling back to software libx264 when no GPU is
+# passed in.
+FROM debian:bookworm-slim
 
-# Use Alpine's native musl ffmpeg + ffprobe instead of the johnvansickle /
-# BtbN static glibc builds — those need a glibc shim on Alpine and the
-# vector-math symbols the GPL builds reference are not satisfiable by
-# gcompat. Alpine ships ffmpeg ~7.x which is fine for the HLS transcoding
-# pipeline (libx264 + libfdk-aac alternatives included).
-RUN apk upgrade --no-cache && \
-    apk add --no-cache ca-certificates tzdata ffmpeg wget
+# par2  → repair corrupted Usenet segments (without it a single bad segment
+#         silently corrupts the output).
+# 7z    → extractor for RAR/7z-packed downloads, replacing unrar (non-free in Debian).
+#         NOTE(review): Debian's p7zip-full may need non-free p7zip-rar for RAR/RAR5 — verify.
+# tzdata/ca-certificates → TLS + correct local time for schedules/logs.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+      ca-certificates tzdata wget xz-utils par2 p7zip-full && \
+    rm -rf /var/lib/apt/lists/*
+
+# TARGETARCH is set automatically by Docker buildx during cross-builds.
+ARG TARGETARCH=amd64
+
+# Static GPL ffmpeg + ffprobe with nvenc compiled in (BtbN builds). nvenc is
+# linked but the actual libnvidia-encode.so is dlopen'd at runtime from the
+# host driver that `--gpus all` exposes — so the same binary does HW transcode
+# when a GPU is present and falls back to libx264 when it isn't. Placed in
+# /usr/local/bin so ResolveFFmpeg picks them up off PATH ahead of any distro
+# ffmpeg. arm64 has no nvenc (software transcode only); 32-bit arm is rejected by the case below.
+RUN case "$TARGETARCH" in \
+      amd64) FF_ARCH=linux64 ;; \
+      arm64) FF_ARCH=linuxarm64 ;; \
+      *)     echo "unsupported TARGETARCH=$TARGETARCH" >&2; exit 1 ;; \
+    esac && \
+    wget -4 --tries=3 --timeout=30 -qO /tmp/ffmpeg.tar.xz "https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-${FF_ARCH}-gpl.tar.xz" && \
+    mkdir -p /tmp/ff && tar -xJf /tmp/ffmpeg.tar.xz -C /tmp/ff --strip-components=1 && \
+    cp /tmp/ff/bin/ffmpeg /tmp/ff/bin/ffprobe /usr/local/bin/ && \
+    chmod +x /usr/local/bin/ffmpeg /usr/local/bin/ffprobe && \
+    rm -rf /tmp/ffmpeg.tar.xz /tmp/ff
 
 # Bundle cloudflared so `unarr funnel on` (default: on, see config defaults)
 # Just Works on a headless container with no first-run network round-trip.
-# TARGETARCH is set automatically by Docker buildx during cross-builds.
-ARG TARGETARCH=amd64
 RUN case "$TARGETARCH" in \
       amd64) CF_ARCH=amd64 ;; \
       arm64) CF_ARCH=arm64 ;; \
       arm)   CF_ARCH=armhf ;; \
       *)     echo "unsupported TARGETARCH=$TARGETARCH" >&2; exit 1 ;; \
     esac && \
-    wget -qO /usr/local/bin/cloudflared "https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-$CF_ARCH" && \
+    wget -4 --tries=3 --timeout=30 -qO /usr/local/bin/cloudflared "https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-$CF_ARCH" && \
     chmod +x /usr/local/bin/cloudflared
 
 # Non-root user (UID 1000 matches typical host user for volume permissions)
-RUN addgroup -g 1000 unarr && adduser -u 1000 -G unarr -D -h /home/unarr unarr
+RUN groupadd -g 1000 unarr && useradd -u 1000 -g 1000 -m -d /home/unarr unarr
 
 # Default directories
 RUN mkdir -p /config /downloads /data && \
@@ -55,6 +87,13 @@ ENV UNARR_CONFIG_DIR=/config
 ENV UNARR_DOWNLOAD_DIR=/downloads
 ENV XDG_DATA_HOME=/data
 
+# NVIDIA passthrough defaults. `--gpus all` alone only grants the "utility" +
+# "compute" capabilities; nvenc needs "video". Baking these here means a plain
+# `docker run --gpus all` (or the compose device reservation) lights up HW
+# transcode with zero extra flags. Harmless when no GPU is attached.
+ENV NVIDIA_VISIBLE_DEVICES=all
+ENV NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
+
 VOLUME ["/config", "/downloads", "/data"]
 
 ENTRYPOINT ["unarr"]
diff --git a/docker-compose.yml b/docker-compose.yml
index 8e0b32e..60446db 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -45,9 +45,21 @@ services:
       # Named volume keeps this off your media drive (avoids NFS locking issues).
       - unarr-data:/data
 
-    # Optional: limit CPU/RAM for transcoding on shared hosts
+    # --- NVIDIA GPU: hardware transcode (nvenc) ---
+    # Uncomment on a host with an NVIDIA GPU + nvidia-container-toolkit. The
+    # image already bundles an nvenc-enabled ffmpeg and sets
+    # NVIDIA_DRIVER_CAPABILITIES=video,compute,utility, so this device
+    # reservation is the only thing needed to enable HW transcode. Without a GPU
+    # the same image falls back to software (libx264) automatically — leave it
+    # commented.   (docker run equivalent: add  --gpus all)
     # deploy:
     #   resources:
+    #     reservations:
+    #       devices:
+    #         - driver: nvidia
+    #           count: all
+    #           capabilities: [gpu]
+    #     # Optional: cap CPU/RAM for transcoding on shared hosts
     #     limits:
     #       memory: 2G
     #       cpus: "4.0"