From 8accafbe593a0a8a3628504d9126cb1ff439b812 Mon Sep 17 00:00:00 2001 From: Deivid Soto Date: Mon, 1 Jun 2026 08:29:10 +0200 Subject: [PATCH 1/2] fix(stream): derive H.264 level from frame macroblocks, not height Anamorphic 2.39:1 scaled to 1080 height = ~2586x1080 = 11016 MBs, busting level 4.1's 8192-MB MaxFS -> nvenc "InitializeEncoder failed: Invalid Level" (libx264: "frame MB size > level limit") -> 0 segments, session stalls. Most 4K rips are 2.39:1, so HLS playback was silently broken for them. H264LevelForFrame(w,h) derives the level from the real macroblock count (max of MB-tier and height-tier). hls.go computes output width and uses it. 16:9 unchanged; anamorphic bumps to 5.0 when needed. Discovered + verified during the trickplay smoke. --- internal/engine/hls.go | 20 ++++++++---- internal/engine/hwaccel.go | 57 +++++++++++++++++++++++++++++++++ internal/engine/hwaccel_test.go | 42 ++++++++++++++++++++---- 3 files changed, 107 insertions(+), 12 deletions(-) diff --git a/internal/engine/hls.go b/internal/engine/hls.go index 8e0868a..75cf991 100644 --- a/internal/engine/hls.go +++ b/internal/engine/hls.go @@ -22,6 +22,7 @@ import ( "fmt" "io" "log" + "math" "net/http" "os" "os/exec" @@ -1184,11 +1185,14 @@ func buildHLSFFmpegArgsAt(cfg HLSSessionConfig, probe *StreamProbe, tmpDir strin // per session start, polluting logs even though encode succeeds. args = append(args, "-vaapi_device", "/dev/dri/renderD128") } - // Derive H.264 level from the actual output height. A fixed "4.0" caps the - // encoder at 1080p — anything taller (1440p, 4K source on quality=original) - // fails libx264 with "frame MB size > level limit" and emits unplayable - // segments. The output height matches qcap.MaxHeight when the source is - // downscaled, otherwise probe.Height (already populated by ffprobe). + // Derive H.264 level from the actual output FRAME (width × height), not just + // height. A fixed "4.0" caps the encoder at 1080p; deriving by height alone + // still under-levels anamorphic content — a 2.39:1 source scaled to 1080 + // height is ~2586×1080 = 11016 MBs, busting level 4.1's 8192-MB cap, which + // fails the encode ("Invalid Level" on nvenc, "frame MB size > level limit" + // on libx264) and stalls the session. The output height matches qcap.MaxHeight + // when the source is downscaled, otherwise probe.Height; the output width is + // the source width scaled by the same factor (the filter chain preserves AR). qcap := resolveQualityCap(cfg.Quality) outputHeight := qcap.MaxHeight if outputHeight == 0 { @@ -1197,7 +1201,11 @@ func buildHLSFFmpegArgsAt(cfg HLSSessionConfig, probe *StreamProbe, tmpDir strin if outputHeight == 0 || (probe.Height > 0 && probe.Height < outputHeight) { outputHeight = probe.Height } - args = append(args, "-profile:v", "main", "-level:v", H264LevelForHeight(outputHeight)) + outputWidth := probe.Width + if probe.Height > 0 && outputHeight != probe.Height { + outputWidth = int(math.Round(float64(probe.Width) * float64(outputHeight) / float64(probe.Height))) + } + args = append(args, "-profile:v", "main", "-level:v", H264LevelForFrame(outputWidth, outputHeight)) // Bitrate must match the level libx264 actually picks for outputHeight, // not the qcap target for the user's requested label. If a user asks for diff --git a/internal/engine/hwaccel.go b/internal/engine/hwaccel.go index d7d1bd4..5b5907a 100644 --- a/internal/engine/hwaccel.go +++ b/internal/engine/hwaccel.go @@ -271,3 +271,60 @@ func H264LevelForHeight(height int) string { return "6.0" } } + +// h264LevelRank orders level strings so callers can pick the higher of two. +var h264LevelRank = map[string]int{ + "3.0": 30, "3.1": 31, "3.2": 32, + "4.0": 40, "4.1": 41, "4.2": 42, + "5.0": 50, "5.1": 51, "6.0": 60, +} + +// levelForMacroblocks returns the lowest H.264 level whose MaxFS (frame size in +// macroblocks) covers `mbs`. The height-based H264LevelForHeight tier is correct +// for 16:9, but anamorphic content (2.39:1 cinemascope) scaled to a given height +// has a much wider frame: a 2.39:1 source downscaled to 1080 height becomes +// ~2586×1080 = 11016 MBs, which busts level 4.1's 8192-MB MaxFS. ffmpeg then +// fails the encode — libx264 with "frame MB size > level limit", h264_nvenc with +// "InitializeEncoder failed: invalid param (8): Invalid Level" — and emits zero +// packets (the whole HLS session stalls at "preparando sesión"). MaxFS values +// from the H.264 spec, Table A-1. +func levelForMacroblocks(mbs int) string { + switch { + case mbs <= 1620: + return "3.0" + case mbs <= 3600: + return "3.1" + case mbs <= 5120: + return "3.2" + case mbs <= 8192: // levels 4.0 and 4.1 share MaxFS 8192; pick 4.1 for headroom + return "4.1" + case mbs <= 8704: + return "4.2" + case mbs <= 22080: + return "5.0" + case mbs <= 36864: + return "5.1" + default: + return "6.0" + } +} + +// H264LevelForFrame returns the lowest H.264 level that satisfies BOTH the +// height-derived tier (which carries macroblock-rate / fps headroom) and the +// actual frame's macroblock count (which catches anamorphic frames that are far +// wider than 16:9 at a given height). Use this instead of H264LevelForHeight +// wherever the output width is known — it never under-levels an ultra-wide +// frame, and for 16:9 content it returns exactly what H264LevelForHeight does. +func H264LevelForFrame(width, height int) string { + byHeight := H264LevelForHeight(height) + if width <= 0 || height <= 0 { + return byHeight + } + // Macroblocks are 16×16; partial blocks at the edge still count (ceil). + mbs := ((width + 15) / 16) * ((height + 15) / 16) + byMB := levelForMacroblocks(mbs) + if h264LevelRank[byMB] > h264LevelRank[byHeight] { + return byMB + } + return byHeight +} diff --git a/internal/engine/hwaccel_test.go b/internal/engine/hwaccel_test.go index cf3bec2..35bb08a 100644 --- a/internal/engine/hwaccel_test.go +++ b/internal/engine/hwaccel_test.go @@ -81,12 +81,12 @@ func TestResolveEncoderProfileHonoursConfiguredPreset(t *testing.T) { configured string wantPreset string }{ - {HWAccelNone, "ultrafast", "ultrafast"}, // libx264 honours - {HWAccelNone, "medium", "medium"}, // libx264 honours - {HWAccelNVENC, "p1", "p3"}, // NVENC ignores, sticks to p3 - {HWAccelNVENC, "veryfast", "p3"}, // NVENC ignores libx264 vocab - {HWAccelQSV, "veryslow", "veryfast"}, // QSV ignores, sticks to veryfast - {HWAccelVideoToolbox, "veryfast", ""}, // VideoToolbox has no preset + {HWAccelNone, "ultrafast", "ultrafast"}, // libx264 honours + {HWAccelNone, "medium", "medium"}, // libx264 honours + {HWAccelNVENC, "p1", "p3"}, // NVENC ignores, sticks to p3 + {HWAccelNVENC, "veryfast", "p3"}, // NVENC ignores libx264 vocab + {HWAccelQSV, "veryslow", "veryfast"}, // QSV ignores, sticks to veryfast + {HWAccelVideoToolbox, "veryfast", ""}, // VideoToolbox has no preset } for _, tc := range cases { got := ResolveEncoderProfile(tc.hw, tc.configured) @@ -154,3 +154,33 @@ func TestHWAccelDiagnosticLogLineSoftwareButEncodersFound(t *testing.T) { } } +func TestH264LevelForFrame(t *testing.T) { + cases := []struct { + name string + width, height int + want string + }{ + // 16:9 must match the height-only helper exactly (no regression). + {"720p 16:9", 1280, 720, "4.0"}, + {"1080p 16:9", 1920, 1080, "4.1"}, + {"1440p 16:9", 2560, 1440, "5.0"}, + {"2160p 16:9", 3840, 2160, "5.1"}, + // Anamorphic 2.39:1 at 1080 height — the regression: ~2586×1080 = 11016 + // MBs busts level 4.1 (8192 MaxFS); must bump to 5.0. + {"1080h anamorphic 2.39:1", 2586, 1080, "5.0"}, + // Anamorphic 720 height (1728×720 = 4860 MBs) still fits the 4.0 the + // height floor already picks for fps headroom. + {"720h anamorphic 2.4:1", 1728, 720, "4.0"}, + // Source 4K anamorphic (3840×1604) encoded at source: 24240 MBs → 5.1. + {"4K anamorphic source", 3840, 1604, "5.1"}, + // Width unknown → fall back to the height-only tier. + {"width unknown", 0, 1080, "4.1"}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + if got := H264LevelForFrame(c.width, c.height); got != c.want { + t.Errorf("H264LevelForFrame(%d,%d) = %q, want %q", c.width, c.height, got, c.want) + } + }) + } +} From c4ddd44a1a43be15918ed656d71d0589e145f2c0 Mon Sep 17 00:00:00 2001 From: Deivid Soto Date: Mon, 1 Jun 2026 19:36:41 +0200 Subject: [PATCH 2/2] feat(docker): glibc base with nvenc ffmpeg + par2/7z extractors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Alpine/musl can't run NVIDIA's glibc userspace (nvidia-smi, libnvidia-encode, the static nvenc ffmpeg), so HW transcode was impossible — every 4K/anamorphic HLS encode fell back to software or failed. Switch the runtime stage to debian:bookworm-slim + a static BtbN ffmpeg built with nvenc, add par2 (Usenet segment repair) + 7z (RAR/7z extraction), and set NVIDIA_DRIVER_CAPABILITIES=video,compute,utility so a plain --gpus all (or the compose device reservation) lights up nvenc with no extra flags. Falls back to libx264 automatically when no GPU is attached. Build stage cross-compiles (--platform=BUILDPLATFORM) so multi-arch stays fast; downloads forced over IPv4. --- Dockerfile | 67 ++++++++++++++++++++++++++++++++++++---------- docker-compose.yml | 14 +++++++++- 2 files changed, 66 insertions(+), 15 deletions(-) diff --git a/Dockerfile b/Dockerfile index 64ea4e2..7bb1416 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,8 @@ # ---- Build stage ---- -FROM golang:1.25-alpine AS builder +# Pin the builder to the host's native arch and cross-compile (CGO is off, so +# Go cross-compiles trivially). During multi-arch buildx this keeps `go build` +# at native speed instead of compiling under QEMU emulation for the foreign arch. +FROM --platform=$BUILDPLATFORM golang:1.25-alpine AS builder RUN apk add --no-cache git ca-certificates @@ -13,34 +16,63 @@ RUN go mod download COPY . . ARG VERSION=dev -RUN CGO_ENABLED=0 go build -ldflags="-s -w -X github.com/torrentclaw/unarr/internal/cmd.Version=${VERSION}" -trimpath -o /unarr ./cmd/unarr/ +ARG TARGETOS +ARG TARGETARCH +RUN CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} go build -ldflags="-s -w -X github.com/torrentclaw/unarr/internal/cmd.Version=${VERSION}" -trimpath -o /unarr ./cmd/unarr/ # ---- Runtime stage ---- -FROM alpine:3.22 +# glibc base (not Alpine/musl). NVIDIA's userspace — nvidia-smi and the +# libnvidia-encode / libcuda libs that `--gpus all` injects, plus the static +# BtbN ffmpeg that links nvenc — are all glibc ELF. On musl they fail with +# "no such file or directory" (missing glibc loader), so HW transcode is +# impossible on Alpine. bookworm-slim is the smallest base that runs the full +# NVIDIA stack while still falling back to software libx264 when no GPU is +# passed in. +FROM debian:bookworm-slim -# Use Alpine's native musl ffmpeg + ffprobe instead of the johnvansickle / -# BtbN static glibc builds — those need a glibc shim on Alpine and the -# vector-math symbols the GPL builds reference are not satisfiable by -# gcompat. Alpine ships ffmpeg ~7.x which is fine for the HLS transcoding -# pipeline (libx264 + libfdk-aac alternatives included). -RUN apk upgrade --no-cache && \ - apk add --no-cache ca-certificates tzdata ffmpeg wget +# par2 → repair corrupted Usenet segments (without it a single bad segment +# silently corrupts the output). +# 7z → archive extractor for RAR/7z-packed downloads (p7zip-full also reads +# RAR5, so unrar — unavailable as a free Debian package — isn't needed). +# tzdata/ca-certificates → TLS + correct local time for schedules/logs. +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + ca-certificates tzdata wget xz-utils par2 p7zip-full && \ + rm -rf /var/lib/apt/lists/* + +# TARGETARCH is set automatically by Docker buildx during cross-builds. +ARG TARGETARCH=amd64 + +# Static GPL ffmpeg + ffprobe with nvenc compiled in (BtbN builds). nvenc is +# linked but the actual libnvidia-encode.so is dlopen'd at runtime from the +# host driver that `--gpus all` exposes — so the same binary does HW transcode +# when a GPU is present and falls back to libx264 when it isn't. Placed in +# /usr/local/bin so ResolveFFmpeg picks them up off PATH ahead of any distro +# ffmpeg. arm64 has no nvenc but the build still serves software transcode. +RUN case "$TARGETARCH" in \ + amd64) FF_ARCH=linux64 ;; \ + arm64) FF_ARCH=linuxarm64 ;; \ + *) echo "unsupported TARGETARCH=$TARGETARCH" >&2; exit 1 ;; \ + esac && \ + wget -4 --tries=3 --timeout=30 -qO /tmp/ffmpeg.tar.xz "https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-${FF_ARCH}-gpl.tar.xz" && \ + mkdir -p /tmp/ff && tar -xJf /tmp/ffmpeg.tar.xz -C /tmp/ff --strip-components=1 && \ + cp /tmp/ff/bin/ffmpeg /tmp/ff/bin/ffprobe /usr/local/bin/ && \ + chmod +x /usr/local/bin/ffmpeg /usr/local/bin/ffprobe && \ + rm -rf /tmp/ffmpeg.tar.xz /tmp/ff # Bundle cloudflared so `unarr funnel on` (default: on, see config defaults) # Just Works on a headless container with no first-run network round-trip. -# TARGETARCH is set automatically by Docker buildx during cross-builds. -ARG TARGETARCH=amd64 RUN case "$TARGETARCH" in \ amd64) CF_ARCH=amd64 ;; \ arm64) CF_ARCH=arm64 ;; \ arm) CF_ARCH=armhf ;; \ *) echo "unsupported TARGETARCH=$TARGETARCH" >&2; exit 1 ;; \ esac && \ - wget -qO /usr/local/bin/cloudflared "https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-$CF_ARCH" && \ + wget -4 --tries=3 --timeout=30 -qO /usr/local/bin/cloudflared "https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-$CF_ARCH" && \ chmod +x /usr/local/bin/cloudflared # Non-root user (UID 1000 matches typical host user for volume permissions) -RUN addgroup -g 1000 unarr && adduser -u 1000 -G unarr -D -h /home/unarr unarr +RUN groupadd -g 1000 unarr && useradd -u 1000 -g 1000 -m -d /home/unarr unarr # Default directories RUN mkdir -p /config /downloads /data && \ @@ -55,6 +87,13 @@ ENV UNARR_CONFIG_DIR=/config ENV UNARR_DOWNLOAD_DIR=/downloads ENV XDG_DATA_HOME=/data +# NVIDIA passthrough defaults. `--gpus all` alone only grants the "utility" + +# "compute" capabilities; nvenc needs "video". Baking these here means a plain +# `docker run --gpus all` (or the compose device reservation) lights up HW +# transcode with zero extra flags. Harmless when no GPU is attached. +ENV NVIDIA_VISIBLE_DEVICES=all +ENV NVIDIA_DRIVER_CAPABILITIES=video,compute,utility + VOLUME ["/config", "/downloads", "/data"] ENTRYPOINT ["unarr"] diff --git a/docker-compose.yml b/docker-compose.yml index 8e0b32e..60446db 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -45,9 +45,21 @@ services: # Named volume keeps this off your media drive (avoids NFS locking issues). - unarr-data:/data - # Optional: limit CPU/RAM for transcoding on shared hosts + # --- NVIDIA GPU: hardware transcode (nvenc) --- + # Uncomment on a host with an NVIDIA GPU + nvidia-container-toolkit. The + # image already bundles an nvenc-enabled ffmpeg and sets + # NVIDIA_DRIVER_CAPABILITIES=video,compute,utility, so this device + # reservation is the only thing needed to enable HW transcode. Without a GPU + # the same image falls back to software (libx264) automatically — leave it + # commented. (docker run equivalent: add --gpus all) # deploy: # resources: + # reservations: + # devices: + # - driver: nvidia + # count: all + # capabilities: [gpu] + # # Optional: cap CPU/RAM for transcoding on shared hosts # limits: # memory: 2G # cpus: "4.0"