feat(stream): burn bitmap (PGS/DVB) subtitles into the video via overlay

Bitmap subs can't be served as WebVTT, so the user picks one and the daemon
re-encodes with it overlaid. HLSSessionConfig.BurnSubtitleIndex (*int, nil=no
burn) flows into the cache key + a -filter_complex graph:
  [0:v:0]<vchain>[base];[0:s:N][base]scale2ref[sub][base2];[base2][sub]overlay[vout]
Overlay after the tonemap (SDR subs keep brightness); scale2ref fits the PGS
canvas to the output. Invalid/text/out-of-range index -> clean-encode fallback.
IsTextSubtitle now includes "text" (parity with the web classifier).
This commit is contained in:
Deivid Soto 2026-06-01 09:51:27 +02:00
parent 8207d1d2a9
commit 665ec0a34f
9 changed files with 196 additions and 49 deletions

View file

@ -151,7 +151,15 @@ type HLSSessionConfig struct {
FileName string
Quality string // "2160p"|"1080p"|"720p"|"480p"|"original"|""
AudioIndex int // 0-based ffmpeg audio stream selection (-map 0:a:N). -1 = default.
Transcode TranscodeRuntime
// BurnSubtitleIndex burns a BITMAP subtitle (PGS/DVB) at this 0-based
// subtitle stream index into the video. nil = no burn (text subs are served
// as separate WebVTT). A pointer (not int) so the zero value 0 — a valid
// stream index — can't be mistaken for a burn request when a caller leaves
// the field unset. Part of the cache key so a burned encode never collides
// with the clean one. Forces the video re-encode the HLS path already does
// to also composite the subtitle overlay.
BurnSubtitleIndex *int
Transcode TranscodeRuntime
// Cache is an optional persistent segment cache keyed by (source, quality,
// audio, burn-in subtitle). When set, completed encodes are kept across
// sessions so re-plays of the same file at the same quality skip ffmpeg
// entirely. nil disables
@ -169,6 +177,15 @@ func (cfg HLSSessionConfig) sourceRef() string {
return cfg.SourcePath
}
// burnSubtitleIndexOrNone resolves the optional burn-in subtitle pointer to the
// int sentinel the cache key and filtergraph use.
//
// It returns -1 ("no burn") when BurnSubtitleIndex is nil or points at a
// negative value, and the pointed-to 0-based subtitle stream index otherwise.
//
// Negative values are collapsed onto the single -1 sentinel because a negative
// index can never address a subtitle stream: it means "no burn" just like nil.
// Returning it verbatim would feed a distinct number into the cache key and
// store the same clean encode under more than one key.
func (cfg HLSSessionConfig) burnSubtitleIndexOrNone() int {
	if cfg.BurnSubtitleIndex == nil || *cfg.BurnSubtitleIndex < 0 {
		return -1
	}
	return *cfg.BurnSubtitleIndex
}
// logName is a short, log-friendly source label. For local files it's the base
// name; for a URL source (no SourcePath) it prefers FileName over the raw URL
// (which would leak a query-string token into the logs).
@ -383,9 +400,9 @@ func StartHLSSession(ctx context.Context, cfg HLSSessionConfig) (*HLSSession, er
// Debrid URL sessions key by CacheID (info_hash) so re-plays hit cache
// despite the URL changing each resolution; local files key by path.
if cfg.CacheID != "" {
cacheKey = cfg.Cache.KeyForID(cfg.CacheID, cfg.Quality, cfg.AudioIndex)
cacheKey = cfg.Cache.KeyForID(cfg.CacheID, cfg.Quality, cfg.AudioIndex, cfg.burnSubtitleIndexOrNone())
} else {
cacheKey = cfg.Cache.KeyFor(cfg.SourcePath, cfg.Quality, cfg.AudioIndex)
cacheKey = cfg.Cache.KeyFor(cfg.SourcePath, cfg.Quality, cfg.AudioIndex, cfg.burnSubtitleIndexOrNone())
}
// Integrity gate: HasComplete just stats the marker. If init.mp4 or
// the last segment vanished (external rm, partial-disk failure), we
@ -1217,8 +1234,31 @@ func buildHLSFFmpegArgsAt(cfg HLSSessionConfig, probe *StreamProbe, tmpDir strin
args = append(args, "-output_ts_offset", strconv.FormatFloat(startSec, 'f', 3, 64))
}
// Map video + selected audio. Always use first video stream.
args = append(args, "-map", "0:v:0")
// Burn a bitmap subtitle (PGS/DVB) into the video when requested. Validate
// the index points at a real bitmap track in range — text subs are served as
// separate WebVTT and never burned, and a stale/out-of-range index falls
// back to a clean encode rather than failing the session.
burnIdx := -1
if reqBurn := cfg.burnSubtitleIndexOrNone(); reqBurn >= 0 {
if reqBurn < len(probe.SubtitleTracks) &&
!probe.SubtitleTracks[reqBurn].IsTextSubtitle() {
burnIdx = reqBurn
} else {
log.Printf("[hls %s] burn subtitle %d ignored — not a bitmap track in range",
shortHLSID(cfg.SessionID), reqBurn)
}
}
// Map video + selected audio. With burn-in the video comes from the
// filter_complex graph ([vout], built below); otherwise map the source video
// stream directly. ffmpeg resolves the [vout] label from -filter_complex
// regardless of argv order, so mapping it here (before audio) keeps video as
// output stream 0.
if burnIdx >= 0 {
args = append(args, "-map", "[vout]")
} else {
args = append(args, "-map", "0:v:0")
}
audioIdx := cfg.AudioIndex
if audioIdx < 0 {
audioIdx = 0
@ -1362,19 +1402,37 @@ func buildHLSFFmpegArgsAt(cfg HLSSessionConfig, probe *StreamProbe, tmpDir strin
if probe.HDR != "" && cfg.Transcode.TonemapHDR {
tonemap = hdrTonemapChain
}
var filterChain string
// Core video chain (scale + optional tonemap + pixel format + color metadata),
// WITHOUT the optional hwUploadTail — that has to run last, after any subtitle
// overlay, so it's appended separately below.
var vchain string
if maxH > 0 && probe.Height > maxH {
filterChain = fmt.Sprintf(
"scale=-2:%d:force_original_aspect_ratio=decrease,scale=trunc(iw/2)*2:trunc(ih/2)*2,%sformat=%s%s%s",
maxH, tonemap, pixFormat, colorTail, hwUploadTail,
vchain = fmt.Sprintf(
"scale=-2:%d:force_original_aspect_ratio=decrease,scale=trunc(iw/2)*2:trunc(ih/2)*2,%sformat=%s%s",
maxH, tonemap, pixFormat, colorTail,
)
} else {
filterChain = fmt.Sprintf(
"scale=trunc(iw/2)*2:trunc(ih/2)*2,%sformat=%s%s%s",
tonemap, pixFormat, colorTail, hwUploadTail,
vchain = fmt.Sprintf(
"scale=trunc(iw/2)*2:trunc(ih/2)*2,%sformat=%s%s",
tonemap, pixFormat, colorTail,
)
}
args = append(args, "-vf", filterChain)
if burnIdx >= 0 {
// Burn-in: process the video to its final size + SDR colorspace FIRST,
// then composite the subtitle. Overlaying SDR PGS/DVB graphics onto a
// still-HDR (PQ) frame and tonemapping afterwards would crush the
// subtitle brightness, so the overlay must come after the tonemap. The
// subtitle canvas is scaled to the processed frame via scale2ref so a
// PGS/DVB stream authored at any resolution lines up. hwUploadTail
// (VAAPI) runs last, on the composited frame.
filterComplex := fmt.Sprintf(
"[0:v:0]%s[base];[0:s:%d][base]scale2ref[sub][base2];[base2][sub]overlay%s[vout]",
vchain, burnIdx, hwUploadTail,
)
args = append(args, "-filter_complex", filterComplex)
} else {
args = append(args, "-vf", vchain+hwUploadTail)
}
// Audio: AAC stereo 48 kHz — broadest browser compatibility.
audioBitrate := cfg.Transcode.AudioBitrate

View file

@ -153,12 +153,12 @@ func (c *HLSCache) ReleaseWriter(key string) {
// KeyFor derives a stable cache key for (source, quality, audioIndex,
// burnSubtitleIndex). Using the absolute source path means renaming a file
// invalidates the cache, which is correct — segment content is tied to the
// encoded source. burnSubtitleIndex is hashed as well, so an encode with a
// subtitle burned in gets a different key from the clean encode of the same
// source. If the path cannot be made absolute it is hashed as given.
func (c *HLSCache) KeyFor(sourcePath, quality string, audioIndex int) string {
func (c *HLSCache) KeyFor(sourcePath, quality string, audioIndex, burnSubtitleIndex int) string {
abs, err := filepath.Abs(sourcePath)
if err != nil {
abs = sourcePath
}
h := sha256.Sum256([]byte(fmt.Sprintf("%s|%s|%d", abs, quality, audioIndex)))
h := sha256.Sum256([]byte(fmt.Sprintf("%s|%s|%d|%d", abs, quality, audioIndex, burnSubtitleIndex)))
return hex.EncodeToString(h[:8]) // 16 hex chars — collision-safe enough for per-host cache
}
@ -167,8 +167,8 @@ func (c *HLSCache) KeyFor(sourcePath, quality string, audioIndex int) string {
// the debrid direct URL is re-resolved per play and would never cache-hit, so
// we key by the torrent info_hash — the same content always maps to the same
// key across plays. NOT run through filepath.Abs (an id/URL is not a path).
func (c *HLSCache) KeyForID(id, quality string, audioIndex int) string {
h := sha256.Sum256([]byte(fmt.Sprintf("%s|%s|%d", id, quality, audioIndex)))
func (c *HLSCache) KeyForID(id, quality string, audioIndex, burnSubtitleIndex int) string {
// Hash the "|"-joined tuple. burnSubtitleIndex is part of it, so the same id
// with and without a burned-in subtitle yields different keys.
h := sha256.Sum256([]byte(fmt.Sprintf("%s|%s|%d|%d", id, quality, audioIndex, burnSubtitleIndex)))
// Only the first 8 bytes of the digest are kept (16 hex characters).
return hex.EncodeToString(h[:8])
}

View file

@ -98,7 +98,7 @@ func TestHLSCacheSmoke(t *testing.T) {
encodeDur := time.Since(t0)
t.Logf("session 1: MISS completed in %s", encodeDur.Round(time.Millisecond))
key := cache.KeyFor(source, "720p", 0)
key := cache.KeyFor(source, "720p", 0, -1)
if !cache.HasComplete(key) {
t.Fatalf("cache.HasComplete(%s) is false after successful encode", key)
}

View file

@ -21,18 +21,21 @@ func newTestCache(t *testing.T, sizeGB int) *HLSCache {
// TestKeyForStable checks that KeyFor returns the same key for identical
// inputs and a different key when the quality, audio index, burn subtitle
// index, or source path changes.
func TestKeyForStable(t *testing.T) {
c := newTestCache(t, 1)
k1 := c.KeyFor("/a/b/movie.mkv", "1080p", 0)
k2 := c.KeyFor("/a/b/movie.mkv", "1080p", 0)
k1 := c.KeyFor("/a/b/movie.mkv", "1080p", 0, -1)
k2 := c.KeyFor("/a/b/movie.mkv", "1080p", 0, -1)
if k1 != k2 {
t.Fatalf("expected stable keys, got %q vs %q", k1, k2)
}
if c.KeyFor("/a/b/movie.mkv", "720p", 0) == k1 {
if c.KeyFor("/a/b/movie.mkv", "720p", 0, -1) == k1 {
t.Fatal("quality should change key")
}
if c.KeyFor("/a/b/movie.mkv", "1080p", 1) == k1 {
if c.KeyFor("/a/b/movie.mkv", "1080p", 1, -1) == k1 {
t.Fatal("audio index should change key")
}
if c.KeyFor("/x/y/other.mkv", "1080p", 0) == k1 {
if c.KeyFor("/a/b/movie.mkv", "1080p", 0, 2) == k1 {
t.Fatal("burn subtitle index should change key")
}
if c.KeyFor("/x/y/other.mkv", "1080p", 0, -1) == k1 {
t.Fatal("path should change key")
}
}

View file

@ -111,12 +111,80 @@ func TestHLSSourceRefAndCacheID(t *testing.T) {
c := &HLSCache{root: "/tmp/cache"}
// Same CacheID + quality + audio → same key regardless of the (volatile) URL.
k1 := c.KeyForID("hash1", "720p", -1)
k2 := c.KeyForID("hash1", "720p", -1)
k1 := c.KeyForID("hash1", "720p", -1, -1)
k2 := c.KeyForID("hash1", "720p", -1, -1)
if k1 != k2 {
t.Errorf("KeyForID not stable: %q != %q", k1, k2)
}
if c.KeyForID("hash2", "720p", -1) == k1 {
if c.KeyForID("hash2", "720p", -1, -1) == k1 {
t.Error("KeyForID collision across distinct ids")
}
}
// Burn-in: a bitmap subtitle index routes the video through -filter_complex with
// scale2ref + overlay and maps [vout]; a nil / text / out-of-range index keeps
// the plain -vf path (text subs are served as WebVTT, never burned).
func TestBuildHLSFFmpegArgsBurnSubtitle(t *testing.T) {
idx := func(n int) *int { return &n }
base := func() HLSSessionConfig {
return HLSSessionConfig{
SessionID: "burn",
SourcePath: "/tmp/movie.mkv",
Quality: "1080p",
Transcode: TranscodeRuntime{
FFmpegPath: "/usr/bin/ffmpeg",
FFprobePath: "/usr/bin/ffprobe",
HWAccel: HWAccelNone,
},
}
}
probe := &StreamProbe{
Width: 1920, Height: 1080, DurationSec: 100,
SubtitleTracks: []ProbeSubtitleTrack{
{Index: 0, Codec: "subrip"}, // text → not burnable
{Index: 1, Codec: "hdmv_pgs_subtitle"}, // bitmap → burnable
},
}
t.Run("nil = clean -vf path", func(t *testing.T) {
got := strings.Join(buildHLSFFmpegArgsAt(base(), probe, "/tmp/d", 0, 0), " ")
if strings.Contains(got, "-filter_complex") || strings.Contains(got, "overlay") {
t.Errorf("no-burn argv must not overlay: %s", got)
}
if !strings.Contains(got, "-map 0:v:0") || !strings.Contains(got, "-vf") {
t.Errorf("no-burn argv must -map 0:v:0 with -vf: %s", got)
}
})
t.Run("bitmap index burns via filter_complex", func(t *testing.T) {
cfg := base()
cfg.BurnSubtitleIndex = idx(1)
got := strings.Join(buildHLSFFmpegArgsAt(cfg, probe, "/tmp/d", 0, 0), " ")
for _, want := range []string{"-filter_complex", "[0:s:1]", "scale2ref", "overlay", "-map [vout]"} {
if !strings.Contains(got, want) {
t.Errorf("burn argv missing %q: %s", want, got)
}
}
if strings.Contains(got, "-map 0:v:0") {
t.Errorf("burn argv must map [vout], not 0:v:0: %s", got)
}
})
t.Run("text index is ignored (served as WebVTT)", func(t *testing.T) {
cfg := base()
cfg.BurnSubtitleIndex = idx(0) // subrip → not a bitmap track
got := strings.Join(buildHLSFFmpegArgsAt(cfg, probe, "/tmp/d", 0, 0), " ")
if strings.Contains(got, "overlay") || strings.Contains(got, "-filter_complex") {
t.Errorf("text-sub burn must fall back to clean encode: %s", got)
}
})
t.Run("out-of-range index is ignored", func(t *testing.T) {
cfg := base()
cfg.BurnSubtitleIndex = idx(9)
got := strings.Join(buildHLSFFmpegArgsAt(cfg, probe, "/tmp/d", 0, 0), " ")
if strings.Contains(got, "overlay") {
t.Errorf("out-of-range burn must fall back to clean encode: %s", got)
}
})
}

View file

@ -61,7 +61,7 @@ type ProbeSubtitleTrack struct {
// without re-rendering. Bitmap subs (PGS, DVB) need burn-in.
func (s ProbeSubtitleTrack) IsTextSubtitle() bool {
switch s.Codec {
case "subrip", "srt", "ass", "ssa", "webvtt", "mov_text":
case "subrip", "srt", "ass", "ssa", "webvtt", "mov_text", "text":
return true
}
return false