feat(agent): auto-resume interrupted downloads after a daemon restart

A daemon restart used to abandon in-flight downloads: the in-memory queue was
lost and the web doesn't re-dispatch a stuck task, so the user had to retry
manually. The bytes already persisted (mmap + anacrolix's piece-completion DB
keyed by info_hash; debrid via Range; usenet via its tracker) — the daemon just
didn't re-attempt the work.

ActiveTaskStore persists each in-flight download's agent.Task payload to
active-tasks.json; the daemon re-submits them on startup so the downloaders
resume the partial data. manager.Submit now dedups (the startup re-submit and a
later web re-dispatch can't both run), and recordFinished removes a task from
the store only on a genuine terminal — shuttingDown (set before Shutdown cancels
the task contexts) keeps shutdown-interrupted tasks so they resume next start.
Stream/seed/upgrade tasks aren't persisted; ForceStart is cleared on resume.
This commit is contained in:
Deivid Soto 2026-05-31 22:44:05 +02:00
parent b708bb8ab2
commit 445da233c0
6 changed files with 399 additions and 9 deletions

View file

@ -0,0 +1,105 @@
package agent
import (
"encoding/json"
"os"
"path/filepath"
"sync"
"github.com/torrentclaw/unarr/internal/config"
)
// activeTasksFilePathFn is overridable for testing.
var activeTasksFilePathFn = func() string {
return filepath.Join(config.DataDir(), "active-tasks.json")
}
// ActiveTaskStore persists the dispatch payloads (agent.Task) of in-flight
// DOWNLOAD tasks so the daemon can re-submit them after a restart and have the
// downloaders resume the partial data — torrent via the persisted
// piece-completion DB, debrid via HTTP Range, usenet via its segment tracker.
//
// Distinct from LocalState (tasks.json), which holds transient status/progress
// for syncing to the web; this holds the re-dispatch payload needed to restart
// the work. An entry is added when a download starts and removed when it
// reaches a genuine terminal state (completed / failed / cancelled) — but NOT
// when the daemon is shutting down, so an interrupted download survives the
// restart and resumes.
type ActiveTaskStore struct {
mu sync.Mutex
tasks map[string]Task
}
// NewActiveTaskStore creates an empty store. Call Load() to hydrate it from disk.
func NewActiveTaskStore() *ActiveTaskStore {
return &ActiveTaskStore{tasks: make(map[string]Task)}
}
// Add records (or replaces) a task and persists the set.
func (s *ActiveTaskStore) Add(t Task) {
s.mu.Lock()
defer s.mu.Unlock()
s.tasks[t.ID] = t
s.flushLocked()
}
// Remove drops a task and persists the set. No-op if absent.
func (s *ActiveTaskStore) Remove(taskID string) {
s.mu.Lock()
defer s.mu.Unlock()
if _, ok := s.tasks[taskID]; !ok {
return
}
delete(s.tasks, taskID)
s.flushLocked()
}
// Load reads the persisted tasks from disk into the store and returns them.
// Returns nil on a missing or unreadable file (a fresh daemon has nothing to
// resume). Safe to call once at startup before any Add/Remove.
func (s *ActiveTaskStore) Load() []Task {
data, err := os.ReadFile(activeTasksFilePathFn())
if err != nil {
return nil
}
var tasks []Task
if json.Unmarshal(data, &tasks) != nil {
return nil
}
s.mu.Lock()
defer s.mu.Unlock()
s.tasks = make(map[string]Task, len(tasks))
for _, t := range tasks {
if t.ID != "" {
s.tasks[t.ID] = t
}
}
out := make([]Task, 0, len(s.tasks))
for _, t := range s.tasks {
out = append(out, t)
}
return out
}
// flushLocked atomically writes the current set to disk. Caller holds s.mu.
// Best-effort: a write failure is non-fatal (the in-memory set stays correct;
// at worst a crash before the next flush loses one resume entry).
func (s *ActiveTaskStore) flushLocked() {
tasks := make([]Task, 0, len(s.tasks))
for _, t := range s.tasks {
tasks = append(tasks, t)
}
data, err := json.MarshalIndent(tasks, "", " ")
if err != nil {
return
}
path := activeTasksFilePathFn()
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
return
}
tmp := path + ".tmp"
if err := os.WriteFile(tmp, data, 0o644); err != nil {
return
}
_ = os.Rename(tmp, path)
}