From 32b36822726556341c04396f63fe7b3bc771d54f Mon Sep 17 00:00:00 2001 From: Michael Hess Date: Fri, 13 Mar 2026 08:28:15 +0100 Subject: [PATCH 1/6] ignore local worktrees --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 43c4eff00a8..22c83df6730 100644 --- a/.gitignore +++ b/.gitignore @@ -48,3 +48,4 @@ asset-stats.html /evcc.db /tsconfig.tsbuildinfo *storybook.log +.worktrees/ From 4e5f7db129df30b58a825b659f14b6b56693f589 Mon Sep 17 00:00:00 2001 From: Michael Hess Date: Fri, 13 Mar 2026 08:30:32 +0100 Subject: [PATCH 2/6] add superpower plans --- ...-expected-orphan-commandresponse-design.md | 183 ++++++++ ...ee-expected-orphan-commandresponse-plan.md | 401 ++++++++++++++++++ 2 files changed, 584 insertions(+) create mode 100644 docs/plans/2026-03-04-easee-expected-orphan-commandresponse-design.md create mode 100644 docs/plans/2026-03-04-easee-expected-orphan-commandresponse-plan.md diff --git a/docs/plans/2026-03-04-easee-expected-orphan-commandresponse-design.md b/docs/plans/2026-03-04-easee-expected-orphan-commandresponse-design.md new file mode 100644 index 00000000000..267f8b1044e --- /dev/null +++ b/docs/plans/2026-03-04-easee-expected-orphan-commandresponse-design.md @@ -0,0 +1,183 @@ +# Design: Easee Expected-Orphan CommandResponse Handling + +**Date:** 2026-03-04 +**Branch:** feat/easee-log-rogue-commandresponse + +--- + +## Problem + +The Easee charger communicates command confirmations asynchronously via SignalR +`CommandResponse` messages. Each response carries a `Ticks` value that evcc +uses to correlate it with the originating REST API call. + +However, some REST endpoints return `HTTP 200` (synchronous) rather than +`HTTP 202` (asynchronous with ticks in the body). The charger still executes +these commands and fires a `CommandResponse` — but evcc never registered ticks +for these calls, so the response is incorrectly flagged as a rogue warning: + +``` +WARN rogue CommandResponse: charger EHWHL6VE sent Ticks=639082368062611660 + (accepted=true, resultCode=0) which was not triggered by evcc — + another system may be controlling this charger +``` + +Observed in practice for `POST /api/sites/{siteId}/circuits/{circuitId}/settings` +(called from `Phases1p3p`), which returns `200 OK` with an empty body. +The resulting `CommandResponse` carries `ID=22` (`CIRCUIT_MAX_CURRENT_P1`). + +Additionally, the existing rogue warning only logs the raw `Ticks` integer, +making it hard to understand what kind of command was received. + +--- + +## Goals + +1. Suppress false-positive rogue warnings for CommandResponses that evcc itself + triggered via 200-returning endpoints. +2. Preserve genuine rogue detection: external systems sending the same + ObservationID when evcc has no pending call should still produce a WARN. +3. Improve the rogue warning to include the human-readable ObservationID name. + +--- + +## Non-Goals + +- Tracking ticks from 200-returning endpoints (the API does not provide them). +- Handling the case where the charger sends CommandResponses for P2/P3 circuit + phases separately — only P1 (`CIRCUIT_MAX_CURRENT_P1 = 22`) has been observed. + This can be extended later if needed. + +--- + +## Design + +### Approach: Expected-Orphan Counter (per ObservationID) + +Before issuing a POST to a known 200-returning endpoint, register the +ObservationID(s) expected to arrive as CommandResponses. When a CommandResponse +arrives with no matching tick, check the counter: + +- Counter > 0 → expected orphan from evcc's own sync call; consume silently. +- Counter = 0 → truly rogue; log WARN. + +This is precise: an external system triggering the same ObservationID while +evcc has no pending call is still flagged. + +--- + +## Data Structures + +Add one field to the `Easee` struct, protected by the existing `cmdMu` mutex: + +```go +expectedOrphans map[easee.ObservationID]int +``` + +Initialised alongside `pendingTicks` in `NewEasee`: + +```go +pendingTicks: make(map[int64]chan easee.SignalRCommandResponse), +expectedOrphans: make(map[easee.ObservationID]int), +``` + +--- + +## Helpers + +```go +func (c *Easee) registerExpectedOrphan(ids ...easee.ObservationID) { + c.cmdMu.Lock() + defer c.cmdMu.Unlock() + for _, id := range ids { + c.expectedOrphans[id]++ + } +} + +func (c *Easee) consumeExpectedOrphan(id easee.ObservationID) bool { + c.cmdMu.Lock() + defer c.cmdMu.Unlock() + if c.expectedOrphans[id] > 0 { + c.expectedOrphans[id]-- + return true + } + return false +} +``` + +Mirrors the existing `registerPendingTick` / `unregisterPendingTick` pattern. + +No cleanup/deregister on the call site: the counter is consumed by the +`CommandResponse` handler when the charger responds. If the charger never +responds (e.g. network drop), the counter stays > 0, meaning one future +external call with the same ObservationID would be silently consumed — an +acceptable trade-off given the low probability. + +--- + +## Call Site: `Phases1p3p` (circuit level) + +```go +if c.circuit != 0 { + // ... existing GET + build data ... + + c.registerExpectedOrphan(easee.CIRCUIT_MAX_CURRENT_P1) + _, err = c.postJSONAndWait(uri, data) +} +``` + +`registerExpectedOrphan` is called **before** the POST so that the counter is +in place before any CommandResponse can arrive. No defer needed on the call +site — the counter is consumed by `CommandResponse`. + +--- + +## `CommandResponse` Handler + +```go +func (c *Easee) CommandResponse(i json.RawMessage) { + var res easee.SignalRCommandResponse + if err := json.Unmarshal(i, &res); err != nil { + c.log.ERROR.Printf("invalid message: %s %v", i, err) + return + } + + obsID := easee.ObservationID(res.ID) + c.log.TRACE.Printf("CommandResponse %s: %+v", res.SerialNumber, res) + + c.cmdMu.Lock() + ch, ok := c.pendingTicks[res.Ticks] + c.cmdMu.Unlock() + + if ok { + ch <- res + return + } + + if c.consumeExpectedOrphan(obsID) { + return + } + + c.log.WARN.Printf("rogue CommandResponse: charger %s ObservationID=%s Ticks=%d "+ + "(accepted=%v, resultCode=%d) which was not triggered by evcc — "+ + "another system may be controlling this charger", + res.SerialNumber, obsID, res.Ticks, res.WasAccepted, res.ResultCode) +} +``` + +`easee.ObservationID(res.ID).String()` (generated by enumer) provides the +human-readable name for known IDs and falls back to the integer representation +for unknown ones — no extra handling needed. + +--- + +## Changes Summary + +| What | Location | +|---|---| +| Add `expectedOrphans map[easee.ObservationID]int` field | `Easee` struct | +| Initialise `expectedOrphans` | `NewEasee` | +| Add `registerExpectedOrphan` helper | `easee.go` | +| Add `consumeExpectedOrphan` helper | `easee.go` | +| Call `registerExpectedOrphan(CIRCUIT_MAX_CURRENT_P1)` before POST | `Phases1p3p` (circuit branch) | +| Check orphan counter; add ObservationID name to rogue WARN | `CommandResponse` | diff --git a/docs/plans/2026-03-04-easee-expected-orphan-commandresponse-plan.md b/docs/plans/2026-03-04-easee-expected-orphan-commandresponse-plan.md new file mode 100644 index 00000000000..f593b0695e7 --- /dev/null +++ b/docs/plans/2026-03-04-easee-expected-orphan-commandresponse-plan.md @@ -0,0 +1,401 @@ +# Easee Expected-Orphan CommandResponse Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Suppress false-positive rogue CommandResponse warnings for sync (HTTP 200) API calls made by evcc, and add the human-readable ObservationID name to the rogue warning message. + +**Architecture:** Add an `expectedOrphans map[easee.ObservationID]int` counter to the `Easee` struct. Call sites that POST to 200-returning endpoints pre-register the ObservationID(s) they expect back. The `CommandResponse` handler consumes the counter before deciding whether to emit a rogue WARN. + +**Tech Stack:** Go, `charger/easee.go`, `charger/easee_test.go`, `charger/easee/signalr.go` (ObservationID enum — read-only reference). + +--- + +### Task 1: Add `expectedOrphans` field and initialise it + +**Files:** +- Modify: `charger/easee.go` (struct definition ~line 67, NewEasee init ~line 117) + +**Step 1: Add the field to the struct** + +In the `Easee` struct, directly after the `pendingTicks` field (~line 67): + +```go +cmdMu sync.Mutex +pendingTicks map[int64]chan easee.SignalRCommandResponse +expectedOrphans map[easee.ObservationID]int // add this line +``` + +**Step 2: Initialise it in `NewEasee`** + +In `NewEasee`, directly after the `pendingTicks` initialisation (~line 117): + +```go +pendingTicks: make(map[int64]chan easee.SignalRCommandResponse), +expectedOrphans: make(map[easee.ObservationID]int), // add this line +``` + +**Step 3: Initialise it in `newEasee` test helper** + +In `charger/easee_test.go`, in the `newEasee()` helper (~line 36): + +```go +pendingTicks: make(map[int64]chan easee.SignalRCommandResponse), +expectedOrphans: make(map[easee.ObservationID]int), // add this line +``` + +**Step 4: Verify it compiles** + +```bash +go build ./charger/... +``` + +Expected: no errors. + +**Step 5: Commit** + +```bash +git add charger/easee.go charger/easee_test.go +git commit -m "feat(easee): add expectedOrphans map to Easee struct" +``` + +--- + +### Task 2: Add `registerExpectedOrphan` and `consumeExpectedOrphan` helpers + +**Files:** +- Modify: `charger/easee.go` (after the existing `unregisterPendingTick` helper, ~line 226) + +**Step 1: Write the failing test** + +In `charger/easee_test.go`, add after `TestEasee_CommandResponse_legitimate`: + +```go +func TestEasee_registerAndConsumeExpectedOrphan(t *testing.T) { + e := newEasee() + + // Not registered yet — consume returns false + assert.False(t, e.consumeExpectedOrphan(easee.CIRCUIT_MAX_CURRENT_P1)) + + // Register once + e.registerExpectedOrphan(easee.CIRCUIT_MAX_CURRENT_P1) + + // First consume succeeds + assert.True(t, e.consumeExpectedOrphan(easee.CIRCUIT_MAX_CURRENT_P1)) + + // Second consume fails (counter back to zero) + assert.False(t, e.consumeExpectedOrphan(easee.CIRCUIT_MAX_CURRENT_P1)) +} + +func TestEasee_registerExpectedOrphan_multipleRegistrations(t *testing.T) { + e := newEasee() + + // Register twice (two concurrent calls in flight) + e.registerExpectedOrphan(easee.CIRCUIT_MAX_CURRENT_P1) + e.registerExpectedOrphan(easee.CIRCUIT_MAX_CURRENT_P1) + + assert.True(t, e.consumeExpectedOrphan(easee.CIRCUIT_MAX_CURRENT_P1)) + assert.True(t, e.consumeExpectedOrphan(easee.CIRCUIT_MAX_CURRENT_P1)) + assert.False(t, e.consumeExpectedOrphan(easee.CIRCUIT_MAX_CURRENT_P1)) +} +``` + +**Step 2: Run the tests to verify they fail** + +```bash +go test ./charger/... -run "TestEasee_registerAndConsumeExpectedOrphan|TestEasee_registerExpectedOrphan_multipleRegistrations" -v +``` + +Expected: FAIL — `e.registerExpectedOrphan undefined`, `e.consumeExpectedOrphan undefined`. + +**Step 3: Implement the helpers** + +In `charger/easee.go`, add after `unregisterPendingTick`: + +```go +func (c *Easee) registerExpectedOrphan(ids ...easee.ObservationID) { + c.cmdMu.Lock() + defer c.cmdMu.Unlock() + for _, id := range ids { + c.expectedOrphans[id]++ + } +} + +func (c *Easee) consumeExpectedOrphan(id easee.ObservationID) bool { + c.cmdMu.Lock() + defer c.cmdMu.Unlock() + if c.expectedOrphans[id] > 0 { + c.expectedOrphans[id]-- + return true + } + return false +} +``` + +**Step 4: Run the tests to verify they pass** + +```bash +go test ./charger/... -run "TestEasee_registerAndConsumeExpectedOrphan|TestEasee_registerExpectedOrphan_multipleRegistrations" -v +``` + +Expected: PASS. + +**Step 5: Commit** + +```bash +git add charger/easee.go charger/easee_test.go +git commit -m "feat(easee): add registerExpectedOrphan/consumeExpectedOrphan helpers" +``` + +--- + +### Task 3: Update `CommandResponse` handler — check orphan counter and improve rogue message + +**Files:** +- Modify: `charger/easee.go` (`CommandResponse` method, ~line 392) + +**Step 1: Write the failing test** + +In `charger/easee_test.go`, add after the existing `TestEasee_CommandResponse_rogue`: + +```go +func TestEasee_CommandResponse_expectedOrphan(t *testing.T) { + e := newEasee() + + // Pre-register the expected orphan + e.registerExpectedOrphan(easee.CIRCUIT_MAX_CURRENT_P1) + + resp := easee.SignalRCommandResponse{ + SerialNumber: "EH123456", + ID: int(easee.CIRCUIT_MAX_CURRENT_P1), + Ticks: 111111111, + WasAccepted: true, + ResultCode: 0, + } + + raw, err := json.Marshal(resp) + require.NoError(t, err) + + // Should not panic and should consume the orphan counter + assert.NotPanics(t, func() { + e.CommandResponse(raw) + }) + + // Counter should now be zero — a second response would be rogue + assert.False(t, e.consumeExpectedOrphan(easee.CIRCUIT_MAX_CURRENT_P1)) +} + +func TestEasee_CommandResponse_rogueAfterOrphanConsumed(t *testing.T) { + e := newEasee() + + // Register and immediately consume via CommandResponse + e.registerExpectedOrphan(easee.CIRCUIT_MAX_CURRENT_P1) + + resp := easee.SignalRCommandResponse{ + SerialNumber: "EH123456", + ID: int(easee.CIRCUIT_MAX_CURRENT_P1), + Ticks: 111111111, + WasAccepted: true, + } + raw, _ := json.Marshal(resp) + e.CommandResponse(raw) // consumes the counter + + // A second identical response with counter=0 should be treated as rogue (not panic) + assert.NotPanics(t, func() { + e.CommandResponse(raw) + }) + + // pendingTicks untouched + e.cmdMu.Lock() + assert.Empty(t, e.pendingTicks) + e.cmdMu.Unlock() +} +``` + +**Step 2: Run the tests to verify they fail** + +```bash +go test ./charger/... -run "TestEasee_CommandResponse_expectedOrphan|TestEasee_CommandResponse_rogueAfterOrphanConsumed" -v +``` + +Expected: FAIL — `TestEasee_CommandResponse_expectedOrphan` fails because the current handler does not consume the orphan counter (counter stays at 1 after the call). + +**Step 3: Update the `CommandResponse` handler** + +Replace the existing `CommandResponse` method body in `charger/easee.go`: + +```go +func (c *Easee) CommandResponse(i json.RawMessage) { + var res easee.SignalRCommandResponse + + if err := json.Unmarshal(i, &res); err != nil { + c.log.ERROR.Printf("invalid message: %s %v", i, err) + return + } + + obsID := easee.ObservationID(res.ID) + c.log.TRACE.Printf("CommandResponse %s: %+v", res.SerialNumber, res) + + c.cmdMu.Lock() + ch, ok := c.pendingTicks[res.Ticks] + c.cmdMu.Unlock() + + if ok { + ch <- res + return + } + + if c.consumeExpectedOrphan(obsID) { + return + } + + c.log.WARN.Printf("rogue CommandResponse: charger %s ObservationID=%s Ticks=%d "+ + "(accepted=%v, resultCode=%d) which was not triggered by evcc — "+ + "another system may be controlling this charger", + res.SerialNumber, obsID, res.Ticks, res.WasAccepted, res.ResultCode) +} +``` + +Note: `easee.ObservationID(res.ID).String()` is provided automatically by the enumer's `String()` method. For unknown IDs it will fall back to the integer representation — no special handling needed. + +**Step 4: Run the tests to verify they pass** + +```bash +go test ./charger/... -run "TestEasee_CommandResponse" -v +``` + +Expected: all four CommandResponse tests PASS (`_rogue`, `_legitimate`, `_expectedOrphan`, `_rogueAfterOrphanConsumed`). + +**Step 5: Run the full charger test suite** + +```bash +go test ./charger/... -v +``` + +Expected: all tests PASS. + +**Step 6: Commit** + +```bash +git add charger/easee.go charger/easee_test.go +git commit -m "feat(easee): handle expected-orphan CommandResponses from sync API calls" +``` + +--- + +### Task 4: Register expected orphan at the `Phases1p3p` circuit-level call site + +**Files:** +- Modify: `charger/easee.go` (`Phases1p3p` method, ~line 735) + +**Step 1: Write the failing test** + +In `charger/easee_test.go`, add a new test that exercises the circuit-level `Phases1p3p` path and verifies the orphan counter is pre-registered before the POST: + +```go +func TestEasee_Phases1p3p_registersExpectedOrphan(t *testing.T) { + const siteID = 12345 + const circuitID = 67890 + const chargerID = "TESTTEST" + + e := newEasee() + e.charger = chargerID + e.site = siteID + e.circuit = circuitID + + httpmock.ActivateNonDefault(e.Client) + defer httpmock.DeactivateAndReset() + + // Mock GET circuit settings + getURI := fmt.Sprintf("%s/sites/%d/circuits/%d/settings", easee.API, siteID, circuitID) + maxP1, maxP2, maxP3 := 32.0, 32.0, 32.0 + getResp := easee.CircuitSettings{ + MaxCircuitCurrentP1: &maxP1, + MaxCircuitCurrentP2: &maxP2, + MaxCircuitCurrentP3: &maxP3, + } + body, _ := json.Marshal(getResp) + httpmock.RegisterResponder(http.MethodGet, getURI, + httpmock.NewBytesResponder(200, body)) + + // Mock POST circuit settings — return 200 (sync) + httpmock.RegisterResponder(http.MethodPost, getURI, + httpmock.NewStringResponder(200, "")) + + err := e.Phases1p3p(1) + assert.NoError(t, err) + + // After the call, the orphan counter should have been pre-registered + // and then consumed (if a CommandResponse had arrived). Since no + // CommandResponse arrived in this test, the counter stays at 1. + e.cmdMu.Lock() + count := e.expectedOrphans[easee.CIRCUIT_MAX_CURRENT_P1] + e.cmdMu.Unlock() + assert.Equal(t, 1, count, "expected orphan should be registered before the POST") +} +``` + +**Step 2: Run the test to verify it fails** + +```bash +go test ./charger/... -run "TestEasee_Phases1p3p_registersExpectedOrphan" -v +``` + +Expected: FAIL — counter is 0 (not yet registered). + +**Step 3: Add `registerExpectedOrphan` to `Phases1p3p`** + +In `charger/easee.go`, in the `Phases1p3p` method, locate the circuit-level branch (the `if c.circuit != 0` block). Add the registration call immediately before `postJSONAndWait`: + +```go +// existing code building `data` stays unchanged ... + +c.registerExpectedOrphan(easee.CIRCUIT_MAX_CURRENT_P1) +_, err = c.postJSONAndWait(uri, data) +``` + +**Step 4: Run the test to verify it passes** + +```bash +go test ./charger/... -run "TestEasee_Phases1p3p_registersExpectedOrphan" -v +``` + +Expected: PASS. + +**Step 5: Run the full suite** + +```bash +go test ./charger/... -v +``` + +Expected: all tests PASS. + +**Step 6: Commit** + +```bash +git add charger/easee.go charger/easee_test.go +git commit -m "feat(easee): register expected orphan before circuit settings POST in Phases1p3p" +``` + +--- + +### Task 5: Final verification + +**Step 1: Run the full charger test suite one more time** + +```bash +go test ./charger/... -v +``` + +Expected: all tests PASS. + +**Step 2: Check LSP diagnostics** + +Open `charger/easee.go` in the editor and verify no type errors or unused imports appear. + +**Step 3: Build the whole project** + +```bash +go build ./... +``` + +Expected: clean build, no errors. From f2e5578e1f0d3fc0b8eaf3f19bf1a75a313dee73 Mon Sep 17 00:00:00 2001 From: Michael Hess Date: Sat, 14 Mar 2026 00:21:33 +0100 Subject: [PATCH 3/6] tariff: fix HTTP 429 treated as permanent and goroutine leak on startup failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a tariff's first API call returned HTTP 429 (Too Many Requests), backoffPermanentError wrapped it as a permanent error, causing backoff.Retry to abort immediately. runOrError then received the error and discarded the tariff — but the background goroutine was never signalled to stop, so it kept looping (blocked on its hourly tick) indefinitely, continuing to hit the API and preventing rate-limit recovery even after evcc appeared to disable the tariff. Fix 1: exclude HTTP 429 from permanent-error treatment in backoffPermanentError so backoff.Retry keeps retrying on rate limits. Fix 2: thread context.Context through the runnable interface so runOrError can cancel the goroutine via cancel() when startup fails, and all run() loops check ctx.Done() instead of blindly calling continue on error. Fixes #26654 Co-Authored-By: Claude Sonnet 4.6 --- tariff/amber.go | 9 +++- tariff/awattar.go | 9 +++- tariff/edf-tempo.go | 9 +++- tariff/electricitymaps.go | 9 +++- tariff/elering.go | 9 +++- tariff/entsoe.go | 23 ++++++-- tariff/gruenstromindex.go | 9 +++- tariff/helper.go | 17 ++++-- tariff/helper_test.go | 110 +++++++++++++++++++++++++++++++++++++- tariff/ngeso.go | 9 +++- tariff/octopus.go | 9 +++- tariff/octopusde.go | 9 +++- tariff/pun.go | 16 ++++-- tariff/smartenergy.go | 9 +++- tariff/stekker.go | 37 ++++++++++--- tariff/tibber.go | 13 +++-- 16 files changed, 264 insertions(+), 42 deletions(-) diff --git a/tariff/amber.go b/tariff/amber.go index 1df6588f169..e2642f183f1 100644 --- a/tariff/amber.go +++ b/tariff/amber.go @@ -76,7 +76,7 @@ func NewAmberFromConfig(other map[string]any) (api.Tariff, error) { return runOrError(t) } -func (t *Amber) run(done chan error) { +func (t *Amber) run(done chan error, stop <-chan struct{}) { var once sync.Once for tick := time.Tick(time.Minute); ; <-tick { @@ -88,7 +88,12 @@ func (t *Amber) run(done chan error) { once.Do(func() { done <- err }) t.log.ERROR.Println(err) - continue + select { + case <-stop: + return + default: + continue + } } // Create and sort time-ordered list of all Amber intervals diff --git a/tariff/awattar.go b/tariff/awattar.go index 9327d8d197d..55e04203fa3 100644 --- a/tariff/awattar.go +++ b/tariff/awattar.go @@ -53,7 +53,7 @@ func NewAwattarFromConfig(other map[string]any) (api.Tariff, error) { return runOrError(t) } -func (t *Awattar) run(done chan error) { +func (t *Awattar) run(done chan error, stop <-chan struct{}) { var once sync.Once client := request.NewHelper(t.log) @@ -73,7 +73,12 @@ func (t *Awattar) run(done chan error) { once.Do(func() { done <- err }) t.log.ERROR.Println(err) - continue + select { + case <-stop: + return + default: + continue + } } data := make(api.Rates, 0, len(res.Data)) diff --git a/tariff/edf-tempo.go b/tariff/edf-tempo.go index 01270e3f0fd..5c602534e4e 100644 --- a/tariff/edf-tempo.go +++ b/tariff/edf-tempo.go @@ -100,7 +100,7 @@ func (t *EdfTempo) refreshToken() (*oauth2.Token, error) { return util.TokenWithExpiry(&res), err } -func (t *EdfTempo) run(done chan error) { +func (t *EdfTempo) run(done chan error, stop <-chan struct{}) { var once sync.Once for tick := time.Tick(time.Hour); ; <-tick { @@ -127,7 +127,12 @@ func (t *EdfTempo) run(done chan error) { once.Do(func() { done <- err }) t.log.ERROR.Println(err) - continue + select { + case <-stop: + return + default: + continue + } } data := make(api.Rates, 0, 24*len(res.Data.Values)) diff --git a/tariff/electricitymaps.go b/tariff/electricitymaps.go index fdf7ac9a06c..26bbb30cfc4 100644 --- a/tariff/electricitymaps.go +++ b/tariff/electricitymaps.go @@ -73,7 +73,7 @@ func NewElectricityMapsFromConfig(other map[string]any) (api.Tariff, error) { return runOrError(t) } -func (t *ElectricityMaps) run(done chan error) { +func (t *ElectricityMaps) run(done chan error, stop <-chan struct{}) { var once sync.Once uri := fmt.Sprintf("%s/carbon-intensity/forecast?zone=%s", t.uri, t.zone) @@ -91,7 +91,12 @@ func (t *ElectricityMaps) run(done chan error) { once.Do(func() { done <- err }) t.log.ERROR.Println(err) - continue + select { + case <-stop: + return + default: + continue + } } data := make(api.Rates, 0, len(res.Forecast)) diff --git a/tariff/elering.go b/tariff/elering.go index f32d57ca399..348d2d8db70 100644 --- a/tariff/elering.go +++ b/tariff/elering.go @@ -57,7 +57,7 @@ func NewEleringFromConfig(other map[string]any) (api.Tariff, error) { return runOrError(t) } -func (t *Elering) run(done chan error) { +func (t *Elering) run(done chan error, stop <-chan struct{}) { var once sync.Once client := request.NewHelper(t.log) @@ -75,7 +75,12 @@ func (t *Elering) run(done chan error) { once.Do(func() { done <- err }) t.log.ERROR.Println(err) - continue + select { + case <-stop: + return + default: + continue + } } data := make(api.Rates, 0, len(res.Data[t.region])) diff --git a/tariff/entsoe.go b/tariff/entsoe.go index a6152f733b9..04e94d4172d 100644 --- a/tariff/entsoe.go +++ b/tariff/entsoe.go @@ -82,7 +82,7 @@ func NewEntsoeFromConfig(other map[string]any) (api.Tariff, error) { return runOrError(t) } -func (t *Entsoe) run(done chan error) { +func (t *Entsoe) run(done chan error, stop <-chan struct{}) { var once sync.Once // Data updated by ESO every half hour, but we only need data every hour to stay current. @@ -128,13 +128,23 @@ func (t *Entsoe) run(done chan error) { once.Do(func() { done <- err }) t.log.ERROR.Println(err) - continue + select { + case <-stop: + return + default: + continue + } } if len(tr.TimeSeries) == 0 { once.Do(func() { done <- entsoe.ErrInvalidData }) t.log.ERROR.Println(entsoe.ErrInvalidData) - continue + select { + case <-stop: + return + default: + continue + } } // extract desired series @@ -142,7 +152,12 @@ func (t *Entsoe) run(done chan error) { if err != nil { once.Do(func() { done <- err }) t.log.ERROR.Println(err) - continue + select { + case <-stop: + return + default: + continue + } } data := make(api.Rates, 0, len(res)) diff --git a/tariff/gruenstromindex.go b/tariff/gruenstromindex.go index e35541dfdb5..69fa726f8b5 100644 --- a/tariff/gruenstromindex.go +++ b/tariff/gruenstromindex.go @@ -55,7 +55,7 @@ func NewGrünStromIndexFromConfig(other map[string]any) (api.Tariff, error) { return runOrError(t) } -func (t *GrünStromIndex) run(done chan error) { +func (t *GrünStromIndex) run(done chan error, stop <-chan struct{}) { var once sync.Once uri := fmt.Sprintf("https://api.corrently.io/v2.0/gsi/prediction?zip=%s", t.zip) @@ -78,7 +78,12 @@ func (t *GrünStromIndex) run(done chan error) { once.Do(func() { done <- err }) t.log.ERROR.Println(err) - continue + select { + case <-stop: + return + default: + continue + } } data := make(api.Rates, 0, len(res.Forecast)) diff --git a/tariff/helper.go b/tariff/helper.go index 7ab33a45981..07137ae8d8a 100644 --- a/tariff/helper.go +++ b/tariff/helper.go @@ -2,6 +2,7 @@ package tariff import ( "errors" + "net/http" "strings" "time" @@ -28,10 +29,14 @@ func bo() backoff.BackOff { ) } -// backoffPermanentError returns a permanent error in case of HTTP 400 +// backoffPermanentError returns a permanent error in case of HTTP 4xx/5xx, +// except for HTTP 429 (Too Many Requests) which is transient and should be retried. func backoffPermanentError(err error) error { if se, ok := errors.AsType[*request.StatusError](err); ok { if code := se.StatusCode(); code >= 400 && code <= 599 { + if code == http.StatusTooManyRequests { + return err + } return backoff.Permanent(se) } } @@ -75,16 +80,20 @@ func beginningOfDay() time.Time { type runnable[T any] interface { *T - run(done chan error) + run(done chan error, stop <-chan struct{}) } // https://groups.google.com/g/golang-nuts/c/1cl9v_hPYHk -// runOrError invokes t.run(chan error) and waits for the channel to return +// runOrError invokes t.run and waits for the channel to return. +// If the first update fails, stop is closed so the goroutine exits instead of +// continuing to make API calls in the background. func runOrError[T any, I runnable[T]](t I) (*T, error) { + stop := make(chan struct{}) done := make(chan error) - go t.run(done) + go t.run(done, stop) if err := <-done; err != nil { + close(stop) return nil, err } diff --git a/tariff/helper_test.go b/tariff/helper_test.go index 1a0ed921b48..31709369ee4 100644 --- a/tariff/helper_test.go +++ b/tariff/helper_test.go @@ -2,12 +2,15 @@ package tariff import ( "errors" + "net/http" "testing" "time" "github.com/benbjohnson/clock" + "github.com/cenkalti/backoff/v4" "github.com/evcc-io/evcc/api" "github.com/evcc-io/evcc/util" + "github.com/evcc-io/evcc/util/request" "github.com/jinzhu/now" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -65,7 +68,7 @@ type runner struct { res error } -func (r *runner) run(done chan error) { +func (r *runner) run(done chan error, _ <-chan struct{}) { if r.res == nil { close(done) } else { @@ -73,6 +76,33 @@ func (r *runner) run(done chan error) { } } +// persistingRunner mimics the error branch of GrünStromIndex.run: +// it sends the initial error to done (so runOrError propagates it and discards +// the tariff), then keeps running — just like the real loop does via `continue`. +// The stop channel gives the test a way to clean up; without it the goroutine +// would truly leak, which is exactly the problem we are demonstrating. +type persistingRunner struct { + stop chan struct{} + running chan struct{} // closed by goroutine after the initial failure +} + +func (r *persistingRunner) run(done chan error, stop <-chan struct{}) { + done <- errors.New("initial failure (e.g. HTTP 429)") + // Simulate the real tariff behaviour: after sending the error, the goroutine + // calls `continue` and then blocks on `<-tick` (up to one hour) before doing + // anything else. During that waiting window, runOrError's close(stop) call has + // plenty of time to arrive. We replicate this with a short timer: if stop is + // closed quickly (fix is in place) we exit cleanly; if stop is never closed + // (bug is still present) the timer fires and we signal the leak. + select { + case <-stop: + return // correctly stopped — no leak + case <-time.After(100 * time.Millisecond): + close(r.running) // stop never closed — goroutine leak + <-r.stop + } +} + func TestRunOrQError(t *testing.T) { { res, err := runOrError(&runner{nil}) @@ -85,3 +115,81 @@ func TestRunOrQError(t *testing.T) { require.Nil(t, res) } } + +// TestRunOrError_DoesNotLeakGoroutineOnInitialFailure asserts that when the +// first API call inside run() fails (e.g. HTTP 429 at startup), runOrError must +// stop the background goroutine before returning the error. +// +// Without that guarantee the goroutine outlives the tariff: it keeps hitting +// the API on every hourly tick even though evcc has already discarded the +// tariff object and will never read the results. For GSI this is particularly +// harmful: the orphaned goroutine burns rate-limit quota, so an evcc restart +// cannot heal the situation — the provider keeps seeing traffic and keeps +// returning 429. +func TestRunOrError_DoesNotLeakGoroutineOnInitialFailure(t *testing.T) { + r := &persistingRunner{ + stop: make(chan struct{}), + running: make(chan struct{}), + } + // Ensure the goroutine is stopped at the end of the test regardless. + t.Cleanup(func() { close(r.stop) }) + + result, err := runOrError(r) + require.Error(t, err, "runOrError must propagate the initial failure") + require.Nil(t, result, "the tariff must not be registered when initialisation fails") + + // If runOrError properly stopped the goroutine, r.running will never close + // and we will hit the timeout branch below (pass). If the goroutine is still + // alive it closes r.running immediately, which triggers the failure branch. + select { + case <-r.running: + t.Error("goroutine is still running after runOrError returned an error: " + + "the background goroutine must be cancelled when the tariff fails to initialise, " + + "otherwise it keeps hitting the API indefinitely (goroutine leak)") + case <-time.After(50 * time.Millisecond): + // goroutine has stopped — no leak, desired behaviour + } +} + +// statusErrorFor creates a *request.StatusError for the given HTTP status code, +// matching what request.Helper.GetJSON returns for non-2xx responses. +func statusErrorFor(code int) error { + req, _ := http.NewRequest(http.MethodGet, "http://example.com", nil) + resp := &http.Response{ + StatusCode: code, + Request: req, + } + return request.NewStatusError(resp) +} + +// TestBackoffPermanentError_429IsTransient confirms that HTTP 429 (Too Many Requests) +// is NOT treated as a permanent error by backoffPermanentError. +// +// A 429 is a transient rate-limit signal: the caller should back off and retry. +// Marking it permanent causes backoff.Retry to give up immediately, and when this +// happens on the very first API call during startup the tariff is never registered +// (runOrError propagates the error and discards the tariff object). See issue #26654. +func TestBackoffPermanentError_429IsTransient(t *testing.T) { + err := backoffPermanentError(statusErrorFor(http.StatusTooManyRequests)) + + var pe *backoff.PermanentError + assert.False(t, errors.As(err, &pe), + "HTTP 429 must not be permanent: it is transient and backoff.Retry should keep retrying") +} + +// TestBackoffPermanentError_4xxIsPermanent confirms that other 4xx errors (e.g. 400 +// Bad Request) remain permanent — they will not succeed on retry. +func TestBackoffPermanentError_4xxIsPermanent(t *testing.T) { + for _, code := range []int{ + http.StatusBadRequest, + http.StatusUnauthorized, + http.StatusForbidden, + http.StatusNotFound, + } { + err := backoffPermanentError(statusErrorFor(code)) + + var pe *backoff.PermanentError + assert.True(t, errors.As(err, &pe), + "HTTP %d should be permanent (won't succeed on retry)", code) + } +} diff --git a/tariff/ngeso.go b/tariff/ngeso.go index 1c61df43f68..d06a96a38a0 100644 --- a/tariff/ngeso.go +++ b/tariff/ngeso.go @@ -50,7 +50,7 @@ func NewNgesoFromConfig(other map[string]any) (api.Tariff, error) { return runOrError(t) } -func (t *Ngeso) run(done chan error) { +func (t *Ngeso) run(done chan error, stop <-chan struct{}) { var once sync.Once client := request.NewHelper(t.log) @@ -78,7 +78,12 @@ func (t *Ngeso) run(done chan error) { once.Do(func() { done <- err }) t.log.ERROR.Println(err) - continue + select { + case <-stop: + return + default: + continue + } } data := make(api.Rates, 0, len(res.Results())) diff --git a/tariff/octopus.go b/tariff/octopus.go index 46cf4d5fcd7..97b392a16f6 100644 --- a/tariff/octopus.go +++ b/tariff/octopus.go @@ -123,7 +123,7 @@ func buildOctopusFromConfig(other map[string]any) (*Octopus, error) { return t, nil } -func (t *Octopus) run(done chan error) { +func (t *Octopus) run(done chan error, stop <-chan struct{}) { var once sync.Once client := request.NewHelper(t.log) @@ -159,7 +159,12 @@ func (t *Octopus) run(done chan error) { once.Do(func() { done <- err }) t.log.ERROR.Println(err) - continue + select { + case <-stop: + return + default: + continue + } } data := make(api.Rates, 0, len(res.Results)) diff --git a/tariff/octopusde.go b/tariff/octopusde.go index 9e5f6d5fb74..457479211ea 100644 --- a/tariff/octopusde.go +++ b/tariff/octopusde.go @@ -78,7 +78,7 @@ func buildOctopusDeFromConfig(other map[string]any) (*OctopusDe, error) { return t, nil } -func (t *OctopusDe) run(done chan error) { +func (t *OctopusDe) run(done chan error, stop <-chan struct{}) { var once sync.Once for tick := time.Tick(time.Hour); ; <-tick { @@ -95,7 +95,12 @@ func (t *OctopusDe) run(done chan error) { once.Do(func() { done <- err }) t.log.ERROR.Printf("failed to fetch unit rate forecast: %v", err) - continue + select { + case <-stop: + return + default: + continue + } } data := make(api.Rates, 0, len(rates)) diff --git a/tariff/pun.go b/tariff/pun.go index fa86988e26a..0b4b6a4d56d 100644 --- a/tariff/pun.go +++ b/tariff/pun.go @@ -68,7 +68,7 @@ func NewPunFromConfig(other map[string]any) (api.Tariff, error) { return runOrError(t) } -func (t *Pun) run(done chan error) { +func (t *Pun) run(done chan error, stop <-chan struct{}) { var once sync.Once for tick := time.Tick(time.Hour); ; <-tick { @@ -80,7 +80,12 @@ func (t *Pun) run(done chan error) { if err != nil { once.Do(func() { done <- err }) t.log.ERROR.Println(err) - continue + select { + case <-stop: + return + default: + continue + } } // get tomorrow data @@ -91,7 +96,12 @@ func (t *Pun) run(done chan error) { if err != nil { once.Do(func() { done <- err }) t.log.ERROR.Println(err) - continue + select { + case <-stop: + return + default: + continue + } } // merge today and tomorrow data diff --git a/tariff/smartenergy.go b/tariff/smartenergy.go index fe3d290849c..c894194ed7c 100644 --- a/tariff/smartenergy.go +++ b/tariff/smartenergy.go @@ -46,7 +46,7 @@ func NewSmartEnergyFromConfig(other map[string]any) (api.Tariff, error) { return runOrError(t) } -func (t *SmartEnergy) run(done chan error) { +func (t *SmartEnergy) run(done chan error, stop <-chan struct{}) { var once sync.Once client := request.NewHelper(t.log) @@ -59,7 +59,12 @@ func (t *SmartEnergy) run(done chan error) { once.Do(func() { done <- err }) t.log.ERROR.Println(err) - continue + select { + case <-stop: + return + default: + continue + } } data := make(api.Rates, 0, len(res.Data)) diff --git a/tariff/stekker.go b/tariff/stekker.go index b803ff935b6..a75b3a3fff5 100644 --- a/tariff/stekker.go +++ b/tariff/stekker.go @@ -82,7 +82,7 @@ func NewStekkerFromConfig(other map[string]any) (api.Tariff, error) { return runOrError(t) } -func (t *Stekker) run(done chan error) { +func (t *Stekker) run(done chan error, stop <-chan struct{}) { var once sync.Once client := request.NewHelper(t.log) @@ -92,14 +92,24 @@ func (t *Stekker) run(done chan error) { if err != nil { once.Do(func() { done <- err }) t.log.ERROR.Println("http error:", err) - continue + select { + case <-stop: + return + default: + continue + } } if resp.StatusCode != http.StatusOK { once.Do(func() { done <- fmt.Errorf("http status %d", resp.StatusCode) }) t.log.ERROR.Printf("http status %d", resp.StatusCode) resp.Body.Close() - continue + select { + case <-stop: + return + default: + continue + } } doc, err := goquery.NewDocumentFromReader(resp.Body) @@ -107,7 +117,12 @@ func (t *Stekker) run(done chan error) { resp.Body.Close() once.Do(func() { done <- err }) t.log.ERROR.Println("parse error:", err) - continue + select { + case <-stop: + return + default: + continue + } } resp.Body.Close() @@ -115,7 +130,12 @@ func (t *Stekker) run(done chan error) { if !ok { once.Do(func() { done <- fmt.Errorf("no forecast attribute found") }) t.log.ERROR.Println("no forecast attribute found") - continue + select { + case <-stop: + return + default: + continue + } } raw := strings.ReplaceAll(val, """, "\"") @@ -124,7 +144,12 @@ func (t *Stekker) run(done chan error) { if err := json.Unmarshal([]byte(raw), &data); err != nil { once.Do(func() { done <- err }) t.log.ERROR.Println("unmarshal error:", err) - continue + select { + case <-stop: + return + default: + continue + } } var res api.Rates diff --git a/tariff/tibber.go b/tariff/tibber.go index 9429a74af3e..57d7008f230 100644 --- a/tariff/tibber.go +++ b/tariff/tibber.go @@ -69,7 +69,7 @@ func NewTibberFromConfig(other map[string]any) (api.Tariff, error) { return runOrError(t) } -func (t *Tibber) run(done chan error) { +func (t *Tibber) run(done chan error, stop <-chan struct{}) { var once sync.Once v := map[string]any{ @@ -88,14 +88,19 @@ func (t *Tibber) run(done chan error) { } if err := backoff.Retry(func() error { - ctx, cancel := context.WithTimeout(context.Background(), request.Timeout) + reqCtx, cancel := context.WithTimeout(context.Background(), request.Timeout) defer cancel() - return t.client.Query(ctx, &res, v) + return t.client.Query(reqCtx, &res, v) }, bo()); err != nil { once.Do(func() { done <- err }) t.log.ERROR.Println(err) - continue + select { + case <-stop: + return + default: + continue + } } pi := res.Viewer.Home.CurrentSubscription.PriceInfo From 86deb0608313d552f9ca32b5105bb49010f1d53c Mon Sep 17 00:00:00 2001 From: Michael Hess Date: Sat, 14 Mar 2026 00:57:27 +0100 Subject: [PATCH 4/6] =?UTF-8?q?tariff:=20fix=20goroutine=20leak=20test=20?= =?UTF-8?q?=E2=80=94=20leak=20timer=20must=20be=20shorter=20than=20observa?= =?UTF-8?q?tion=20window?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The persistingRunner was closing r.running after 100ms, but the test only waited 50ms for that signal. In the bug-present case the test would time out at 50ms and incorrectly conclude "no leak". Reduce the leak timer to 20ms so it fires well within the 50ms observation window. Co-Authored-By: Claude Sonnet 4.6 --- tariff/helper_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tariff/helper_test.go b/tariff/helper_test.go index 31709369ee4..6b1c1bf169f 100644 --- a/tariff/helper_test.go +++ b/tariff/helper_test.go @@ -97,7 +97,7 @@ func (r *persistingRunner) run(done chan error, stop <-chan struct{}) { select { case <-stop: return // correctly stopped — no leak - case <-time.After(100 * time.Millisecond): + case <-time.After(20 * time.Millisecond): close(r.running) // stop never closed — goroutine leak <-r.stop } From c267d9f8791dfd3cffcb286ea1e837778eb2389c Mon Sep 17 00:00:00 2001 From: Michael Hess Date: Sat, 14 Mar 2026 14:19:09 +0100 Subject: [PATCH 5/6] tariff: revert 429 non-permanent treatment per review feedback HTTP 429 within the inner backoff.Retry loop is correctly permanent: it stops immediate retries and defers to the outer hourly tick loop, which is the right way to respect rate limiting. Keeping the goroutine leak fix is sufficient to address issue #26654. Co-Authored-By: Claude Sonnet 4.6 --- tariff/helper.go | 7 +------ tariff/helper_test.go | 45 ------------------------------------------- 2 files changed, 1 insertion(+), 51 deletions(-) diff --git a/tariff/helper.go b/tariff/helper.go index 07137ae8d8a..358e37982b5 100644 --- a/tariff/helper.go +++ b/tariff/helper.go @@ -2,7 +2,6 @@ package tariff import ( "errors" - "net/http" "strings" "time" @@ -29,14 +28,10 @@ func bo() backoff.BackOff { ) } -// backoffPermanentError returns a permanent error in case of HTTP 4xx/5xx, -// except for HTTP 429 (Too Many Requests) which is transient and should be retried. +// backoffPermanentError returns a permanent error in case of HTTP 400 func backoffPermanentError(err error) error { if se, ok := errors.AsType[*request.StatusError](err); ok { if code := se.StatusCode(); code >= 400 && code <= 599 { - if code == http.StatusTooManyRequests { - return err - } return backoff.Permanent(se) } } diff --git a/tariff/helper_test.go b/tariff/helper_test.go index 6b1c1bf169f..b62e3dd3e4d 100644 --- a/tariff/helper_test.go +++ b/tariff/helper_test.go @@ -2,15 +2,12 @@ package tariff import ( "errors" - "net/http" "testing" "time" "github.com/benbjohnson/clock" - "github.com/cenkalti/backoff/v4" "github.com/evcc-io/evcc/api" "github.com/evcc-io/evcc/util" - "github.com/evcc-io/evcc/util/request" "github.com/jinzhu/now" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -151,45 +148,3 @@ func TestRunOrError_DoesNotLeakGoroutineOnInitialFailure(t *testing.T) { } } -// statusErrorFor creates a *request.StatusError for the given HTTP status code, -// matching what request.Helper.GetJSON returns for non-2xx responses. -func statusErrorFor(code int) error { - req, _ := http.NewRequest(http.MethodGet, "http://example.com", nil) - resp := &http.Response{ - StatusCode: code, - Request: req, - } - return request.NewStatusError(resp) -} - -// TestBackoffPermanentError_429IsTransient confirms that HTTP 429 (Too Many Requests) -// is NOT treated as a permanent error by backoffPermanentError. -// -// A 429 is a transient rate-limit signal: the caller should back off and retry. -// Marking it permanent causes backoff.Retry to give up immediately, and when this -// happens on the very first API call during startup the tariff is never registered -// (runOrError propagates the error and discards the tariff object). See issue #26654. -func TestBackoffPermanentError_429IsTransient(t *testing.T) { - err := backoffPermanentError(statusErrorFor(http.StatusTooManyRequests)) - - var pe *backoff.PermanentError - assert.False(t, errors.As(err, &pe), - "HTTP 429 must not be permanent: it is transient and backoff.Retry should keep retrying") -} - -// TestBackoffPermanentError_4xxIsPermanent confirms that other 4xx errors (e.g. 400 -// Bad Request) remain permanent — they will not succeed on retry. -func TestBackoffPermanentError_4xxIsPermanent(t *testing.T) { - for _, code := range []int{ - http.StatusBadRequest, - http.StatusUnauthorized, - http.StatusForbidden, - http.StatusNotFound, - } { - err := backoffPermanentError(statusErrorFor(code)) - - var pe *backoff.PermanentError - assert.True(t, errors.As(err, &pe), - "HTTP %d should be permanent (won't succeed on retry)", code) - } -} From 12bb84919d8c9cd28cb30b9f99008b1f041001c5 Mon Sep 17 00:00:00 2001 From: Michael Hess Date: Sat, 14 Mar 2026 14:41:00 +0100 Subject: [PATCH 6/6] fix linter --- tariff/helper_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/tariff/helper_test.go b/tariff/helper_test.go index b62e3dd3e4d..ad88690c092 100644 --- a/tariff/helper_test.go +++ b/tariff/helper_test.go @@ -147,4 +147,3 @@ func TestRunOrError_DoesNotLeakGoroutineOnInitialFailure(t *testing.T) { // goroutine has stopped — no leak, desired behaviour } } -