Merge origin/develop into feat/um982-gps-driver
Co-authored-by: JJassonn69 <83615043+JJassonn69@users.noreply.github.com>
This commit is contained in:
@@ -622,7 +622,8 @@ typedef enum {
|
|||||||
ERROR_POWER_SUPPLY,
|
ERROR_POWER_SUPPLY,
|
||||||
ERROR_TEMPERATURE_HIGH,
|
ERROR_TEMPERATURE_HIGH,
|
||||||
ERROR_MEMORY_ALLOC,
|
ERROR_MEMORY_ALLOC,
|
||||||
ERROR_WATCHDOG_TIMEOUT
|
ERROR_WATCHDOG_TIMEOUT,
|
||||||
|
ERROR_COUNT // must be last — used for bounds checking error_strings[]
|
||||||
} SystemError_t;
|
} SystemError_t;
|
||||||
|
|
||||||
static SystemError_t last_error = ERROR_NONE;
|
static SystemError_t last_error = ERROR_NONE;
|
||||||
@@ -633,6 +634,27 @@ static bool system_emergency_state = false;
|
|||||||
SystemError_t checkSystemHealth(void) {
|
SystemError_t checkSystemHealth(void) {
|
||||||
SystemError_t current_error = ERROR_NONE;
|
SystemError_t current_error = ERROR_NONE;
|
||||||
|
|
||||||
|
// 0. Watchdog: detect main-loop stall (checkSystemHealth not called for >60 s).
|
||||||
|
// Timestamp is captured at function ENTRY and updated unconditionally, so
|
||||||
|
// any early return from a sub-check below cannot leave a stale value that
|
||||||
|
// would later trip a spurious ERROR_WATCHDOG_TIMEOUT. A dedicated cold-start
|
||||||
|
// branch ensures the first call after boot never trips (last_health_check==0
|
||||||
|
// would otherwise make `HAL_GetTick() - 0 > 60000` true forever after the
|
||||||
|
// 60-s mark of the init sequence).
|
||||||
|
static uint32_t last_health_check = 0;
|
||||||
|
uint32_t now_tick = HAL_GetTick();
|
||||||
|
if (last_health_check == 0) {
|
||||||
|
last_health_check = now_tick; // cold start: seed only
|
||||||
|
} else {
|
||||||
|
uint32_t elapsed = now_tick - last_health_check;
|
||||||
|
last_health_check = now_tick; // update BEFORE any early return
|
||||||
|
if (elapsed > 60000) {
|
||||||
|
current_error = ERROR_WATCHDOG_TIMEOUT;
|
||||||
|
DIAG_ERR("SYS", "Health check: Watchdog timeout (>60s since last check)");
|
||||||
|
return current_error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// 1. Check AD9523 Clock Generator
|
// 1. Check AD9523 Clock Generator
|
||||||
static uint32_t last_clock_check = 0;
|
static uint32_t last_clock_check = 0;
|
||||||
if (HAL_GetTick() - last_clock_check > 5000) {
|
if (HAL_GetTick() - last_clock_check > 5000) {
|
||||||
@@ -733,14 +755,7 @@ SystemError_t checkSystemHealth(void) {
|
|||||||
return current_error;
|
return current_error;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 9. Simple watchdog check
|
// 9. Watchdog check is performed at function entry (see step 0).
|
||||||
static uint32_t last_health_check = 0;
|
|
||||||
if (HAL_GetTick() - last_health_check > 60000) {
|
|
||||||
current_error = ERROR_WATCHDOG_TIMEOUT;
|
|
||||||
DIAG_ERR("SYS", "Health check: Watchdog timeout (>60s since last check)");
|
|
||||||
return current_error;
|
|
||||||
}
|
|
||||||
last_health_check = HAL_GetTick();
|
|
||||||
|
|
||||||
if (current_error != ERROR_NONE) {
|
if (current_error != ERROR_NONE) {
|
||||||
DIAG_ERR("SYS", "checkSystemHealth returning error code %d", current_error);
|
DIAG_ERR("SYS", "checkSystemHealth returning error code %d", current_error);
|
||||||
@@ -852,7 +867,7 @@ void handleSystemError(SystemError_t error) {
|
|||||||
DIAG_ERR("SYS", "handleSystemError: error=%d error_count=%lu", error, error_count);
|
DIAG_ERR("SYS", "handleSystemError: error=%d error_count=%lu", error, error_count);
|
||||||
|
|
||||||
char error_msg[100];
|
char error_msg[100];
|
||||||
const char* error_strings[] = {
|
static const char* const error_strings[] = {
|
||||||
"No error",
|
"No error",
|
||||||
"AD9523 Clock failure",
|
"AD9523 Clock failure",
|
||||||
"ADF4382 TX LO unlocked",
|
"ADF4382 TX LO unlocked",
|
||||||
@@ -872,9 +887,16 @@ void handleSystemError(SystemError_t error) {
|
|||||||
"Watchdog timeout"
|
"Watchdog timeout"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static_assert(sizeof(error_strings) / sizeof(error_strings[0]) == ERROR_COUNT,
|
||||||
|
"error_strings[] and SystemError_t enum are out of sync");
|
||||||
|
|
||||||
|
const char* err_name = (error >= 0 && error < (int)(sizeof(error_strings) / sizeof(error_strings[0])))
|
||||||
|
? error_strings[error]
|
||||||
|
: "Unknown error";
|
||||||
|
|
||||||
snprintf(error_msg, sizeof(error_msg),
|
snprintf(error_msg, sizeof(error_msg),
|
||||||
"ERROR #%d: %s (Count: %lu)\r\n",
|
"ERROR #%d: %s (Count: %lu)\r\n",
|
||||||
error, error_strings[error], error_count);
|
error, err_name, error_count);
|
||||||
HAL_UART_Transmit(&huart3, (uint8_t*)error_msg, strlen(error_msg), 1000);
|
HAL_UART_Transmit(&huart3, (uint8_t*)error_msg, strlen(error_msg), 1000);
|
||||||
|
|
||||||
// Blink LED pattern based on error code
|
// Blink LED pattern based on error code
|
||||||
@@ -900,7 +922,7 @@ void handleSystemError(SystemError_t error) {
|
|||||||
if ((error >= ERROR_RF_PA_OVERCURRENT && error <= ERROR_POWER_SUPPLY) ||
|
if ((error >= ERROR_RF_PA_OVERCURRENT && error <= ERROR_POWER_SUPPLY) ||
|
||||||
error == ERROR_TEMPERATURE_HIGH ||
|
error == ERROR_TEMPERATURE_HIGH ||
|
||||||
error == ERROR_WATCHDOG_TIMEOUT) {
|
error == ERROR_WATCHDOG_TIMEOUT) {
|
||||||
DIAG_ERR("SYS", "CRITICAL ERROR (code %d: %s) -- initiating Emergency_Stop()", error, error_strings[error]);
|
DIAG_ERR("SYS", "CRITICAL ERROR (code %d: %s) -- initiating Emergency_Stop()", error, err_name);
|
||||||
snprintf(error_msg, sizeof(error_msg),
|
snprintf(error_msg, sizeof(error_msg),
|
||||||
"CRITICAL ERROR! Initiating emergency shutdown.\r\n");
|
"CRITICAL ERROR! Initiating emergency shutdown.\r\n");
|
||||||
HAL_UART_Transmit(&huart3, (uint8_t*)error_msg, strlen(error_msg), 1000);
|
HAL_UART_Transmit(&huart3, (uint8_t*)error_msg, strlen(error_msg), 1000);
|
||||||
|
|||||||
@@ -3,26 +3,38 @@
|
|||||||
*.dSYM/
|
*.dSYM/
|
||||||
|
|
||||||
# Test binaries (built by Makefile)
|
# Test binaries (built by Makefile)
|
||||||
|
# TESTS_WITH_REAL
|
||||||
test_bug1_timed_sync_init_ordering
|
test_bug1_timed_sync_init_ordering
|
||||||
test_bug2_ad9523_double_setup
|
|
||||||
test_bug3_timed_sync_noop
|
test_bug3_timed_sync_noop
|
||||||
test_bug4_phase_shift_before_check
|
test_bug4_phase_shift_before_check
|
||||||
test_bug5_fine_phase_gpio_only
|
test_bug5_fine_phase_gpio_only
|
||||||
|
test_bug9_platform_ops_null
|
||||||
|
test_bug10_spi_cs_not_toggled
|
||||||
|
test_bug15_htim3_dangling_extern
|
||||||
|
|
||||||
|
# TESTS_MOCK_ONLY
|
||||||
|
test_bug2_ad9523_double_setup
|
||||||
test_bug6_timer_variable_collision
|
test_bug6_timer_variable_collision
|
||||||
test_bug7_gpio_pin_conflict
|
test_bug7_gpio_pin_conflict
|
||||||
test_bug8_uart_commented_out
|
test_bug8_uart_commented_out
|
||||||
test_bug9_platform_ops_null
|
test_bug14_diag_section_args
|
||||||
test_bug10_spi_cs_not_toggled
|
test_gap3_emergency_stop_rails
|
||||||
test_bug11_platform_spi_transmit_only
|
|
||||||
|
# TESTS_STANDALONE
|
||||||
test_bug12_pa_cal_loop_inverted
|
test_bug12_pa_cal_loop_inverted
|
||||||
test_bug13_dac2_adc_buffer_mismatch
|
test_bug13_dac2_adc_buffer_mismatch
|
||||||
test_bug14_diag_section_args
|
|
||||||
test_bug15_htim3_dangling_extern
|
|
||||||
test_agc_outer_loop
|
|
||||||
test_gap3_emergency_state_ordering
|
|
||||||
test_gap3_emergency_stop_rails
|
|
||||||
test_gap3_idq_periodic_reread
|
|
||||||
test_gap3_iwdg_config
|
test_gap3_iwdg_config
|
||||||
test_gap3_overtemp_emergency_stop
|
|
||||||
test_gap3_temperature_max
|
test_gap3_temperature_max
|
||||||
|
test_gap3_idq_periodic_reread
|
||||||
|
test_gap3_emergency_state_ordering
|
||||||
|
test_gap3_overtemp_emergency_stop
|
||||||
|
test_gap3_health_watchdog_cold_start
|
||||||
|
|
||||||
|
# TESTS_WITH_PLATFORM
|
||||||
|
test_bug11_platform_spi_transmit_only
|
||||||
|
|
||||||
|
# TESTS_WITH_CXX
|
||||||
|
test_agc_outer_loop
|
||||||
|
|
||||||
|
# Manual / one-off test builds
|
||||||
test_um982_gps
|
test_um982_gps
|
||||||
|
|||||||
@@ -69,7 +69,8 @@ TESTS_STANDALONE := test_bug12_pa_cal_loop_inverted \
|
|||||||
test_gap3_temperature_max \
|
test_gap3_temperature_max \
|
||||||
test_gap3_idq_periodic_reread \
|
test_gap3_idq_periodic_reread \
|
||||||
test_gap3_emergency_state_ordering \
|
test_gap3_emergency_state_ordering \
|
||||||
test_gap3_overtemp_emergency_stop
|
test_gap3_overtemp_emergency_stop \
|
||||||
|
test_gap3_health_watchdog_cold_start
|
||||||
|
|
||||||
# Tests that need platform_noos_stm32.o + mocks
|
# Tests that need platform_noos_stm32.o + mocks
|
||||||
TESTS_WITH_PLATFORM := test_bug11_platform_spi_transmit_only
|
TESTS_WITH_PLATFORM := test_bug11_platform_spi_transmit_only
|
||||||
@@ -85,7 +86,7 @@ ALL_TESTS := $(TESTS_WITH_REAL) $(TESTS_MOCK_ONLY) $(TESTS_STANDALONE) $(TESTS_W
|
|||||||
.PHONY: all build test clean \
|
.PHONY: all build test clean \
|
||||||
$(addprefix test_,bug1 bug2 bug3 bug4 bug5 bug6 bug7 bug8 bug9 bug10 bug11 bug12 bug13 bug14 bug15) \
|
$(addprefix test_,bug1 bug2 bug3 bug4 bug5 bug6 bug7 bug8 bug9 bug10 bug11 bug12 bug13 bug14 bug15) \
|
||||||
test_gap3_estop test_gap3_iwdg test_gap3_temp test_gap3_idq test_gap3_order \
|
test_gap3_estop test_gap3_iwdg test_gap3_temp test_gap3_idq test_gap3_order \
|
||||||
test_gap3_overtemp
|
test_gap3_overtemp test_gap3_wdog
|
||||||
|
|
||||||
all: build test
|
all: build test
|
||||||
|
|
||||||
@@ -174,6 +175,9 @@ test_gap3_emergency_state_ordering: test_gap3_emergency_state_ordering.c
|
|||||||
test_gap3_overtemp_emergency_stop: test_gap3_overtemp_emergency_stop.c
|
test_gap3_overtemp_emergency_stop: test_gap3_overtemp_emergency_stop.c
|
||||||
$(CC) $(CFLAGS) $< -o $@
|
$(CC) $(CFLAGS) $< -o $@
|
||||||
|
|
||||||
|
test_gap3_health_watchdog_cold_start: test_gap3_health_watchdog_cold_start.c
|
||||||
|
$(CC) $(CFLAGS) $< -o $@
|
||||||
|
|
||||||
# Tests that need platform_noos_stm32.o + mocks
|
# Tests that need platform_noos_stm32.o + mocks
|
||||||
$(TESTS_WITH_PLATFORM): %: %.c $(MOCK_OBJS) $(PLATFORM_OBJ)
|
$(TESTS_WITH_PLATFORM): %: %.c $(MOCK_OBJS) $(PLATFORM_OBJ)
|
||||||
$(CC) $(CFLAGS) $(INCLUDES) $< $(MOCK_OBJS) $(PLATFORM_OBJ) -o $@
|
$(CC) $(CFLAGS) $(INCLUDES) $< $(MOCK_OBJS) $(PLATFORM_OBJ) -o $@
|
||||||
@@ -275,6 +279,9 @@ test_gap3_order: test_gap3_emergency_state_ordering
|
|||||||
test_gap3_overtemp: test_gap3_overtemp_emergency_stop
|
test_gap3_overtemp: test_gap3_overtemp_emergency_stop
|
||||||
./test_gap3_overtemp_emergency_stop
|
./test_gap3_overtemp_emergency_stop
|
||||||
|
|
||||||
|
test_gap3_wdog: test_gap3_health_watchdog_cold_start
|
||||||
|
./test_gap3_health_watchdog_cold_start
|
||||||
|
|
||||||
# --- Clean ---
|
# --- Clean ---
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
|
|||||||
@@ -0,0 +1,132 @@
|
|||||||
|
/*******************************************************************************
|
||||||
|
* test_gap3_health_watchdog_cold_start.c
|
||||||
|
*
|
||||||
|
* Safety bug: checkSystemHealth()'s internal watchdog (step 9, pre-fix) had two
|
||||||
|
* linked defects that, once ERROR_WATCHDOG_TIMEOUT was escalated to
|
||||||
|
* Emergency_Stop() by the overtemp/watchdog PR, would false-latch the radar:
|
||||||
|
*
|
||||||
|
* (1) Cold-start false trip:
|
||||||
|
* static uint32_t last_health_check = 0;
|
||||||
|
* if (HAL_GetTick() - last_health_check > 60000) { ... }
|
||||||
|
* On the very first call, last_health_check == 0, so once the MCU has
|
||||||
|
* been up >60 s (which is typical after the ADAR1000 / AD9523 / ADF4382
|
||||||
|
* init sequence) the subtraction `now - 0` exceeds 60 000 ms and the
|
||||||
|
* watchdog trips spuriously.
|
||||||
|
*
|
||||||
|
* (2) Stale-timestamp after early returns:
|
||||||
|
* last_health_check = HAL_GetTick(); // at END of function
|
||||||
|
* Every earlier sub-check (IMU, BMP180, GPS, PA Idq, temperature) has an
|
||||||
|
* `if (fault) return current_error;` path that skips the update. After a
|
||||||
|
* cumulative 60 s of transient faults, the next clean call compares
|
||||||
|
* `now` against the long-stale `last_health_check` and trips.
|
||||||
|
*
|
||||||
|
* After fix: Watchdog logic moved to function ENTRY. A dedicated cold-start
|
||||||
|
* branch seeds the timestamp on the first call without checking.
|
||||||
|
* On every subsequent call, the elapsed delta is captured FIRST
|
||||||
|
* and last_health_check is updated BEFORE any sub-check runs, so
|
||||||
|
* early returns no longer leave a stale value.
|
||||||
|
*
|
||||||
|
* Test strategy:
|
||||||
|
* Extract the post-fix watchdog predicate into a standalone function that
|
||||||
|
* takes a simulated HAL_GetTick() value and returns whether the watchdog
|
||||||
|
* should trip. Walk through boot + fault sequences that would have tripped
|
||||||
|
* the pre-fix code and assert the post-fix code does NOT trip.
|
||||||
|
******************************************************************************/
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
/* --- Post-fix watchdog state + predicate, extracted verbatim --- */
|
||||||
|
static uint32_t last_health_check = 0;
|
||||||
|
|
||||||
|
/* Returns 1 iff this call should raise ERROR_WATCHDOG_TIMEOUT.
|
||||||
|
Updates last_health_check BEFORE returning (matches post-fix behaviour). */
|
||||||
|
static int health_watchdog_step(uint32_t now_tick)
|
||||||
|
{
|
||||||
|
if (last_health_check == 0) {
|
||||||
|
last_health_check = now_tick; /* cold start: seed only, never trip */
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
uint32_t elapsed = now_tick - last_health_check;
|
||||||
|
last_health_check = now_tick; /* update BEFORE any early return */
|
||||||
|
return (elapsed > 60000) ? 1 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Test helper: reset the static state between scenarios. */
|
||||||
|
static void reset_state(void) { last_health_check = 0; }
|
||||||
|
|
||||||
|
int main(void)
|
||||||
|
{
|
||||||
|
printf("=== Safety fix: checkSystemHealth() watchdog cold-start + stale-ts ===\n");
|
||||||
|
|
||||||
|
/* ---------- Scenario 1: cold-start after 60 s of init must NOT trip ---- */
|
||||||
|
printf(" Test 1: first call at t=75000 ms (post-init) does not trip... ");
|
||||||
|
reset_state();
|
||||||
|
assert(health_watchdog_step(75000) == 0);
|
||||||
|
printf("PASS\n");
|
||||||
|
|
||||||
|
/* ---------- Scenario 2: first call far beyond 60 s (PRE-FIX BUG) ------- */
|
||||||
|
printf(" Test 2: first call at t=600000 ms still does not trip... ");
|
||||||
|
reset_state();
|
||||||
|
assert(health_watchdog_step(600000) == 0);
|
||||||
|
printf("PASS\n");
|
||||||
|
|
||||||
|
/* ---------- Scenario 3: healthy main-loop pacing (10 ms period) -------- */
|
||||||
|
printf(" Test 3: 1000 calls at 10 ms intervals never trip... ");
|
||||||
|
reset_state();
|
||||||
|
(void)health_watchdog_step(1000); /* seed */
|
||||||
|
for (int i = 1; i <= 1000; i++) {
|
||||||
|
assert(health_watchdog_step(1000 + i * 10) == 0);
|
||||||
|
}
|
||||||
|
printf("PASS\n");
|
||||||
|
|
||||||
|
/* ---------- Scenario 4: stale-timestamp after a burst of early returns -
|
||||||
|
Pre-fix bug: many early returns skipped the timestamp update, so a
|
||||||
|
later clean call would compare `now` against a 60+ s old value. Post-fix,
|
||||||
|
every call (including ones that would have early-returned in the real
|
||||||
|
function) updates the timestamp at the top, so this scenario is modelled
|
||||||
|
by calling health_watchdog_step() on every iteration of the main loop. */
|
||||||
|
printf(" Test 4: 70 s of 100 ms-spaced calls after seed do not trip... ");
|
||||||
|
reset_state();
|
||||||
|
(void)health_watchdog_step(50000); /* seed mid-run */
|
||||||
|
for (int i = 1; i <= 700; i++) { /* 70 s @ 100 ms */
|
||||||
|
int tripped = health_watchdog_step(50000 + i * 100);
|
||||||
|
assert(tripped == 0);
|
||||||
|
}
|
||||||
|
printf("PASS\n");
|
||||||
|
|
||||||
|
/* ---------- Scenario 5: genuine stall MUST trip ------------------------ */
|
||||||
|
printf(" Test 5: real 60+ s gap between calls does trip... ");
|
||||||
|
reset_state();
|
||||||
|
(void)health_watchdog_step(10000); /* seed */
|
||||||
|
assert(health_watchdog_step(10000 + 60001) == 1);
|
||||||
|
printf("PASS\n");
|
||||||
|
|
||||||
|
/* ---------- Scenario 6: exactly 60 s gap is the boundary -- do NOT trip
|
||||||
|
Post-fix predicate uses strict >60000, matching the pre-fix comparator. */
|
||||||
|
printf(" Test 6: exactly 60000 ms gap does not trip (boundary)... ");
|
||||||
|
reset_state();
|
||||||
|
(void)health_watchdog_step(10000);
|
||||||
|
assert(health_watchdog_step(10000 + 60000) == 0);
|
||||||
|
printf("PASS\n");
|
||||||
|
|
||||||
|
/* ---------- Scenario 7: trip, then recover on next paced call ---------- */
|
||||||
|
printf(" Test 7: after a genuine stall+trip, next paced call does not re-trip... ");
|
||||||
|
reset_state();
|
||||||
|
(void)health_watchdog_step(5000); /* seed */
|
||||||
|
assert(health_watchdog_step(5000 + 70000) == 1); /* stall -> trip */
|
||||||
|
assert(health_watchdog_step(5000 + 70000 + 10) == 0); /* resume paced */
|
||||||
|
printf("PASS\n");
|
||||||
|
|
||||||
|
/* ---------- Scenario 8: HAL_GetTick() 32-bit wrap (~49.7 days) ---------
|
||||||
|
Because we subtract unsigned 32-bit values, wrap is handled correctly as
|
||||||
|
long as the true elapsed time is < 2^32 ms. */
|
||||||
|
printf(" Test 8: tick wrap from 0xFFFFFF00 -> 0x00000064 (200 ms span) does not trip... ");
|
||||||
|
reset_state();
|
||||||
|
(void)health_watchdog_step(0xFFFFFF00u);
|
||||||
|
assert(health_watchdog_step(0x00000064u) == 0); /* elapsed = 0x164 = 356 ms */
|
||||||
|
printf("PASS\n");
|
||||||
|
|
||||||
|
printf("\n=== Safety fix: ALL TESTS PASSED ===\n\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user