fix(mcu): escalate overtemp and watchdog-timeout faults to Emergency_Stop()
handleSystemError() only called Emergency_Stop() for error codes in
[ERROR_RF_PA_OVERCURRENT .. ERROR_POWER_SUPPLY] (9..13). Two critical
faults were left out of the gate and fell through to attemptErrorRecovery()'s
default log-and-continue branch:
- ERROR_TEMPERATURE_HIGH (14): raised by checkSystemHealth() when the
hottest of 8 PA thermal sensors exceeds 75 C. Without cutting bias
(DAC CLR) and the PA 5V0/5V5/RFPA_VDD rails, the 10 W GaN QPA2962
stages remain biased in an overtemperature state -- a thermal-runaway
path in AERIS-10E.
- ERROR_WATCHDOG_TIMEOUT (16): indicates the health-check loop has
stalled (>60 s since last pass). Transmitter state is unknown;
relying on IWDG to reset the MCU re-runs startup and re-energises
the PA rails rather than latching the safe state.
Fix: extend the critical-error predicate so these two codes also trigger
Emergency_Stop(). Add test_gap3_overtemp_emergency_stop.c, which checks 14 of
the 17 SystemError_t values (7 must-trigger, 7 must-not-trigger;
ERROR_ADF4382_RX_UNLOCK, ERROR_ADAR1000_TEMP and ERROR_BMP180_COMM are not
exercised), wired into tests/Makefile alongside the existing gap-3 safety tests.
This commit is contained in:
@@ -885,8 +885,22 @@ void handleSystemError(SystemError_t error) {
|
|||||||
HAL_Delay(200);
|
HAL_Delay(200);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Critical errors trigger emergency shutdown
|
// Critical errors trigger emergency shutdown.
|
||||||
if (error >= ERROR_RF_PA_OVERCURRENT && error <= ERROR_POWER_SUPPLY) {
|
//
|
||||||
|
// Safety-critical range: any fault that can damage the PAs or leave the
|
||||||
|
// system in an undefined state must cut the RF rails via Emergency_Stop().
|
||||||
|
// This covers:
|
||||||
|
// ERROR_RF_PA_OVERCURRENT .. ERROR_POWER_SUPPLY (9..13) -- PA/supply faults
|
||||||
|
// ERROR_TEMPERATURE_HIGH (14) -- >75 C on the PA thermal sensors;
|
||||||
|
// without cutting bias + 5V/5V5/RFPA rails
|
||||||
|
// the GaN QPA2962 stage can thermal-runaway.
|
||||||
|
// ERROR_WATCHDOG_TIMEOUT (16) -- health-check loop has stalled (>60 s);
|
||||||
|
// transmitter state is unknown, safest to
|
||||||
|
// latch Emergency_Stop rather than rely on
|
||||||
|
// IWDG reset (which re-energises the rails).
|
||||||
|
if ((error >= ERROR_RF_PA_OVERCURRENT && error <= ERROR_POWER_SUPPLY) ||
|
||||||
|
error == ERROR_TEMPERATURE_HIGH ||
|
||||||
|
error == ERROR_WATCHDOG_TIMEOUT) {
|
||||||
DIAG_ERR("SYS", "CRITICAL ERROR (code %d: %s) -- initiating Emergency_Stop()", error, error_strings[error]);
|
DIAG_ERR("SYS", "CRITICAL ERROR (code %d: %s) -- initiating Emergency_Stop()", error, error_strings[error]);
|
||||||
snprintf(error_msg, sizeof(error_msg),
|
snprintf(error_msg, sizeof(error_msg),
|
||||||
"CRITICAL ERROR! Initiating emergency shutdown.\r\n");
|
"CRITICAL ERROR! Initiating emergency shutdown.\r\n");
|
||||||
|
|||||||
@@ -64,7 +64,8 @@ TESTS_STANDALONE := test_bug12_pa_cal_loop_inverted \
|
|||||||
test_gap3_iwdg_config \
|
test_gap3_iwdg_config \
|
||||||
test_gap3_temperature_max \
|
test_gap3_temperature_max \
|
||||||
test_gap3_idq_periodic_reread \
|
test_gap3_idq_periodic_reread \
|
||||||
test_gap3_emergency_state_ordering
|
test_gap3_emergency_state_ordering \
|
||||||
|
test_gap3_overtemp_emergency_stop
|
||||||
|
|
||||||
# Tests that need platform_noos_stm32.o + mocks
|
# Tests that need platform_noos_stm32.o + mocks
|
||||||
TESTS_WITH_PLATFORM := test_bug11_platform_spi_transmit_only
|
TESTS_WITH_PLATFORM := test_bug11_platform_spi_transmit_only
|
||||||
@@ -76,7 +77,8 @@ ALL_TESTS := $(TESTS_WITH_REAL) $(TESTS_MOCK_ONLY) $(TESTS_STANDALONE) $(TESTS_W
|
|||||||
|
|
||||||
.PHONY: all build test clean \
|
.PHONY: all build test clean \
|
||||||
$(addprefix test_,bug1 bug2 bug3 bug4 bug5 bug6 bug7 bug8 bug9 bug10 bug11 bug12 bug13 bug14 bug15) \
|
$(addprefix test_,bug1 bug2 bug3 bug4 bug5 bug6 bug7 bug8 bug9 bug10 bug11 bug12 bug13 bug14 bug15) \
|
||||||
test_gap3_estop test_gap3_iwdg test_gap3_temp test_gap3_idq test_gap3_order
|
test_gap3_estop test_gap3_iwdg test_gap3_temp test_gap3_idq test_gap3_order \
|
||||||
|
test_gap3_overtemp
|
||||||
|
|
||||||
all: build test
|
all: build test
|
||||||
|
|
||||||
@@ -162,6 +164,9 @@ test_gap3_idq_periodic_reread: test_gap3_idq_periodic_reread.c
|
|||||||
test_gap3_emergency_state_ordering: test_gap3_emergency_state_ordering.c
|
test_gap3_emergency_state_ordering: test_gap3_emergency_state_ordering.c
|
||||||
$(CC) $(CFLAGS) $< -o $@
|
$(CC) $(CFLAGS) $< -o $@
|
||||||
|
|
||||||
|
test_gap3_overtemp_emergency_stop: test_gap3_overtemp_emergency_stop.c
|
||||||
|
$(CC) $(CFLAGS) $< -o $@
|
||||||
|
|
||||||
# Tests that need platform_noos_stm32.o + mocks
|
# Tests that need platform_noos_stm32.o + mocks
|
||||||
$(TESTS_WITH_PLATFORM): %: %.c $(MOCK_OBJS) $(PLATFORM_OBJ)
|
$(TESTS_WITH_PLATFORM): %: %.c $(MOCK_OBJS) $(PLATFORM_OBJ)
|
||||||
$(CC) $(CFLAGS) $(INCLUDES) $< $(MOCK_OBJS) $(PLATFORM_OBJ) -o $@
|
$(CC) $(CFLAGS) $(INCLUDES) $< $(MOCK_OBJS) $(PLATFORM_OBJ) -o $@
|
||||||
@@ -246,6 +251,9 @@ test_gap3_idq: test_gap3_idq_periodic_reread
|
|||||||
test_gap3_order: test_gap3_emergency_state_ordering
|
test_gap3_order: test_gap3_emergency_state_ordering
|
||||||
./test_gap3_emergency_state_ordering
|
./test_gap3_emergency_state_ordering
|
||||||
|
|
||||||
|
test_gap3_overtemp: test_gap3_overtemp_emergency_stop
|
||||||
|
./test_gap3_overtemp_emergency_stop
|
||||||
|
|
||||||
# --- Clean ---
|
# --- Clean ---
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
|
|||||||
@@ -0,0 +1,119 @@
|
|||||||
|
/*******************************************************************************
|
||||||
|
* test_gap3_overtemp_emergency_stop.c
|
||||||
|
*
|
||||||
|
* Safety bug: handleSystemError() did not escalate ERROR_TEMPERATURE_HIGH
|
||||||
|
* (or ERROR_WATCHDOG_TIMEOUT) to Emergency_Stop().
|
||||||
|
*
|
||||||
|
* Before fix: The critical-error gate was
|
||||||
|
* if (error >= ERROR_RF_PA_OVERCURRENT &&
|
||||||
|
* error <= ERROR_POWER_SUPPLY) { Emergency_Stop(); }
|
||||||
|
* So overtemp (code 14) and watchdog timeout (code 16) fell
|
||||||
|
* through to attemptErrorRecovery()'s default branch (log and
|
||||||
|
* continue), leaving the 10 W GaN PAs biased at >75 °C.
|
||||||
|
*
|
||||||
|
* After fix: The gate also matches ERROR_TEMPERATURE_HIGH and
|
||||||
|
* ERROR_WATCHDOG_TIMEOUT, so thermal and watchdog faults
|
||||||
|
* latch Emergency_Stop() exactly like PA overcurrent.
|
||||||
|
*
|
||||||
|
* Test strategy:
|
||||||
|
* Replicate the critical-error predicate and assert that every error
|
||||||
|
* enum value which threatens RF/power safety is accepted, and that the
|
||||||
|
* non-critical ones (comm, sensor, memory) are not.
|
||||||
|
******************************************************************************/
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
/* Mirror of SystemError_t from main.cpp (keep in lockstep). */
|
||||||
|
typedef enum {
|
||||||
|
ERROR_NONE = 0,
|
||||||
|
ERROR_AD9523_CLOCK,
|
||||||
|
ERROR_ADF4382_TX_UNLOCK,
|
||||||
|
ERROR_ADF4382_RX_UNLOCK,
|
||||||
|
ERROR_ADAR1000_COMM,
|
||||||
|
ERROR_ADAR1000_TEMP,
|
||||||
|
ERROR_IMU_COMM,
|
||||||
|
ERROR_BMP180_COMM,
|
||||||
|
ERROR_GPS_COMM,
|
||||||
|
ERROR_RF_PA_OVERCURRENT,
|
||||||
|
ERROR_RF_PA_BIAS,
|
||||||
|
ERROR_STEPPER_MOTOR,
|
||||||
|
ERROR_FPGA_COMM,
|
||||||
|
ERROR_POWER_SUPPLY,
|
||||||
|
ERROR_TEMPERATURE_HIGH,
|
||||||
|
ERROR_MEMORY_ALLOC,
|
||||||
|
ERROR_WATCHDOG_TIMEOUT
|
||||||
|
} SystemError_t;
|
||||||
|
|
||||||
|
/* Extracted post-fix predicate: returns 1 when Emergency_Stop() must fire. */
|
||||||
|
static int triggers_emergency_stop(SystemError_t e)
|
||||||
|
{
|
||||||
|
return ((e >= ERROR_RF_PA_OVERCURRENT && e <= ERROR_POWER_SUPPLY) ||
|
||||||
|
e == ERROR_TEMPERATURE_HIGH ||
|
||||||
|
e == ERROR_WATCHDOG_TIMEOUT);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(void)
|
||||||
|
{
|
||||||
|
printf("=== Safety fix: overtemp / watchdog -> Emergency_Stop() ===\n");
|
||||||
|
|
||||||
|
/* --- Errors that MUST latch Emergency_Stop --- */
|
||||||
|
printf(" Test 1: ERROR_RF_PA_OVERCURRENT triggers... ");
|
||||||
|
assert(triggers_emergency_stop(ERROR_RF_PA_OVERCURRENT));
|
||||||
|
printf("PASS\n");
|
||||||
|
|
||||||
|
printf(" Test 2: ERROR_RF_PA_BIAS triggers... ");
|
||||||
|
assert(triggers_emergency_stop(ERROR_RF_PA_BIAS));
|
||||||
|
printf("PASS\n");
|
||||||
|
|
||||||
|
printf(" Test 3: ERROR_STEPPER_MOTOR triggers... ");
|
||||||
|
assert(triggers_emergency_stop(ERROR_STEPPER_MOTOR));
|
||||||
|
printf("PASS\n");
|
||||||
|
|
||||||
|
printf(" Test 4: ERROR_FPGA_COMM triggers... ");
|
||||||
|
assert(triggers_emergency_stop(ERROR_FPGA_COMM));
|
||||||
|
printf("PASS\n");
|
||||||
|
|
||||||
|
printf(" Test 5: ERROR_POWER_SUPPLY triggers... ");
|
||||||
|
assert(triggers_emergency_stop(ERROR_POWER_SUPPLY));
|
||||||
|
printf("PASS\n");
|
||||||
|
|
||||||
|
printf(" Test 6: ERROR_TEMPERATURE_HIGH triggers (regression)... ");
|
||||||
|
assert(triggers_emergency_stop(ERROR_TEMPERATURE_HIGH));
|
||||||
|
printf("PASS\n");
|
||||||
|
|
||||||
|
printf(" Test 7: ERROR_WATCHDOG_TIMEOUT triggers (regression)... ");
|
||||||
|
assert(triggers_emergency_stop(ERROR_WATCHDOG_TIMEOUT));
|
||||||
|
printf("PASS\n");
|
||||||
|
|
||||||
|
/* --- Errors that MUST NOT escalate (recoverable / informational) --- */
|
||||||
|
printf(" Test 8: ERROR_NONE does not trigger... ");
|
||||||
|
assert(!triggers_emergency_stop(ERROR_NONE));
|
||||||
|
printf("PASS\n");
|
||||||
|
|
||||||
|
printf(" Test 9: ERROR_AD9523_CLOCK does not trigger... ");
|
||||||
|
assert(!triggers_emergency_stop(ERROR_AD9523_CLOCK));
|
||||||
|
printf("PASS\n");
|
||||||
|
|
||||||
|
printf(" Test 10: ERROR_ADF4382_TX_UNLOCK does not trigger (recoverable)... ");
|
||||||
|
assert(!triggers_emergency_stop(ERROR_ADF4382_TX_UNLOCK));
|
||||||
|
printf("PASS\n");
|
||||||
|
|
||||||
|
printf(" Test 11: ERROR_ADAR1000_COMM does not trigger... ");
|
||||||
|
assert(!triggers_emergency_stop(ERROR_ADAR1000_COMM));
|
||||||
|
printf("PASS\n");
|
||||||
|
|
||||||
|
printf(" Test 12: ERROR_IMU_COMM does not trigger... ");
|
||||||
|
assert(!triggers_emergency_stop(ERROR_IMU_COMM));
|
||||||
|
printf("PASS\n");
|
||||||
|
|
||||||
|
printf(" Test 13: ERROR_GPS_COMM does not trigger... ");
|
||||||
|
assert(!triggers_emergency_stop(ERROR_GPS_COMM));
|
||||||
|
printf("PASS\n");
|
||||||
|
|
||||||
|
printf(" Test 14: ERROR_MEMORY_ALLOC does not trigger... ");
|
||||||
|
assert(!triggers_emergency_stop(ERROR_MEMORY_ALLOC));
|
||||||
|
printf("PASS\n");
|
||||||
|
|
||||||
|
printf("\n=== Safety fix: ALL TESTS PASSED ===\n\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user