From a2686b742402aa9faf07499ad5b88830a6c50910 Mon Sep 17 00:00:00 2001 From: 3aLaee Date: Tue, 14 Apr 2026 21:53:39 +0200 Subject: [PATCH 1/3] fix(mcu): escalate overtemp and watchdog-timeout faults to Emergency_Stop() handleSystemError() only called Emergency_Stop() for error codes in [ERROR_RF_PA_OVERCURRENT .. ERROR_POWER_SUPPLY] (9..13). Two critical faults were left out of the gate and fell through to attemptErrorRecovery()'s default log-and-continue branch: - ERROR_TEMPERATURE_HIGH (14): raised by checkSystemHealth() when the hottest of 8 PA thermal sensors exceeds 75 C. Without cutting bias (DAC CLR) and the PA 5V0/5V5/RFPA_VDD rails, the 10 W GaN QPA2962 stages remain biased in an overtemperature state -- a thermal-runaway path in AERIS-10E. - ERROR_WATCHDOG_TIMEOUT (16): indicates the health-check loop has stalled (>60 s since last pass). Transmitter state is unknown; relying on IWDG to reset the MCU re-runs startup and re-energises the PA rails rather than latching the safe state. Fix: extend the critical-error predicate so these two codes also trigger Emergency_Stop(). Add test_gap3_overtemp_emergency_stop.c covering 14 of the 17 SystemError_t values (7 must-trigger, 7 must-not-trigger; ERROR_ADF4382_RX_UNLOCK, ERROR_ADAR1000_TEMP and ERROR_BMP180_COMM are not exercised), wired into tests/Makefile alongside the existing gap-3 safety tests. 
--- .../9_1_3_C_Cpp_Code/main.cpp | 18 ++- 9_Firmware/9_1_Microcontroller/tests/Makefile | 12 +- .../tests/test_gap3_overtemp_emergency_stop.c | 119 ++++++++++++++++++ 3 files changed, 145 insertions(+), 4 deletions(-) create mode 100644 9_Firmware/9_1_Microcontroller/tests/test_gap3_overtemp_emergency_stop.c diff --git a/9_Firmware/9_1_Microcontroller/9_1_3_C_Cpp_Code/main.cpp b/9_Firmware/9_1_Microcontroller/9_1_3_C_Cpp_Code/main.cpp index 09468d0..b8c9392 100644 --- a/9_Firmware/9_1_Microcontroller/9_1_3_C_Cpp_Code/main.cpp +++ b/9_Firmware/9_1_Microcontroller/9_1_3_C_Cpp_Code/main.cpp @@ -885,8 +885,22 @@ void handleSystemError(SystemError_t error) { HAL_Delay(200); } - // Critical errors trigger emergency shutdown - if (error >= ERROR_RF_PA_OVERCURRENT && error <= ERROR_POWER_SUPPLY) { + // Critical errors trigger emergency shutdown. + // + // Safety-critical range: any fault that can damage the PAs or leave the + // system in an undefined state must cut the RF rails via Emergency_Stop(). + // This covers: + // ERROR_RF_PA_OVERCURRENT .. ERROR_POWER_SUPPLY (9..13) -- PA/supply faults + // ERROR_TEMPERATURE_HIGH (14) -- >75 C on the PA thermal sensors; + // without cutting bias + 5V/5V5/RFPA rails + // the GaN QPA2962 stage can thermal-runaway. + // ERROR_WATCHDOG_TIMEOUT (16) -- health-check loop has stalled (>60 s); + // transmitter state is unknown, safest to + // latch Emergency_Stop rather than rely on + // IWDG reset (which re-energises the rails). + if ((error >= ERROR_RF_PA_OVERCURRENT && error <= ERROR_POWER_SUPPLY) || + error == ERROR_TEMPERATURE_HIGH || + error == ERROR_WATCHDOG_TIMEOUT) { DIAG_ERR("SYS", "CRITICAL ERROR (code %d: %s) -- initiating Emergency_Stop()", error, error_strings[error]); snprintf(error_msg, sizeof(error_msg), "CRITICAL ERROR! 
Initiating emergency shutdown.\r\n"); diff --git a/9_Firmware/9_1_Microcontroller/tests/Makefile b/9_Firmware/9_1_Microcontroller/tests/Makefile index 73e7857..9cff000 100644 --- a/9_Firmware/9_1_Microcontroller/tests/Makefile +++ b/9_Firmware/9_1_Microcontroller/tests/Makefile @@ -64,7 +64,8 @@ TESTS_STANDALONE := test_bug12_pa_cal_loop_inverted \ test_gap3_iwdg_config \ test_gap3_temperature_max \ test_gap3_idq_periodic_reread \ - test_gap3_emergency_state_ordering + test_gap3_emergency_state_ordering \r + test_gap3_overtemp_emergency_stop # Tests that need platform_noos_stm32.o + mocks TESTS_WITH_PLATFORM := test_bug11_platform_spi_transmit_only @@ -76,7 +77,8 @@ ALL_TESTS := $(TESTS_WITH_REAL) $(TESTS_MOCK_ONLY) $(TESTS_STANDALONE) $(TESTS_W .PHONY: all build test clean \ $(addprefix test_,bug1 bug2 bug3 bug4 bug5 bug6 bug7 bug8 bug9 bug10 bug11 bug12 bug13 bug14 bug15) \ - test_gap3_estop test_gap3_iwdg test_gap3_temp test_gap3_idq test_gap3_order + test_gap3_estop test_gap3_iwdg test_gap3_temp test_gap3_idq test_gap3_order \r + test_gap3_overtemp all: build test @@ -162,6 +164,9 @@ test_gap3_idq_periodic_reread: test_gap3_idq_periodic_reread.c test_gap3_emergency_state_ordering: test_gap3_emergency_state_ordering.c $(CC) $(CFLAGS) $< -o $@ +test_gap3_overtemp_emergency_stop: test_gap3_overtemp_emergency_stop.c + $(CC) $(CFLAGS) $< -o $@ + # Tests that need platform_noos_stm32.o + mocks $(TESTS_WITH_PLATFORM): %: %.c $(MOCK_OBJS) $(PLATFORM_OBJ) $(CC) $(CFLAGS) $(INCLUDES) $< $(MOCK_OBJS) $(PLATFORM_OBJ) -o $@ @@ -246,6 +251,9 @@ test_gap3_idq: test_gap3_idq_periodic_reread test_gap3_order: test_gap3_emergency_state_ordering ./test_gap3_emergency_state_ordering +test_gap3_overtemp: test_gap3_overtemp_emergency_stop + ./test_gap3_overtemp_emergency_stop + # --- Clean --- clean: diff --git a/9_Firmware/9_1_Microcontroller/tests/test_gap3_overtemp_emergency_stop.c b/9_Firmware/9_1_Microcontroller/tests/test_gap3_overtemp_emergency_stop.c new file mode 100644 
index 0000000..82b0df3 --- /dev/null +++ b/9_Firmware/9_1_Microcontroller/tests/test_gap3_overtemp_emergency_stop.c @@ -0,0 +1,119 @@ +/******************************************************************************* + * test_gap3_overtemp_emergency_stop.c + * + * Safety bug: handleSystemError() did not escalate ERROR_TEMPERATURE_HIGH + * (or ERROR_WATCHDOG_TIMEOUT) to Emergency_Stop(). + * + * Before fix: The critical-error gate was + * if (error >= ERROR_RF_PA_OVERCURRENT && + * error <= ERROR_POWER_SUPPLY) { Emergency_Stop(); } + * So overtemp (code 14) and watchdog timeout (code 16) fell + * through to attemptErrorRecovery()'s default branch (log and + * continue), leaving the 10 W GaN PAs biased at >75 °C. + * + * After fix: The gate also matches ERROR_TEMPERATURE_HIGH and + * ERROR_WATCHDOG_TIMEOUT, so thermal and watchdog faults + * latch Emergency_Stop() exactly like PA overcurrent. + * + * Test strategy: + * Replicate the critical-error predicate and assert that every error + * enum value which threatens RF/power safety is accepted, and that the + * non-critical ones (comm, sensor, memory) are not. + ******************************************************************************/ +#include <assert.h> +#include <stdio.h> + +/* Mirror of SystemError_t from main.cpp (keep in lockstep). */ +typedef enum { + ERROR_NONE = 0, + ERROR_AD9523_CLOCK, + ERROR_ADF4382_TX_UNLOCK, + ERROR_ADF4382_RX_UNLOCK, + ERROR_ADAR1000_COMM, + ERROR_ADAR1000_TEMP, + ERROR_IMU_COMM, + ERROR_BMP180_COMM, + ERROR_GPS_COMM, + ERROR_RF_PA_OVERCURRENT, + ERROR_RF_PA_BIAS, + ERROR_STEPPER_MOTOR, + ERROR_FPGA_COMM, + ERROR_POWER_SUPPLY, + ERROR_TEMPERATURE_HIGH, + ERROR_MEMORY_ALLOC, + ERROR_WATCHDOG_TIMEOUT +} SystemError_t; + +/* Extracted post-fix predicate: returns 1 when Emergency_Stop() must fire. 
*/ +static int triggers_emergency_stop(SystemError_t e) +{ + return ((e >= ERROR_RF_PA_OVERCURRENT && e <= ERROR_POWER_SUPPLY) || + e == ERROR_TEMPERATURE_HIGH || + e == ERROR_WATCHDOG_TIMEOUT); +} + +int main(void) +{ + printf("=== Safety fix: overtemp / watchdog -> Emergency_Stop() ===\n"); + + /* --- Errors that MUST latch Emergency_Stop --- */ + printf(" Test 1: ERROR_RF_PA_OVERCURRENT triggers... "); + assert(triggers_emergency_stop(ERROR_RF_PA_OVERCURRENT)); + printf("PASS\n"); + + printf(" Test 2: ERROR_RF_PA_BIAS triggers... "); + assert(triggers_emergency_stop(ERROR_RF_PA_BIAS)); + printf("PASS\n"); + + printf(" Test 3: ERROR_STEPPER_MOTOR triggers... "); + assert(triggers_emergency_stop(ERROR_STEPPER_MOTOR)); + printf("PASS\n"); + + printf(" Test 4: ERROR_FPGA_COMM triggers... "); + assert(triggers_emergency_stop(ERROR_FPGA_COMM)); + printf("PASS\n"); + + printf(" Test 5: ERROR_POWER_SUPPLY triggers... "); + assert(triggers_emergency_stop(ERROR_POWER_SUPPLY)); + printf("PASS\n"); + + printf(" Test 6: ERROR_TEMPERATURE_HIGH triggers (regression)... "); + assert(triggers_emergency_stop(ERROR_TEMPERATURE_HIGH)); + printf("PASS\n"); + + printf(" Test 7: ERROR_WATCHDOG_TIMEOUT triggers (regression)... "); + assert(triggers_emergency_stop(ERROR_WATCHDOG_TIMEOUT)); + printf("PASS\n"); + + /* --- Errors that MUST NOT escalate (recoverable / informational) --- */ + printf(" Test 8: ERROR_NONE does not trigger... "); + assert(!triggers_emergency_stop(ERROR_NONE)); + printf("PASS\n"); + + printf(" Test 9: ERROR_AD9523_CLOCK does not trigger... "); + assert(!triggers_emergency_stop(ERROR_AD9523_CLOCK)); + printf("PASS\n"); + + printf(" Test 10: ERROR_ADF4382_TX_UNLOCK does not trigger (recoverable)... "); + assert(!triggers_emergency_stop(ERROR_ADF4382_TX_UNLOCK)); + printf("PASS\n"); + + printf(" Test 11: ERROR_ADAR1000_COMM does not trigger... 
"); + assert(!triggers_emergency_stop(ERROR_ADAR1000_COMM)); + printf("PASS\n"); + + printf(" Test 12: ERROR_IMU_COMM does not trigger... "); + assert(!triggers_emergency_stop(ERROR_IMU_COMM)); + printf("PASS\n"); + + printf(" Test 13: ERROR_GPS_COMM does not trigger... "); + assert(!triggers_emergency_stop(ERROR_GPS_COMM)); + printf("PASS\n"); + + printf(" Test 14: ERROR_MEMORY_ALLOC does not trigger... "); + assert(!triggers_emergency_stop(ERROR_MEMORY_ALLOC)); + printf("PASS\n"); + + printf("\n=== Safety fix: ALL TESTS PASSED ===\n\n"); + return 0; +} From 49002820425066bfbee6a22e799768cfdbc3a39e Mon Sep 17 00:00:00 2001 From: 3aLaee Date: Wed, 15 Apr 2026 09:16:03 +0200 Subject: [PATCH 2/3] fix(mcu-tests): strip stray literal backslash-r in Makefile continuations The previous commit accidentally introduced the literal 2-byte sequence '\r' at the end of two backslash-continuation lines (TESTS_STANDALONE and the .PHONY list). GNU make on Linux treats that as text rather than a line continuation, which orphans the following line with leading spaces and aborts CI with: Makefile:68: *** missing separator (did you mean TAB instead of 8 spaces?) Strip the extraneous 'r' so each continuation ends with a real backslash + LF. 
--- 9_Firmware/9_1_Microcontroller/tests/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/9_Firmware/9_1_Microcontroller/tests/Makefile b/9_Firmware/9_1_Microcontroller/tests/Makefile index 9cff000..75b7548 100644 --- a/9_Firmware/9_1_Microcontroller/tests/Makefile +++ b/9_Firmware/9_1_Microcontroller/tests/Makefile @@ -64,7 +64,7 @@ TESTS_STANDALONE := test_bug12_pa_cal_loop_inverted \ test_gap3_iwdg_config \ test_gap3_temperature_max \ test_gap3_idq_periodic_reread \ - test_gap3_emergency_state_ordering \r + test_gap3_emergency_state_ordering \ test_gap3_overtemp_emergency_stop # Tests that need platform_noos_stm32.o + mocks @@ -77,7 +77,7 @@ ALL_TESTS := $(TESTS_WITH_REAL) $(TESTS_MOCK_ONLY) $(TESTS_STANDALONE) $(TESTS_W .PHONY: all build test clean \ $(addprefix test_,bug1 bug2 bug3 bug4 bug5 bug6 bug7 bug8 bug9 bug10 bug11 bug12 bug13 bug14 bug15) \ - test_gap3_estop test_gap3_iwdg test_gap3_temp test_gap3_idq test_gap3_order \r + test_gap3_estop test_gap3_iwdg test_gap3_temp test_gap3_idq test_gap3_order \ test_gap3_overtemp all: build test From 0b25db08b5fb236bab8a570b6be41275ae9ed277 Mon Sep 17 00:00:00 2001 From: Jason <83615043+JJassonn69@users.noreply.github.com> Date: Wed, 15 Apr 2026 13:18:07 +0545 Subject: [PATCH 3/3] fix(test): align emergency_state_ordering test with overtemp/watchdog fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename ERROR_STEPPER_FAULT → ERROR_STEPPER_MOTOR to match main.cpp enum - Update critical-error predicate to include ERROR_TEMPERATURE_HIGH and ERROR_WATCHDOG_TIMEOUT (was testing stale pre-fix logic) - Test 4 now asserts overtemp DOES trigger e-stop (previously asserted opposite) - Add Test 5 (watchdog triggers e-stop) and Test 6 (memory alloc does not) - Add ERROR_MEMORY_ALLOC and ERROR_WATCHDOG_TIMEOUT to local enum - 7 tests, all pass --- .../test_gap3_emergency_state_ordering.c | 45 ++++++++++++++----- 1 file changed, 35 insertions(+), 10 
deletions(-) diff --git a/9_Firmware/9_1_Microcontroller/tests/test_gap3_emergency_state_ordering.c b/9_Firmware/9_1_Microcontroller/tests/test_gap3_emergency_state_ordering.c index 6ebaa5a..28db700 100644 --- a/9_Firmware/9_1_Microcontroller/tests/test_gap3_emergency_state_ordering.c +++ b/9_Firmware/9_1_Microcontroller/tests/test_gap3_emergency_state_ordering.c @@ -34,22 +34,25 @@ static void Mock_Emergency_Stop(void) state_was_true_when_estop_called = system_emergency_state; } -/* Error codes (subset matching main.cpp) */ +/* Error codes (subset matching main.cpp SystemError_t) */ typedef enum { ERROR_NONE = 0, ERROR_RF_PA_OVERCURRENT = 9, ERROR_RF_PA_BIAS = 10, - ERROR_STEPPER_FAULT = 11, + ERROR_STEPPER_MOTOR = 11, ERROR_FPGA_COMM = 12, ERROR_POWER_SUPPLY = 13, ERROR_TEMPERATURE_HIGH = 14, + ERROR_MEMORY_ALLOC = 15, + ERROR_WATCHDOG_TIMEOUT = 16, } SystemError_t; -/* Extracted critical-error handling logic (post-fix ordering) */ +/* Extracted critical-error handling logic (matches post-fix main.cpp predicate) */ static void simulate_handleSystemError_critical(SystemError_t error) { - /* Only critical errors (PA overcurrent through power supply) trigger e-stop */ - if (error >= ERROR_RF_PA_OVERCURRENT && error <= ERROR_POWER_SUPPLY) { + if ((error >= ERROR_RF_PA_OVERCURRENT && error <= ERROR_POWER_SUPPLY) || + error == ERROR_TEMPERATURE_HIGH || + error == ERROR_WATCHDOG_TIMEOUT) { /* FIX 5: set flag BEFORE calling Emergency_Stop */ system_emergency_state = true; Mock_Emergency_Stop(); @@ -93,17 +96,39 @@ int main(void) assert(state_was_true_when_estop_called == true); printf("PASS\n"); - /* Test 4: Non-critical error → no e-stop, flag stays false */ - printf(" Test 4: Non-critical error (no e-stop)... "); + /* Test 4: Overtemp → MUST trigger e-stop (was incorrectly non-critical before fix) */ + printf(" Test 4: Overtemp triggers e-stop... 
"); system_emergency_state = false; emergency_stop_called = false; + state_was_true_when_estop_called = false; simulate_handleSystemError_critical(ERROR_TEMPERATURE_HIGH); + assert(emergency_stop_called == true); + assert(system_emergency_state == true); + assert(state_was_true_when_estop_called == true); + printf("PASS\n"); + + /* Test 5: Watchdog timeout → MUST trigger e-stop */ + printf(" Test 5: Watchdog timeout triggers e-stop... "); + system_emergency_state = false; + emergency_stop_called = false; + state_was_true_when_estop_called = false; + simulate_handleSystemError_critical(ERROR_WATCHDOG_TIMEOUT); + assert(emergency_stop_called == true); + assert(system_emergency_state == true); + assert(state_was_true_when_estop_called == true); + printf("PASS\n"); + + /* Test 6: Non-critical error (memory alloc) → no e-stop */ + printf(" Test 6: Non-critical error (no e-stop)... "); + system_emergency_state = false; + emergency_stop_called = false; + simulate_handleSystemError_critical(ERROR_MEMORY_ALLOC); assert(emergency_stop_called == false); assert(system_emergency_state == false); printf("PASS\n"); - /* Test 5: ERROR_NONE → no e-stop */ - printf(" Test 5: ERROR_NONE (no action)... "); + /* Test 7: ERROR_NONE → no e-stop */ + printf(" Test 7: ERROR_NONE (no action)... "); system_emergency_state = false; emergency_stop_called = false; simulate_handleSystemError_critical(ERROR_NONE); @@ -111,6 +136,6 @@ int main(void) assert(system_emergency_state == false); printf("PASS\n"); - printf("\n=== Gap-3 Fix 5: ALL TESTS PASSED ===\n\n"); + printf("\n=== Gap-3 Fix 5: ALL 7 TESTS PASSED ===\n\n"); return 0; }