diff --git a/9_Firmware/9_1_Microcontroller/9_1_3_C_Cpp_Code/main.cpp b/9_Firmware/9_1_Microcontroller/9_1_3_C_Cpp_Code/main.cpp
index 09468d0..b8c9392 100644
--- a/9_Firmware/9_1_Microcontroller/9_1_3_C_Cpp_Code/main.cpp
+++ b/9_Firmware/9_1_Microcontroller/9_1_3_C_Cpp_Code/main.cpp
@@ -885,8 +885,22 @@ void handleSystemError(SystemError_t error) {
         HAL_Delay(200);
     }
 
-    // Critical errors trigger emergency shutdown
-    if (error >= ERROR_RF_PA_OVERCURRENT && error <= ERROR_POWER_SUPPLY) {
+    // Critical errors trigger emergency shutdown.
+    //
+    // Safety-critical range: any fault that can damage the PAs or leave the
+    // system in an undefined state must cut the RF rails via Emergency_Stop().
+    // This covers:
+    //   ERROR_RF_PA_OVERCURRENT .. ERROR_POWER_SUPPLY (9..13) -- PA/supply faults
+    //   ERROR_TEMPERATURE_HIGH (14) -- >75 C on the PA thermal sensors;
+    //                                  without cutting bias + 5V/5V5/RFPA rails
+    //                                  the GaN QPA2962 stage can thermal-runaway.
+    //   ERROR_WATCHDOG_TIMEOUT (16) -- health-check loop has stalled (>60 s);
+    //                                  transmitter state is unknown, safest to
+    //                                  latch Emergency_Stop rather than rely on
+    //                                  IWDG reset (which re-energises the rails).
+    if ((error >= ERROR_RF_PA_OVERCURRENT && error <= ERROR_POWER_SUPPLY) ||
+        error == ERROR_TEMPERATURE_HIGH ||
+        error == ERROR_WATCHDOG_TIMEOUT) {
         DIAG_ERR("SYS", "CRITICAL ERROR (code %d: %s) -- initiating Emergency_Stop()",
                  error, error_strings[error]);
         snprintf(error_msg, sizeof(error_msg), "CRITICAL ERROR! Initiating emergency shutdown.\r\n");
diff --git a/9_Firmware/9_1_Microcontroller/tests/Makefile b/9_Firmware/9_1_Microcontroller/tests/Makefile
index 73e7857..9cff000 100644
--- a/9_Firmware/9_1_Microcontroller/tests/Makefile
+++ b/9_Firmware/9_1_Microcontroller/tests/Makefile
@@ -64,7 +64,8 @@ TESTS_STANDALONE := test_bug12_pa_cal_loop_inverted \
                     test_gap3_iwdg_config \
                     test_gap3_temperature_max \
                     test_gap3_idq_periodic_reread \
-                    test_gap3_emergency_state_ordering
+                    test_gap3_emergency_state_ordering \
+                    test_gap3_overtemp_emergency_stop
 
 # Tests that need platform_noos_stm32.o + mocks
 TESTS_WITH_PLATFORM := test_bug11_platform_spi_transmit_only
@@ -76,7 +77,8 @@ ALL_TESTS := $(TESTS_WITH_REAL) $(TESTS_MOCK_ONLY) $(TESTS_STANDALONE) $(TESTS_W
 
 .PHONY: all build test clean \
         $(addprefix test_,bug1 bug2 bug3 bug4 bug5 bug6 bug7 bug8 bug9 bug10 bug11 bug12 bug13 bug14 bug15) \
-        test_gap3_estop test_gap3_iwdg test_gap3_temp test_gap3_idq test_gap3_order
+        test_gap3_estop test_gap3_iwdg test_gap3_temp test_gap3_idq test_gap3_order \
+        test_gap3_overtemp
 
 all: build test
 
@@ -162,6 +164,9 @@ test_gap3_idq_periodic_reread: test_gap3_idq_periodic_reread.c
 test_gap3_emergency_state_ordering: test_gap3_emergency_state_ordering.c
 	$(CC) $(CFLAGS) $< -o $@
 
+test_gap3_overtemp_emergency_stop: test_gap3_overtemp_emergency_stop.c
+	$(CC) $(CFLAGS) $< -o $@
+
 # Tests that need platform_noos_stm32.o + mocks
 $(TESTS_WITH_PLATFORM): %: %.c $(MOCK_OBJS) $(PLATFORM_OBJ)
 	$(CC) $(CFLAGS) $(INCLUDES) $< $(MOCK_OBJS) $(PLATFORM_OBJ) -o $@
@@ -246,6 +251,9 @@ test_gap3_idq: test_gap3_idq_periodic_reread
 test_gap3_order: test_gap3_emergency_state_ordering
 	./test_gap3_emergency_state_ordering
 
+test_gap3_overtemp: test_gap3_overtemp_emergency_stop
+	./test_gap3_overtemp_emergency_stop
+
 # --- Clean ---
 
 clean:
diff --git a/9_Firmware/9_1_Microcontroller/tests/test_gap3_overtemp_emergency_stop.c b/9_Firmware/9_1_Microcontroller/tests/test_gap3_overtemp_emergency_stop.c
new file mode 100644
index 0000000..82b0df3
--- /dev/null
+++ b/9_Firmware/9_1_Microcontroller/tests/test_gap3_overtemp_emergency_stop.c
@@ -0,0 +1,119 @@
+/*******************************************************************************
+ * test_gap3_overtemp_emergency_stop.c
+ *
+ * Safety bug: handleSystemError() did not escalate ERROR_TEMPERATURE_HIGH
+ * (or ERROR_WATCHDOG_TIMEOUT) to Emergency_Stop().
+ *
+ * Before fix: The critical-error gate was
+ *               if (error >= ERROR_RF_PA_OVERCURRENT &&
+ *                   error <= ERROR_POWER_SUPPLY) { Emergency_Stop(); }
+ *             So overtemp (code 14) and watchdog timeout (code 16) fell
+ *             through to attemptErrorRecovery()'s default branch (log and
+ *             continue), leaving the 10 W GaN PAs biased at >75 °C.
+ *
+ * After fix:  The gate also matches ERROR_TEMPERATURE_HIGH and
+ *             ERROR_WATCHDOG_TIMEOUT, so thermal and watchdog faults
+ *             latch Emergency_Stop() exactly like PA overcurrent.
+ *
+ * Test strategy:
+ *   Replicate the critical-error predicate and assert that every error
+ *   enum value which threatens RF/power safety is accepted, and that the
+ *   non-critical ones (comm, sensor, memory) are not.
+ ******************************************************************************/
+#include <assert.h>
+#include <stdio.h>
+
+/* Mirror of SystemError_t from main.cpp (keep in lockstep). */
+typedef enum {
+    ERROR_NONE = 0,
+    ERROR_AD9523_CLOCK,
+    ERROR_ADF4382_TX_UNLOCK,
+    ERROR_ADF4382_RX_UNLOCK,
+    ERROR_ADAR1000_COMM,
+    ERROR_ADAR1000_TEMP,
+    ERROR_IMU_COMM,
+    ERROR_BMP180_COMM,
+    ERROR_GPS_COMM,
+    ERROR_RF_PA_OVERCURRENT,
+    ERROR_RF_PA_BIAS,
+    ERROR_STEPPER_MOTOR,
+    ERROR_FPGA_COMM,
+    ERROR_POWER_SUPPLY,
+    ERROR_TEMPERATURE_HIGH,
+    ERROR_MEMORY_ALLOC,
+    ERROR_WATCHDOG_TIMEOUT
+} SystemError_t;
+
+/* Extracted post-fix predicate: returns 1 when Emergency_Stop() must fire. */
+static int triggers_emergency_stop(SystemError_t e)
+{
+    return ((e >= ERROR_RF_PA_OVERCURRENT && e <= ERROR_POWER_SUPPLY) ||
+            e == ERROR_TEMPERATURE_HIGH ||
+            e == ERROR_WATCHDOG_TIMEOUT);
+}
+
+int main(void)
+{
+    printf("=== Safety fix: overtemp / watchdog -> Emergency_Stop() ===\n");
+
+    /* --- Errors that MUST latch Emergency_Stop --- */
+    printf(" Test 1: ERROR_RF_PA_OVERCURRENT triggers... ");
+    assert(triggers_emergency_stop(ERROR_RF_PA_OVERCURRENT));
+    printf("PASS\n");
+
+    printf(" Test 2: ERROR_RF_PA_BIAS triggers... ");
+    assert(triggers_emergency_stop(ERROR_RF_PA_BIAS));
+    printf("PASS\n");
+
+    printf(" Test 3: ERROR_STEPPER_MOTOR triggers... ");
+    assert(triggers_emergency_stop(ERROR_STEPPER_MOTOR));
+    printf("PASS\n");
+
+    printf(" Test 4: ERROR_FPGA_COMM triggers... ");
+    assert(triggers_emergency_stop(ERROR_FPGA_COMM));
+    printf("PASS\n");
+
+    printf(" Test 5: ERROR_POWER_SUPPLY triggers... ");
+    assert(triggers_emergency_stop(ERROR_POWER_SUPPLY));
+    printf("PASS\n");
+
+    printf(" Test 6: ERROR_TEMPERATURE_HIGH triggers (regression)... ");
+    assert(triggers_emergency_stop(ERROR_TEMPERATURE_HIGH));
+    printf("PASS\n");
+
+    printf(" Test 7: ERROR_WATCHDOG_TIMEOUT triggers (regression)... ");
+    assert(triggers_emergency_stop(ERROR_WATCHDOG_TIMEOUT));
+    printf("PASS\n");
+
+    /* --- Errors that MUST NOT escalate (recoverable / informational) --- */
+    printf(" Test 8: ERROR_NONE does not trigger... ");
+    assert(!triggers_emergency_stop(ERROR_NONE));
+    printf("PASS\n");
+
+    printf(" Test 9: ERROR_AD9523_CLOCK does not trigger... ");
+    assert(!triggers_emergency_stop(ERROR_AD9523_CLOCK));
+    printf("PASS\n");
+
+    printf(" Test 10: ERROR_ADF4382_TX_UNLOCK does not trigger (recoverable)... ");
+    assert(!triggers_emergency_stop(ERROR_ADF4382_TX_UNLOCK));
+    printf("PASS\n");
+
+    printf(" Test 11: ERROR_ADAR1000_COMM does not trigger... ");
+    assert(!triggers_emergency_stop(ERROR_ADAR1000_COMM));
+    printf("PASS\n");
+
+    printf(" Test 12: ERROR_IMU_COMM does not trigger... ");
+    assert(!triggers_emergency_stop(ERROR_IMU_COMM));
+    printf("PASS\n");
+
+    printf(" Test 13: ERROR_GPS_COMM does not trigger... ");
+    assert(!triggers_emergency_stop(ERROR_GPS_COMM));
+    printf("PASS\n");
+
+    printf(" Test 14: ERROR_MEMORY_ALLOC does not trigger... ");
+    assert(!triggers_emergency_stop(ERROR_MEMORY_ALLOC));
+    printf("PASS\n");
+
+    printf("\n=== Safety fix: ALL TESTS PASSED ===\n\n");
+    return 0;
+}