Merge pull request #69 from 3aLaee/fix/overtemp-emergency-stop

Escalate overtemp and watchdog-timeout faults to Emergency_Stop()
This commit is contained in:
Jason
2026-04-15 12:51:22 +03:00
committed by GitHub
4 changed files with 180 additions and 14 deletions
@@ -932,8 +932,22 @@ void handleSystemError(SystemError_t error) {
HAL_Delay(200);
}
// Critical errors trigger emergency shutdown
if (error >= ERROR_RF_PA_OVERCURRENT && error <= ERROR_POWER_SUPPLY) {
// Critical errors trigger emergency shutdown.
//
// Safety-critical range: any fault that can damage the PAs or leave the
// system in an undefined state must cut the RF rails via Emergency_Stop().
// This covers:
// ERROR_RF_PA_OVERCURRENT .. ERROR_POWER_SUPPLY (9..13) -- PA/supply faults
// ERROR_TEMPERATURE_HIGH (14) -- >75 C on the PA thermal sensors;
// without cutting bias + 5V/5V5/RFPA rails
// the GaN QPA2962 stage can thermal-runaway.
// ERROR_WATCHDOG_TIMEOUT (16) -- health-check loop has stalled (>60 s);
// transmitter state is unknown, safest to
// latch Emergency_Stop rather than rely on
// IWDG reset (which re-energises the rails).
if ((error >= ERROR_RF_PA_OVERCURRENT && error <= ERROR_POWER_SUPPLY) ||
error == ERROR_TEMPERATURE_HIGH ||
error == ERROR_WATCHDOG_TIMEOUT) {
DIAG_ERR("SYS", "CRITICAL ERROR (code %d: %s) -- initiating Emergency_Stop()", error, error_strings[error]);
snprintf(error_msg, sizeof(error_msg),
"CRITICAL ERROR! Initiating emergency shutdown.\r\n");
+10 -2
View File
@@ -64,7 +64,8 @@ TESTS_STANDALONE := test_bug12_pa_cal_loop_inverted \
test_gap3_iwdg_config \
test_gap3_temperature_max \
test_gap3_idq_periodic_reread \
test_gap3_emergency_state_ordering
test_gap3_emergency_state_ordering \
test_gap3_overtemp_emergency_stop
# Tests that need platform_noos_stm32.o + mocks
TESTS_WITH_PLATFORM := test_bug11_platform_spi_transmit_only
@@ -76,7 +77,8 @@ ALL_TESTS := $(TESTS_WITH_REAL) $(TESTS_MOCK_ONLY) $(TESTS_STANDALONE) $(TESTS_W
.PHONY: all build test clean \
$(addprefix test_,bug1 bug2 bug3 bug4 bug5 bug6 bug7 bug8 bug9 bug10 bug11 bug12 bug13 bug14 bug15) \
test_gap3_estop test_gap3_iwdg test_gap3_temp test_gap3_idq test_gap3_order
test_gap3_estop test_gap3_iwdg test_gap3_temp test_gap3_idq test_gap3_order \
test_gap3_overtemp
all: build test
@@ -162,6 +164,9 @@ test_gap3_idq_periodic_reread: test_gap3_idq_periodic_reread.c
test_gap3_emergency_state_ordering: test_gap3_emergency_state_ordering.c
$(CC) $(CFLAGS) $< -o $@
test_gap3_overtemp_emergency_stop: test_gap3_overtemp_emergency_stop.c
$(CC) $(CFLAGS) $< -o $@
# Tests that need platform_noos_stm32.o + mocks
$(TESTS_WITH_PLATFORM): %: %.c $(MOCK_OBJS) $(PLATFORM_OBJ)
$(CC) $(CFLAGS) $(INCLUDES) $< $(MOCK_OBJS) $(PLATFORM_OBJ) -o $@
@@ -246,6 +251,9 @@ test_gap3_idq: test_gap3_idq_periodic_reread
test_gap3_order: test_gap3_emergency_state_ordering
./test_gap3_emergency_state_ordering
test_gap3_overtemp: test_gap3_overtemp_emergency_stop
./test_gap3_overtemp_emergency_stop
# --- Clean ---
clean:
@@ -34,22 +34,25 @@ static void Mock_Emergency_Stop(void)
state_was_true_when_estop_called = system_emergency_state;
}
/* Error codes (subset matching main.cpp) */
/* Error codes (subset matching main.cpp SystemError_t) */
typedef enum {
ERROR_NONE = 0,
ERROR_RF_PA_OVERCURRENT = 9,
ERROR_RF_PA_BIAS = 10,
ERROR_STEPPER_FAULT = 11,
ERROR_STEPPER_MOTOR = 11,
ERROR_FPGA_COMM = 12,
ERROR_POWER_SUPPLY = 13,
ERROR_TEMPERATURE_HIGH = 14,
ERROR_MEMORY_ALLOC = 15,
ERROR_WATCHDOG_TIMEOUT = 16,
} SystemError_t;
/* Extracted critical-error handling logic (post-fix ordering) */
/* Extracted critical-error handling logic (matches post-fix main.cpp predicate) */
static void simulate_handleSystemError_critical(SystemError_t error)
{
/* Only critical errors (PA overcurrent through power supply) trigger e-stop */
if (error >= ERROR_RF_PA_OVERCURRENT && error <= ERROR_POWER_SUPPLY) {
if ((error >= ERROR_RF_PA_OVERCURRENT && error <= ERROR_POWER_SUPPLY) ||
error == ERROR_TEMPERATURE_HIGH ||
error == ERROR_WATCHDOG_TIMEOUT) {
/* FIX 5: set flag BEFORE calling Emergency_Stop */
system_emergency_state = true;
Mock_Emergency_Stop();
@@ -93,17 +96,39 @@ int main(void)
assert(state_was_true_when_estop_called == true);
printf("PASS\n");
/* Test 4: Non-critical error → no e-stop, flag stays false */
printf(" Test 4: Non-critical error (no e-stop)... ");
/* Test 4: Overtemp → MUST trigger e-stop (was incorrectly non-critical before fix) */
printf(" Test 4: Overtemp triggers e-stop... ");
system_emergency_state = false;
emergency_stop_called = false;
state_was_true_when_estop_called = false;
simulate_handleSystemError_critical(ERROR_TEMPERATURE_HIGH);
assert(emergency_stop_called == true);
assert(system_emergency_state == true);
assert(state_was_true_when_estop_called == true);
printf("PASS\n");
/* Test 5: Watchdog timeout → MUST trigger e-stop */
printf(" Test 5: Watchdog timeout triggers e-stop... ");
system_emergency_state = false;
emergency_stop_called = false;
state_was_true_when_estop_called = false;
simulate_handleSystemError_critical(ERROR_WATCHDOG_TIMEOUT);
assert(emergency_stop_called == true);
assert(system_emergency_state == true);
assert(state_was_true_when_estop_called == true);
printf("PASS\n");
/* Test 6: Non-critical error (memory alloc) → no e-stop */
printf(" Test 6: Non-critical error (no e-stop)... ");
system_emergency_state = false;
emergency_stop_called = false;
simulate_handleSystemError_critical(ERROR_MEMORY_ALLOC);
assert(emergency_stop_called == false);
assert(system_emergency_state == false);
printf("PASS\n");
/* Test 5: ERROR_NONE → no e-stop */
printf(" Test 5: ERROR_NONE (no action)... ");
/* Test 7: ERROR_NONE → no e-stop */
printf(" Test 7: ERROR_NONE (no action)... ");
system_emergency_state = false;
emergency_stop_called = false;
simulate_handleSystemError_critical(ERROR_NONE);
@@ -111,6 +136,6 @@ int main(void)
assert(system_emergency_state == false);
printf("PASS\n");
printf("\n=== Gap-3 Fix 5: ALL TESTS PASSED ===\n\n");
printf("\n=== Gap-3 Fix 5: ALL 7 TESTS PASSED ===\n\n");
return 0;
}
@@ -0,0 +1,119 @@
/*******************************************************************************
* test_gap3_overtemp_emergency_stop.c
*
* Safety bug: handleSystemError() did not escalate ERROR_TEMPERATURE_HIGH
* (or ERROR_WATCHDOG_TIMEOUT) to Emergency_Stop().
*
* Before fix: The critical-error gate was
* if (error >= ERROR_RF_PA_OVERCURRENT &&
* error <= ERROR_POWER_SUPPLY) { Emergency_Stop(); }
* So overtemp (code 14) and watchdog timeout (code 16) fell
* through to attemptErrorRecovery()'s default branch (log and
* continue), leaving the 10 W GaN PAs biased at >75 °C.
*
* After fix: The gate also matches ERROR_TEMPERATURE_HIGH and
* ERROR_WATCHDOG_TIMEOUT, so thermal and watchdog faults
* latch Emergency_Stop() exactly like PA overcurrent.
*
* Test strategy:
* Replicate the critical-error predicate and assert that every error
* enum value which threatens RF/power safety is accepted, and that the
* non-critical ones (comm, sensor, memory) are not.
******************************************************************************/
#include <assert.h>
#include <stdio.h>
/*
 * Local copy of SystemError_t from main.cpp. The numeric codes must stay in
 * lockstep with the firmware enum, so each value is written out explicitly:
 * a reordering or insertion then shows up as a changed number in review.
 */
typedef enum {
    ERROR_NONE              = 0,
    ERROR_AD9523_CLOCK      = 1,
    ERROR_ADF4382_TX_UNLOCK = 2,
    ERROR_ADF4382_RX_UNLOCK = 3,
    ERROR_ADAR1000_COMM     = 4,
    ERROR_ADAR1000_TEMP     = 5,
    ERROR_IMU_COMM          = 6,
    ERROR_BMP180_COMM       = 7,
    ERROR_GPS_COMM          = 8,
    ERROR_RF_PA_OVERCURRENT = 9,
    ERROR_RF_PA_BIAS        = 10,
    ERROR_STEPPER_MOTOR     = 11,
    ERROR_FPGA_COMM         = 12,
    ERROR_POWER_SUPPLY      = 13,
    ERROR_TEMPERATURE_HIGH  = 14,
    ERROR_MEMORY_ALLOC      = 15,
    ERROR_WATCHDOG_TIMEOUT  = 16
} SystemError_t;
/*
 * Stand-alone copy of the post-fix critical-error gate.
 *
 * Returns 1 when error code `e` must escalate to Emergency_Stop(), and 0
 * otherwise. The contiguous ERROR_RF_PA_OVERCURRENT..ERROR_POWER_SUPPLY range
 * is kept as a range comparison (not an enumerated list) so this copy has the
 * same shape as the predicate it mirrors.
 */
static int triggers_emergency_stop(SystemError_t e)
{
    /* PA / supply fault band. */
    if (e >= ERROR_RF_PA_OVERCURRENT && e <= ERROR_POWER_SUPPLY) {
        return 1;
    }

    /* Over-temperature was added to the gate by the fix. */
    if (e == ERROR_TEMPERATURE_HIGH) {
        return 1;
    }

    /* Watchdog timeout is the only remaining escalating code. */
    return e == ERROR_WATCHDOG_TIMEOUT;
}
int main(void)
{
printf("=== Safety fix: overtemp / watchdog -> Emergency_Stop() ===\n");
/* --- Errors that MUST latch Emergency_Stop --- */
printf(" Test 1: ERROR_RF_PA_OVERCURRENT triggers... ");
assert(triggers_emergency_stop(ERROR_RF_PA_OVERCURRENT));
printf("PASS\n");
printf(" Test 2: ERROR_RF_PA_BIAS triggers... ");
assert(triggers_emergency_stop(ERROR_RF_PA_BIAS));
printf("PASS\n");
printf(" Test 3: ERROR_STEPPER_MOTOR triggers... ");
assert(triggers_emergency_stop(ERROR_STEPPER_MOTOR));
printf("PASS\n");
printf(" Test 4: ERROR_FPGA_COMM triggers... ");
assert(triggers_emergency_stop(ERROR_FPGA_COMM));
printf("PASS\n");
printf(" Test 5: ERROR_POWER_SUPPLY triggers... ");
assert(triggers_emergency_stop(ERROR_POWER_SUPPLY));
printf("PASS\n");
printf(" Test 6: ERROR_TEMPERATURE_HIGH triggers (regression)... ");
assert(triggers_emergency_stop(ERROR_TEMPERATURE_HIGH));
printf("PASS\n");
printf(" Test 7: ERROR_WATCHDOG_TIMEOUT triggers (regression)... ");
assert(triggers_emergency_stop(ERROR_WATCHDOG_TIMEOUT));
printf("PASS\n");
/* --- Errors that MUST NOT escalate (recoverable / informational) --- */
printf(" Test 8: ERROR_NONE does not trigger... ");
assert(!triggers_emergency_stop(ERROR_NONE));
printf("PASS\n");
printf(" Test 9: ERROR_AD9523_CLOCK does not trigger... ");
assert(!triggers_emergency_stop(ERROR_AD9523_CLOCK));
printf("PASS\n");
printf(" Test 10: ERROR_ADF4382_TX_UNLOCK does not trigger (recoverable)... ");
assert(!triggers_emergency_stop(ERROR_ADF4382_TX_UNLOCK));
printf("PASS\n");
printf(" Test 11: ERROR_ADAR1000_COMM does not trigger... ");
assert(!triggers_emergency_stop(ERROR_ADAR1000_COMM));
printf("PASS\n");
printf(" Test 12: ERROR_IMU_COMM does not trigger... ");
assert(!triggers_emergency_stop(ERROR_IMU_COMM));
printf("PASS\n");
printf(" Test 13: ERROR_GPS_COMM does not trigger... ");
assert(!triggers_emergency_stop(ERROR_GPS_COMM));
printf("PASS\n");
printf(" Test 14: ERROR_MEMORY_ALLOC does not trigger... ");
assert(!triggers_emergency_stop(ERROR_MEMORY_ALLOC));
printf("PASS\n");
printf("\n=== Safety fix: ALL TESTS PASSED ===\n\n");
return 0;
}