aquarium_control/watchmen/
watchdog.rs

1/* Copyright 2025 Uwe Martin
2
3Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
5The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
7THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8*/
9use crate::utilities::proc_ext_req::ProcessExternalRequestTrait;
10use crate::watchmen::petting::PettingTrait;
11use crate::watchmen::watchdog_channels::WatchDogChannels;
12use log::error;
13use spin_sleep::SpinSleeper;
14use std::time::Duration;
15
16#[cfg(all(target_os = "linux", not(test)))]
17use log::info;
18
19use crate::utilities::acknowledge_signal_handler::AcknowledgeSignalHandlerTrait;
20use crate::watchmen::watchdog_config::WatchdogConfig;
21#[cfg(all(target_os = "linux", not(test)))]
22use nix::unistd::gettid;
23use thiserror::Error;
24
25/// Contains the error definition for Watchdog
26#[derive(Error, Debug)]
27pub enum WatchdogError {
28    /// Duration between two heartbeats is zero.
29    #[error("[{0}] Duration between two watchdog heartbeats is zero - please assign a value > 0")]
30    DurationBetweenTwoHeartbeatsIsZero(String),
31
32    /// Watchdog filename is empty.
33    #[error("[{0}] Watchdog filename is empty - please assign a value")]
34    FilenameIsEmpty(String),
35
36    /// Watchdog heartbeat message is empty.
37    #[error("[{0}] Watchdog heartbeat message is empty - please assign a value")]
38    HeartbeatMessageIsEmpty(String),
39
40    /// Watchdog deactivation message is empty.
41    #[error("[{0}] Watchdog deactivation message is empty - please assign a value")]
42    DeactivationMessageIsEmpty(String),
43
44    /// Watchdog heartbeat and deactivation messages are identical.
45    #[error("[{0}] watchdog heartbeat and deactivation message are identical ({1}) - please assign different values")]
46    HeartbeatAndDeactivationMessagesAreIdentical(String, String),
47}
48
49/// Represents the Watchdog responsible for activating and petting the watchdog.
50///
51/// This struct encapsulates the configuration, interface details, and internal state
52/// required to communicate with the operating system.
53///
54/// Thread Communication:
55/// This module is designed to run in its own dedicated thread.
56/// It reads commands provided by the messaging system and acts accordingly.
57/// It responds to `Quit` commands for graceful shutdown.
58///
59/// Platform-Specific Behavior:
60/// The watchdog logic will only work on Linux systems but compile for any other system as well.
61///
62/// Thread communication of this component is as follows:
63/// ```mermaid
64/// graph LR
65///     messaging[Messaging] --> ds18b20[Ds18b20]
66///     signal_handler[SignalHandler] --> ds18b20
67/// ```
68pub struct Watchdog {
69    config: WatchdogConfig,
70}
71
72impl Watchdog {
73    /// Creates a new `Watchdog` instance.
74    ///
75    /// This constructor initializes the watchdog communication module with its specified
76    /// configuration. It performs critical **validation checks** on the configuration
77    /// parameters to ensure the watchdog can operate correctly and safely.
78    ///
79    /// **Validation checks performed: **
80    /// - `heartbeat_duration_millis` must be greater than zero.
81    /// - `watchdog_filename` cannot be an empty string.
82    /// - `watchdog_heartbeat` message cannot be an empty string.
83    /// - `watchdog_deactivation` message cannot be an empty string.
84    /// - `watchdog_heartbeat` and `watchdog_deactivation` messages must be different.
85    ///
86    /// # Arguments
87    /// * `config` - **Configuration data** for the watchdog communication, loaded from a TOML file.
88    ///   This includes filename, heartbeat messages, and timing parameters.
89    ///
90    /// # Returns
91    /// A new **`Watchdog` struct**, ready for sending heartbeats or respond to termination requests.
92    ///
93    /// # Panics
94    /// This function will **panic** if any of the configuration validation checks fail,
95    /// as an incorrectly configured watchdog can lead to system instability or unintended resets.
96    pub fn new(config: WatchdogConfig) -> Result<Watchdog, WatchdogError> {
97        if config.heartbeat_duration_millis == 0 {
98            return Err(WatchdogError::DurationBetweenTwoHeartbeatsIsZero(
99                module_path!().to_string(),
100            ));
101        }
102        if config.watchdog_filename.is_empty() {
103            return Err(WatchdogError::FilenameIsEmpty(module_path!().to_string()));
104        }
105        if config.watchdog_heartbeat.is_empty() {
106            return Err(WatchdogError::HeartbeatMessageIsEmpty(
107                module_path!().to_string(),
108            ));
109        }
110        if config.watchdog_deactivation.is_empty() {
111            return Err(WatchdogError::DeactivationMessageIsEmpty(
112                module_path!().to_string(),
113            ));
114        }
115        if config.watchdog_deactivation == config.watchdog_heartbeat {
116            return Err(WatchdogError::HeartbeatAndDeactivationMessagesAreIdentical(
117                module_path!().to_string(),
118                config.watchdog_deactivation,
119            ));
120        }
121
122        Ok(Watchdog { config })
123    }
124
125    /// Executes the main control loop for the watchdog communication module.
126    ///
127    /// This function runs continuously, managing the periodic sending of heartbeats
128    /// to a system watchdog. It ensures the application's liveness is maintained
129    /// and handles various external commands for controlling its operation.
130    ///
131    /// **Key Operations:**
132    /// - **Heartbeat Sending**: If `active` and not `inhibited`, it calls the `petting.pet()`
133    ///   method to send a heartbeat message to the configured `watchdog_filename`.
134    /// - **Cycle Management**: It sleeps for the configured `heartbeat_duration_millis`
135    ///   between each heartbeat.
136    /// - **External Control**: It processes `Start` and `Stop` commands received from
137    ///   messaging or the signal handler to `inhibit` or re-enable watchdog activity.
138    /// - **Graceful Deactivation**: Upon receiving a `Quit` command, it sends a final
139    ///   `watchdog_deactivation` message before exiting the loop.
140    /// - **Shutdown Confirmation**: After exiting the loop, it sends a confirmation back
141    ///   to the signal handler.
142    ///
143    /// # Arguments
144    /// * `watchdog_channels` - A mutable reference to the struct containing the channels.
145    /// * `petting` - A mutable reference to an object implementing the `PettingTrait`,
146    ///   responsible for the actual writing of heartbeat/deactivation messages to the watchdog file.
147    pub fn execute(
148        &mut self,
149        watchdog_channels: &mut WatchDogChannels,
150        petting: &mut impl PettingTrait,
151    ) {
152        #[cfg(all(target_os = "linux", not(test)))]
153        info!(target: module_path!(), "Thread started with TID: {}", gettid());
154
155        let spin_sleeper = SpinSleeper::default();
156        let sleep_duration = Duration::from_millis(self.config.heartbeat_duration_millis);
157        let mut quit_command_received: bool; // the request to end the application has been received
158        let mut stop_command_received: bool; // the request to (temporarily) stop watchdog communication has been received
159        let mut start_command_received: bool; // the request to (temporarily) stop watchdog communication has been received
160        let mut inhibited: bool = false;
161
162        loop {
163            if self.config.active && !inhibited {
164                // Send the heartbeat to the watchdog
165                petting.pet(
166                    &self.config.watchdog_filename,
167                    &self.config.watchdog_heartbeat,
168                );
169            }
170            spin_sleeper.sleep(sleep_duration);
171            (
172                quit_command_received,
173                start_command_received,
174                stop_command_received,
175            ) = self.process_external_request(
176                &mut watchdog_channels.rx_watchdog_from_signal_handler,
177                watchdog_channels.rx_watchdog_from_messaging_opt.as_mut(),
178            );
179
180            if quit_command_received {
181                if self.config.active {
182                    // Deactivate the watchdog functionality
183                    petting.pet(
184                        &self.config.watchdog_filename,
185                        &self.config.watchdog_deactivation,
186                    );
187                }
188                break;
189            }
190
191            // For testing purposes, allow deactivation of the watchdog communication.
192            // This will trigger a reset of the system.
193            if start_command_received {
194                inhibited = false;
195            }
196            if stop_command_received {
197                inhibited = true;
198            }
199        }
200
201        watchdog_channels.acknowledge_signal_handler();
202    }
203}
204
#[cfg(test)]
pub mod tests {
    use crate::launch::channels::Channels;
    use crate::mocks::mock_petting::tests::MockPetting;
    use crate::utilities::channel_content::InternalCommand;
    use crate::utilities::config::{read_config_file, ConfigData};
    use crate::watchmen::petting::Petting;
    use crate::watchmen::watchdog::{Watchdog, WatchdogError};
    use spin_sleep::SpinSleeper;
    use std::fs::OpenOptions;
    use std::io::Read;
    use std::thread::scope;
    use std::time::Duration;

    /// Configuration file shared by all tests of this module.
    const TEST_CONFIG_FILE: &str = "/config/aquarium_control_test_generic.toml";

    /// Accepted deviation (in milliseconds) of the interval between two pettings.
    const HEARTBEAT_TOLERANCE_MILLIS: u64 = 50;

    /// Loads the shared test configuration; panics if it cannot be read.
    fn load_test_config() -> ConfigData {
        read_config_file(TEST_CONFIG_FILE.to_string()).unwrap()
    }

    /// Tests if `Watchdog::new` fails if the configuration contains an empty watchdog filename.
    #[test]
    fn test_new_fails_with_empty_filename() {
        // Arrange
        let mut config = load_test_config();
        config.watchdog.watchdog_filename = String::new();

        // Act
        let result = Watchdog::new(config.watchdog);

        // Assert
        assert!(matches!(result, Err(WatchdogError::FilenameIsEmpty(_))));
    }

    /// Tests if `Watchdog::new` fails if the configuration contains an empty heartbeat message.
    #[test]
    fn test_new_fails_with_empty_heartbeat() {
        // Arrange
        let mut config = load_test_config();
        config.watchdog.watchdog_heartbeat = String::new();

        // Act
        let result = Watchdog::new(config.watchdog);

        // Assert
        assert!(matches!(
            result,
            Err(WatchdogError::HeartbeatMessageIsEmpty(_))
        ));
    }

    /// Tests if `Watchdog::new` fails if the configuration contains an empty deactivation message.
    #[test]
    fn test_new_fails_with_empty_deactivation_message() {
        // Arrange
        let mut config = load_test_config();
        config.watchdog.watchdog_deactivation = String::new();

        // Act
        let result = Watchdog::new(config.watchdog);

        // Assert
        assert!(matches!(
            result,
            Err(WatchdogError::DeactivationMessageIsEmpty(_))
        ));
    }

    /// Tests if `Watchdog::new` fails if heartbeat and deactivation messages are identical.
    #[test]
    fn test_new_fails_with_identical_messages() {
        // Arrange
        let mut config = load_test_config();
        config.watchdog.watchdog_deactivation = "T".to_string();
        config.watchdog.watchdog_heartbeat = "T".to_string();

        // Act
        let result = Watchdog::new(config.watchdog);

        // Assert
        assert!(matches!(
            result,
            Err(WatchdogError::HeartbeatAndDeactivationMessagesAreIdentical(
                _,
                _
            ))
        ));
    }

    /// Tests if `Watchdog::new` fails if the configuration contains a zero-heartbeat duration.
    #[test]
    fn test_new_fails_with_zero_heartbeat_duration() {
        // Arrange
        let mut config = load_test_config();
        config.watchdog.heartbeat_duration_millis = 0;

        // Act
        let result = Watchdog::new(config.watchdog);

        // Assert
        assert!(matches!(
            result,
            Err(WatchdogError::DurationBetweenTwoHeartbeatsIsZero(_))
        ));
    }

    /// Tests if the watchdog is executing the petting in time.
    #[test]
    fn test_watchdog_execute_petting() {
        let config = load_test_config();

        let mut channels = Channels::new_for_test();

        // Saturating subtraction keeps the lower bound at zero instead of
        // underflowing when the configured duration is below the tolerance.
        let min_duration = Duration::from_millis(
            config
                .watchdog
                .heartbeat_duration_millis
                .saturating_sub(HEARTBEAT_TOLERANCE_MILLIS),
        );
        let max_duration = Duration::from_millis(
            config.watchdog.heartbeat_duration_millis + HEARTBEAT_TOLERANCE_MILLIS,
        );

        let reference_heartbeat_signal = config.watchdog.watchdog_heartbeat.clone();
        let reference_deactivation_signal = config.watchdog.watchdog_deactivation.clone();

        let mut watchdog = Watchdog::new(config.watchdog).unwrap();
        let mut mock_petting = MockPetting::new();

        scope(|scope| {
            // thread for the test environment
            scope.spawn(move || {
                let sleep_duration_test_environment = Duration::from_millis(3100);
                let spin_sleeper_test_environment = SpinSleeper::default();

                spin_sleeper_test_environment.sleep(sleep_duration_test_environment);
                channels
                    .signal_handler
                    .send_to_watchdog(InternalCommand::Terminate)
                    .unwrap();
                channels.signal_handler.receive_from_watchdog().unwrap();
            });

            // thread for the test object
            scope.spawn(move || {
                watchdog.execute(&mut channels.watchdog, &mut mock_petting);

                assert_eq!(mock_petting.petting_recorder.len(), 5);

                // Check intervals between consecutive pettings
                // Iterate from the second timestamp (index 1)
                for i in 1..mock_petting.petting_recorder.len() {
                    let previous_ts = mock_petting.petting_recorder[i - 1];
                    let current_ts = mock_petting.petting_recorder[i];
                    let delta_duration = current_ts.duration_since(previous_ts);

                    // Assert that the delta is within the allowed range
                    assert!(
                        delta_duration >= min_duration && delta_duration <= max_duration,
                        "Heartbeat delta {:?} at index {} is outside the allowed range [{:?}, {:?}]",
                        delta_duration,
                        i - 1,
                        min_duration,
                        max_duration,
                    );
                }

                // Assert that the first 4 entries are the configured heartbeat signal
                for i in 0..4 {
                    assert_eq!(
                        mock_petting.signal_recorder[i],
                        reference_heartbeat_signal,
                        "Signal at index {} was expected to be '{}', but was '{}'",
                        i,
                        reference_heartbeat_signal,
                        mock_petting.signal_recorder[i]
                    );
                }

                // Assert that the final entry is the configured deactivation signal
                assert_eq!(
                    mock_petting.signal_recorder[4],
                    reference_deactivation_signal,
                    "Signal was expected to be '{}', but was '{}'",
                    reference_deactivation_signal,
                    mock_petting.signal_recorder.last().unwrap()
                );
            });
        });
    }

    /// Tests if the watchdog is writing the right heartbeat messages to the watchdog file.
    #[test]
    fn test_watchdog_check_heartbeat_message() {
        let mut config = load_test_config();

        config.watchdog.watchdog_filename = "/var/log/watchdog.log".to_string();

        let mut channels = Channels::new_for_test();

        let reference_heartbeat_signal = config.watchdog.watchdog_heartbeat.clone();
        let reference_deactivation_signal = config.watchdog.watchdog_deactivation.clone();

        // Erase content of the watchdog file
        let file = OpenOptions::new()
            .write(true)
            .open(&config.watchdog.watchdog_filename)
            .unwrap();
        // Truncation is best effort: a failure here is ignored on purpose.
        _ = file.set_len(0); // This is the core operation

        let watchdog_filename_for_assert = config.watchdog.watchdog_filename.clone();
        let mut watchdog = Watchdog::new(config.watchdog).unwrap();
        let mut petting = Petting::new();

        scope(|scope| {
            // thread for the test environment
            scope.spawn(move || {
                let sleep_duration_test_environment = Duration::from_millis(3100);
                let spin_sleeper_test_environment = SpinSleeper::default();

                spin_sleeper_test_environment.sleep(sleep_duration_test_environment);
                channels
                    .signal_handler
                    .send_to_watchdog(InternalCommand::Terminate)
                    .unwrap();
                channels.signal_handler.receive_from_watchdog().unwrap();
            });

            // thread for the assertions
            scope.spawn(move || {
                let sleep_duration_assert1 = Duration::from_millis(500);
                let sleep_duration_assert2 = Duration::from_millis(4500);
                let spin_sleeper_asserts = SpinSleeper::default();

                // let the test object start writing to the watchdog file
                spin_sleeper_asserts.sleep(sleep_duration_assert1);

                // assert the heartbeat message
                let mut file = OpenOptions::new()
                    .read(true)
                    .open(&watchdog_filename_for_assert)
                    .unwrap();
                let mut content = String::new();
                file.read_to_string(&mut content).unwrap();
                assert_eq!(content, reference_heartbeat_signal);

                // wait for the termination of the test object
                spin_sleeper_asserts.sleep(sleep_duration_assert2);

                // assert the watchdog deactivation message
                let mut file = OpenOptions::new()
                    .read(true)
                    .open(&watchdog_filename_for_assert)
                    .unwrap();
                let mut content = String::new();
                file.read_to_string(&mut content).unwrap();
                assert_eq!(content, reference_deactivation_signal);
            });

            // thread for the test object
            scope.spawn(move || {
                watchdog.execute(&mut channels.watchdog, &mut petting);
            });
        });
    }
}