server: Improve plugin health check

This commit is contained in:
Koushik Dutta
2024-05-03 18:07:17 -07:00
parent b8bb6dfa61
commit 9c9e29068b
5 changed files with 52 additions and 11 deletions

View File

@@ -644,7 +644,7 @@ class ObjectDetectionModel(TypedDict):
class ObjectDetectionSession(TypedDict):
batch: float
batch: float # Denotes that this is the first sample in a batch of samples.
settings: Any
sourceId: str
zones: list[ObjectDetectionZone]

View File

@@ -1,12 +1,12 @@
{
"name": "@scrypted/server",
"version": "0.100.0",
"version": "0.100.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@scrypted/server",
"version": "0.100.0",
"version": "0.100.1",
"hasInstallScript": true,
"license": "ISC",
"dependencies": {

View File

@@ -754,6 +754,13 @@ class PluginRemote:
raise Exception(f'unknown service {name}')
async def start_stats_runner(self):
# Cached reference to the peer's 'pong' param; stays None until the first ping arrives.
pong = None
# Liveness handler: replies to a ping by passing the received time to 'pong'.
async def ping(time: int):
nonlocal pong
# Fetch the 'pong' param only when it has not been cached yet.
pong = pong or await self.peer.getParam('pong')
await pong(time)
# Expose the handler to the peer under the name 'ping'.
self.peer.params['ping'] = ping
update_stats = await self.peer.getParam('updateStats')
if not update_stats:
print('host did not provide update_stats')

View File

@@ -350,11 +350,38 @@ export class PluginHost {
// the plugin is expected to send process stats every 10 seconds.
// this can be used as a check for liveness.
let lastStats: number;
const statsInterval = setInterval(async () => {
// Called by the plugin to report its process stats. The arrival time is
// recorded so the health check can detect a plugin that stopped reporting.
this.peer.params.updateStats = (stats: any) => {
lastStats = Date.now();
this.stats = stats;
}
// Timestamp carried by the most recent pong received from the plugin.
let lastPong: number;
// Called by the plugin in reply to a ping; time is the value that was passed to ping.
this.peer.params.pong = (time: number) => {
lastPong = time;
};
// Liveness ping loop: every 30 seconds, call the plugin's 'ping' param with the
// current host time. The plugin replies through the 'pong' param, which
// refreshes lastPong for the periodic health check.
(async () => {
    try {
        // Resolved on the first iteration and reused for every later ping.
        let ping: (time: number) => Promise<void>;
        while (!this.killed) {
            await sleep(30000);
            // The host may have been killed while this loop was sleeping.
            if (this.killed)
                return;
            ping ||= await this.peer.getParam('ping');
            await ping(Date.now());
        }
    }
    catch (e) {
        // A ping that was in flight when the host was killed is expected to
        // fail; only a host that is still supposed to be running is restarted.
        if (this.killed)
            return;
        logger.log('e', 'plugin ping failed. restarting.');
        this.api.requestRestart();
    }
})();
const healthInterval = setInterval(async () => {
const now = Date.now();
// plugin may take a while to install, so wait 10 minutes.
// after that, require 1 minute checkins.
if (!lastStats) {
if (!lastStats || !lastPong) {
if (now - startupTime > 10 * 60 * 1000) {
const logger = await this.api.getLogger(undefined);
logger.log('e', 'plugin failed to start in a timely manner. restarting.');
@@ -364,15 +391,16 @@ export class PluginHost {
}
if (!pluginDebug && (lastStats + 60000 < now)) {
const logger = await this.api.getLogger(undefined);
logger.log('e', 'plugin is unresponsive. restarting.');
logger.log('e', 'plugin is not reporting stats. restarting.');
this.api.requestRestart();
}
if (!pluginDebug && (lastPong + 60000 < now)) {
const logger = await this.api.getLogger(undefined);
logger.log('e', 'plugin is not responding to ping. restarting.');
this.api.requestRestart();
}
}, 60000);
this.peer.killed.finally(() => clearInterval(statsInterval));
this.peer.params.updateStats = (stats: any) => {
lastStats = Date.now();
this.stats = stats;
}
this.peer.killed.finally(() => clearInterval(healthInterval));
}
async createRpcIoPeer(socket: IOServerSocket, accessControls: AccessControls) {

View File

@@ -285,6 +285,12 @@ export function startPluginRemote(mainFilename: string, pluginId: string, peerSe
// start the stats updater/watchdog after installation has finished, as that may take some time.
peer.getParam('updateStats').then(updateStats => startStatsUpdater(allMemoryStats, updateStats));
// Answers liveness pings: the received timestamp is echoed back through the
// peer's 'pong' param.
let pong: (time: number) => Promise<void>;
peer.params.ping = async (sentAt: number) => {
    // Look up the 'pong' param on first use and keep it for later pings.
    if (!pong)
        pong = await peer.getParam('pong');
    await pong(sentAt);
};
const main = pluginReader('main.nodejs.js');
const script = main.toString();