Files
scrypted/plugins/objectdetector/src/ffmpeg-audiosensor.ts
2025-01-29 11:02:13 -08:00

174 lines
6.1 KiB
TypeScript

import sdk, { AudioSensor, FFmpegInput, MixinProvider, ScryptedDeviceBase, ScryptedDeviceType, ScryptedInterface, ScryptedMimeTypes, SettingValue, VideoCamera, WritableDeviceState } from "@scrypted/sdk";
import { SettingsMixinDeviceBase, SettingsMixinDeviceOptions } from "@scrypted/sdk/settings-mixin";
import { StorageSettings } from "@scrypted/sdk/storage-settings";
import { startRtpForwarderProcess } from '../../webrtc/src/rtp-forwarders';
import { RtpPacket } from "../../../external/werift/packages/rtp/src/rtp/rtp";
import { sleep } from "@scrypted/common/src/sleep";
/**
 * Estimates the loudness of a buffer of unsigned 8-bit PCM samples.
 *
 * Each byte is re-centered around zero (128 is the midpoint), the RMS of the
 * centered samples is computed, and the result is expressed in decibels
 * relative to an RMS of 1.0.
 *
 * @param payload Unsigned 8-bit PCM samples.
 * @returns The level in dB relative to an RMS of 1.0, or 0 when the buffer is
 * empty or its RMS is below that reference.
 */
function pcmU8ToDb(payload: Uint8Array): number {
    // Reference RMS; it also acts as the floor that keeps log10 away from 0.
    const floorRms = 1.0;

    // An empty buffer is reported as silence.
    if (!payload.length)
        return 0;

    let energy = 0;
    for (const raw of payload) {
        // Shift the unsigned byte into the signed range -128..127.
        const centered = raw - 128;
        energy += centered * centered;
    }

    const rms = Math.sqrt(energy / payload.length);
    // Anything quieter than the reference level is clamped to 0 dB.
    return rms < floorRms ? 0 : 20 * Math.log10(rms / floorRms);
}
/**
 * Mixin that sets `audioDetected` from the loudness of a device's audio track.
 *
 * The audio-only stream of the mixed-in device is fed through the RTP
 * forwarder, transcoded to mono 8 kHz unsigned 8-bit PCM, and every RTP
 * payload is measured with `pcmU8ToDb`. A payload at or above the configured
 * decibel threshold sets `audioDetected`; a periodic timer clears it again
 * once no loud payload has been seen for the configured timeout.
 *
 * NOTE(review): `audioDetected` is presumably the AudioSensor state property
 * provided by the mixin base class — confirm against the SDK.
 */
class FFmpegAudioDetectionMixin extends SettingsMixinDeviceBase<AudioSensor> implements AudioSensor {
    storageSettings = new StorageSettings(this, {
        decibelThreshold: {
            title: 'Decibel Threshold',
            type: 'number',
            description: 'The decibel level at which to trigger an event.',
            defaultValue: 20,
        },
        audioTimeout: {
            title: 'Audio Timeout',
            type: 'number',
            description: 'The number of seconds to wait after the last audio event before resetting the audio sensor.',
            defaultValue: 10,
        },
    });
    /** 60 second watchdog that restarts the forwarder; undefined once `release()` has run. */
    ensureInterval: NodeJS.Timeout;
    /** Pending or running forwarder; undefined while none is active. */
    forwarder: ReturnType<typeof startRtpForwarderProcess>;
    /** Timer that clears `audioDetected` after the audio timeout has elapsed. */
    audioResetInterval: NodeJS.Timeout;

    constructor(options: SettingsMixinDeviceOptions<AudioSensor>) {
        super(options);
        this.ensureInterval = setInterval(() => this.ensureAudioSensor(), 60000);
        this.ensureAudioSensor();
    }

    /**
     * Starts a forwarder if the mixin is still alive and none is active.
     *
     * Resets the detection state and the reset timer, then tracks the new
     * forwarder promise so that it is dropped again when startup fails or the
     * forwarder terminates; the next watchdog tick then starts a fresh one.
     */
    ensureAudioSensor() {
        // release() clears ensureInterval, so this doubles as the "released" flag.
        if (!this.ensureInterval)
            return;
        if (this.forwarder)
            return;

        this.audioDetected = false;
        clearInterval(this.audioResetInterval);
        this.audioResetInterval = undefined;

        const fp = this.ensureAudioSensorInternal();
        this.forwarder = fp;

        // Only forget the forwarder if it is still the one started here.
        const forgetForwarder = () => {
            if (this.forwarder === fp)
                this.forwarder = undefined;
        };

        // One chain with handlers on both sides: a startup failure, a normal
        // termination and a failed killPromise all end in forgetForwarder, and
        // no derived promise is left without a rejection handler.
        fp.then(f => f.killPromise).then(forgetForwarder, forgetForwarder);
    }

    /**
     * Opens the device's audio stream and starts the RTP forwarder on it.
     *
     * @returns The started forwarder.
     * @throws Error('released/killed') when the mixin was released before the
     * forwarder became usable.
     */
    async ensureAudioSensorInternal() {
        await sleep(5000);
        if (!this.forwarder)
            throw new Error('released/killed');

        const realDevice = sdk.systemManager.getDeviceById<VideoCamera>(this.id);
        const mo = await realDevice.getVideoStream({
            video: null,
            audio: {},
        });
        const ffmpegInput = await sdk.mediaManager.convertMediaObjectToJSON<FFmpegInput>(mo, ScryptedMimeTypes.FFmpegInput);

        // Timestamp (ms) of the most recent payload that crossed the threshold.
        let lastAudio = 0;
        const forwarder = await startRtpForwarderProcess(this.console, ffmpegInput, {
            video: null,
            audio: {
                codecCopy: 'pcm_u8',
                encoderArguments: [
                    '-acodec', 'pcm_u8',
                    '-ac', '1',
                    '-ar', '8000',
                ],
                onRtp: rtp => {
                    const now = Date.now();
                    // While audio is already flagged, skip decoding until half of
                    // the timeout has passed since the last loud payload.
                    if (this.audioDetected && now - lastAudio < this.storageSettings.values.audioTimeout * 500)
                        return;
                    const packet = RtpPacket.deSerialize(rtp);
                    const decibels = pcmU8ToDb(packet.payload);
                    if (decibels < this.storageSettings.values.decibelThreshold)
                        return;
                    this.audioDetected = true;
                    lastAudio = now;
                },
            }
        });

        // release() may have run while the stream or the forwarder was starting.
        // Stop the forwarder here and do not install the reset timer, which
        // nothing would ever clear again.
        if (!this.forwarder) {
            forwarder.kill();
            throw new Error('released/killed');
        }

        this.audioResetInterval = setInterval(() => {
            if (!this.audioDetected)
                return;
            if (Date.now() - lastAudio < this.storageSettings.values.audioTimeout * 1000)
                return;
            this.audioDetected = false;
        }, this.storageSettings.values.audioTimeout * 1000);
        return forwarder;
    }

    /** Returns the settings backed by `storageSettings`. */
    async getMixinSettings() {
        return this.storageSettings.getSettings();
    }

    /** Stores a single setting through `storageSettings`. */
    putMixinSetting(key: string, value: SettingValue) {
        return this.storageSettings.putSetting(key, value);
    }

    /**
     * Stops the forwarder and both timers. After this call the watchdog no
     * longer restarts anything.
     */
    async release() {
        // The forwarder promise rejects when startup is aborted or fails; there
        // is nothing to kill in that case, so the rejection is ignored here.
        this.forwarder?.then(f => f.kill()).catch(() => { });
        this.forwarder = undefined;
        clearInterval(this.ensureInterval);
        this.ensureInterval = undefined;
        clearInterval(this.audioResetInterval);
        this.audioResetInterval = undefined;
    }
}
/**
 * Mixin provider that attaches `FFmpegAudioDetectionMixin` to cameras and
 * doorbells that implement `VideoCamera`.
 */
export class FFmpegAudioDetectionMixinProvider extends ScryptedDeviceBase implements MixinProvider {
    /**
     * Decides whether a device can receive the audio detection mixin.
     *
     * @returns The interfaces added by the mixin, or undefined when the device
     * is not a camera/doorbell or does not implement `VideoCamera`.
     */
    async canMixin(type: ScryptedDeviceType, interfaces: string[]) {
        const isCameraLike = type === ScryptedDeviceType.Camera || type === ScryptedDeviceType.Doorbell;
        if (!isCameraLike || !interfaces.includes(ScryptedInterface.VideoCamera))
            return;

        return [ScryptedInterface.AudioSensor, ScryptedInterface.Settings];
    }

    /** Creates the audio detection mixin for the given device. */
    async getMixin(mixinDevice: any, mixinDeviceInterfaces: ScryptedInterface[], mixinDeviceState: WritableDeviceState): Promise<any> {
        const mixinOptions = {
            group: 'Audio Detection',
            groupKey: 'audio-detection',
            mixinDevice,
            mixinDeviceInterfaces,
            mixinDeviceState,
            mixinProviderNativeId: this.nativeId,
        };
        return new FFmpegAudioDetectionMixin(mixinOptions);
    }

    /** Releases a mixin previously created by `getMixin`, if one was passed. */
    async releaseMixin(id: string, mixinDevice: any) {
        const mixin = mixinDevice as FFmpegAudioDetectionMixin;
        await mixin?.release();
    }
}