From 23becfdffc0dbf9917421a3d0d610f6152bcdcd6 Mon Sep 17 00:00:00 2001 From: Koushik Dutta Date: Tue, 14 Sep 2021 23:21:40 -0700 Subject: [PATCH] homekit: camera talk back support --- plugins/homekit/package.json | 3 +- plugins/homekit/src/common.ts | 11 +- plugins/homekit/src/intercom.ts | 109 -------------- plugins/homekit/src/main.ts | 7 +- plugins/homekit/src/{ => rtp}/rtp-demuxer.ts | 9 +- plugins/homekit/src/rtp/rtp-ffmpeg-input.ts | 146 +++++++++++++++++++ plugins/homekit/src/types/camera.ts | 83 ++++++----- plugins/homekit/src/types/doorbell.ts | 6 +- plugins/homekit/src/types/mediaplayer.ts | 2 +- 9 files changed, 215 insertions(+), 161 deletions(-) delete mode 100644 plugins/homekit/src/intercom.ts rename plugins/homekit/src/{ => rtp}/rtp-demuxer.ts (92%) create mode 100644 plugins/homekit/src/rtp/rtp-ffmpeg-input.ts diff --git a/plugins/homekit/package.json b/plugins/homekit/package.json index a28b454cc..43cae6bb1 100644 --- a/plugins/homekit/package.json +++ b/plugins/homekit/package.json @@ -24,7 +24,8 @@ "interfaces": [ "MixinProvider", "Settings" - ] + ], + "realfs": true }, "dependencies": { "hap-nodejs": "file:../HAP-NodeJS", diff --git a/plugins/homekit/src/common.ts b/plugins/homekit/src/common.ts index 35d62a9ac..37d71e31b 100644 --- a/plugins/homekit/src/common.ts +++ b/plugins/homekit/src/common.ts @@ -1,16 +1,25 @@ import { EventListenerRegister, ScryptedDevice, ScryptedDeviceType, ScryptedInterface } from '@scrypted/sdk'; import { Accessory, Service } from './hap'; +import throttle from 'lodash/throttle'; export interface DummyDevice { interfaces?: string[]; type?: ScryptedDeviceType; } +export interface SnapshotThrottle { + (): Promise; +} + +export interface HomeKitSession { + snapshotThrottles: Map; +} + interface SupportedType { type: ScryptedDeviceType; probe(device: DummyDevice): boolean; - getAccessory: (device: ScryptedDevice & any) => Accessory; + getAccessory: (device: ScryptedDevice & any, homekitSession: HomeKitSession) => 
Accessory; noBridge?: boolean; } diff --git a/plugins/homekit/src/intercom.ts b/plugins/homekit/src/intercom.ts deleted file mode 100644 index 94b8586dd..000000000 --- a/plugins/homekit/src/intercom.ts +++ /dev/null @@ -1,109 +0,0 @@ -import sdk from "@scrypted/sdk"; -import { listenZeroCluster } from "@scrypted/common/src/listen-cluster"; -import { FFMpegInput, Intercom, ScryptedDevice } from "@scrypted/sdk"; -import { createSocket, Socket, SocketType } from "dgram"; -import { createServer, Server } from "net"; -import child_process from "child_process"; -import { ffmpegLogInitialOutput } from "@scrypted/common/src/ffmpeg-helper"; -import { FFMpegRebroadcastSession, startRebroadcastSession } from "@scrypted/common/src/ffmpeg-rebroadcast"; - -const { mediaManager } = sdk; - -async function pickPort(socketType: SocketType) { - // const socket = createSocket(socketType); - // return await new Promise(resolve => socket.bind(0, () => { - // const { port } = socket.address(); - // socket.close(() => resolve(port)); - // })); - return Math.round(Math.abs(Math.random()) * 40000 + 10000); -} - -export class IntercomSession { - sdpReturnAudio: string; - sdpServer: Server; - session: FFMpegRebroadcastSession; - port: number; - heartbeatTimer: NodeJS.Timeout; - - constructor(public device: ScryptedDevice & Intercom, public socketType: SocketType, public address: string, public srtp: Buffer) { - - } - - async start(): Promise { - const sdpIpVersion = this.socketType === "udp6" ? "IP6 " : "IP4"; - this.port = await pickPort(this.socketType); - - // Session description protocol message that FFmpeg will share with HomeKit. - // SDP messages tell the other side of the connection what we're expecting to receive. - // - // Parameters are: - // v protocol version - always 0. - // o originator and session identifier. - // s session description. - // c connection information. - // t timestamps for the start and end of the session. 
- // m media type - audio, adhering to RTP/AVP, payload type 110. - // b bandwidth information - application specific, 24k. - // a=rtpmap payload type 110 corresponds to an MP4 stream. - // a=fmtp for payload type 110, use these format parameters. - // a=crypto crypto suite to use for this session. - this.sdpReturnAudio = [ - "v=0", - "o=- 0 0 IN " + sdpIpVersion + " 127.0.0.1", - "s=" + this.device.name + " Audio Talkback", - "c=IN " + sdpIpVersion + " " + this.address, - "t=0 0", - "m=audio " + this.port + " RTP/AVP 110", - "b=AS:24", - "a=rtpmap:110 MPEG4-GENERIC/16000/1", - "a=fmtp:110 profile-level-id=1;mode=AAC-hbr;sizelength=13;indexlength=3;indexdeltalength=3; config=F8F0212C00BC00", - "a=crypto:1 AES_CM_128_HMAC_SHA1_80 inline:" + this.srtp.toString("base64") - ].join("\n"); - - this.sdpServer = createServer(socket => { - this.sdpServer.close(); - socket.write(this.sdpReturnAudio); - socket.end(); - }); - const sdpPort = await listenZeroCluster(this.sdpServer); - console.log('sdp port', sdpPort); - - const ffmpegInput: FFMpegInput = { - inputArguments: [ - "-f", "sdp", - "-acodec", "libfdk_aac", - "-ac", '1', - "-i", `tcp://127.0.0.1:${sdpPort}`, - ] - }; - - this.session = await startRebroadcastSession(ffmpegInput, { - vcodec: ['-vn'], - acodec: ['-acodec', 'libfdk_aac', '-ac', '1'], - outputFormat: 'adts', - }); - - return this.session; - } - - // Send a regular heartbeat to FFmpeg to ensure the pipe remains open and the process alive. - heartbeat(socket: Socket, heartbeat: Buffer): void { - - // Clear the old heartbeat timer. - clearTimeout(this.heartbeatTimer); - - // Send a heartbeat to FFmpeg every few seconds to keep things open. FFmpeg has a five-second timeout - // in reading input, and we want to be comfortably within the margin for error to ensure the process - // continues to run. 
- this.heartbeatTimer = setTimeout(() => { - socket.send(heartbeat, this.port); - this.heartbeat(socket, heartbeat); - - }, 3.5 * 1000); - } - - destroy() { - this.sdpServer?.close(); - this.session?.kill(); - } -} \ No newline at end of file diff --git a/plugins/homekit/src/main.ts b/plugins/homekit/src/main.ts index 27ffaa911..e6e739435 100644 --- a/plugins/homekit/src/main.ts +++ b/plugins/homekit/src/main.ts @@ -1,7 +1,7 @@ import sdk, { Settings, MixinProvider, ScryptedDeviceBase, ScryptedDeviceType, Setting, ScryptedInterface, ScryptedInterfaceProperty, MixinDeviceBase, Camera, MediaObject } from '@scrypted/sdk'; import { Bridge, Categories, Characteristic, HAPStorage, PublishInfo, Service } from './hap'; import os from 'os'; -import { supportedTypes } from './common'; +import { HomeKitSession, SnapshotThrottle, supportedTypes } from './common'; import './types' import { CameraMixin } from './camera-mixin'; import { maybeAddBatteryService } from './battery'; @@ -53,8 +53,9 @@ const uuid = localStorage.getItem('uuid'); const includeToken = 4; -class HomeKit extends ScryptedDeviceBase implements MixinProvider, Settings { +class HomeKit extends ScryptedDeviceBase implements MixinProvider, Settings, HomeKitSession { bridge = new Bridge('Scrypted', uuid); + snapshotThrottles = new Map(); constructor() { super(); @@ -147,7 +148,7 @@ class HomeKit extends ScryptedDeviceBase implements MixinProvider, Settings { continue; } - const accessory = supportedType.getAccessory(device); + const accessory = supportedType.getAccessory(device, this); if (accessory) { accessoryIds.add(id); diff --git a/plugins/homekit/src/rtp-demuxer.ts b/plugins/homekit/src/rtp/rtp-demuxer.ts similarity index 92% rename from plugins/homekit/src/rtp-demuxer.ts rename to plugins/homekit/src/rtp/rtp-demuxer.ts index d1d411081..def0c57e9 100644 --- a/plugins/homekit/src/rtp-demuxer.ts +++ b/plugins/homekit/src/rtp/rtp-demuxer.ts @@ -37,6 +37,7 @@ export class RtpDemuxer extends EventEmitter { // 
Split the message into RTP and RTCP packets. this.socket.on("message", (msg) => { + console.log('rtsp message'); // Send RTP packets to the RTP port. if (this.isRtpMessage(msg)) { @@ -50,14 +51,6 @@ export class RtpDemuxer extends EventEmitter { this.deviceName); } - // Close the socket and cleanup. - public close(): void { - this.console.log("%s: Closing the RtpDemuxer instance on port %s.", this.deviceName); - - clearTimeout(this.heartbeatTimer); - this.socket.close(); - } - // Retrieve the payload information from a packet to discern what the packet payload is. private getPayloadType(message: Buffer): number { return message.readUInt8(1) & 0x7f; diff --git a/plugins/homekit/src/rtp/rtp-ffmpeg-input.ts b/plugins/homekit/src/rtp/rtp-ffmpeg-input.ts new file mode 100644 index 000000000..32602e45b --- /dev/null +++ b/plugins/homekit/src/rtp/rtp-ffmpeg-input.ts @@ -0,0 +1,146 @@ +import sdk from "@scrypted/sdk"; +import { listenZeroCluster } from "@scrypted/common/src/listen-cluster"; +import { FFMpegInput } from "@scrypted/sdk"; +import { Socket, SocketType } from "dgram"; +import { createServer, Server } from "net"; +import { AudioStreamingSamplerate } from "../hap"; + +function pickPort() { + return Math.round(Math.abs(Math.random()) * 40000 + 10000); +} + +export class HomeKitRtpSink { + heartbeatTimer: NodeJS.Timeout; + + constructor(public server: Server, public rtpPort: number, public ffmpegInput: FFMpegInput) { + } + + // Send a regular heartbeat to FFmpeg to ensure the pipe remains open and the process alive. + heartbeat(socket: Socket, heartbeat: Buffer): void { + + // Clear the old heartbeat timer. + clearTimeout(this.heartbeatTimer); + + // Send a heartbeat to FFmpeg every few seconds to keep things open. FFmpeg has a five-second timeout + // in reading input, and we want to be comfortably within the margin for error to ensure the process + // continues to run. 
+ this.heartbeatTimer = setTimeout(() => { + socket.send(heartbeat, this.rtpPort); + this.heartbeat(socket, heartbeat); + + }, 3.5 * 1000); + } + + destroy() { + this.server?.close(); + clearTimeout(this.heartbeatTimer); + } +} + +export async function startRtpSink(socketType: SocketType, address: string, srtp: Buffer,sampleRate: AudioStreamingSamplerate) { + const sdpIpVersion = socketType === "udp6" ? "IP6 " : "IP4"; + const rtpPort = pickPort(); + + /* + https://wiki.multimedia.cx/index.php?title=MPEG-4_Audio + + 5 bits: object type + if (object type == 31) + 6 bits + 32: object type + 4 bits: frequency index + if (frequency index == 15) + 24 bits: frequency + 4 bits: channel configuration + var bits: AOT Specific Config + */ + + let csd = 'F8F0212C00BC00'; + /* + 11111000 + 11110000 <-- 111 1000 0 = object-type-extended-last-3 frequency-index channel-config-first-1 + 00100001 + 00101100 + 00000000 + 10111100 + 00000000 + + frequency index corresponds to 8: 16000 Hz + */ + + /* + There are 13 supported frequencies: + + 0: 96000 Hz + 1: 88200 Hz + 2: 64000 Hz + 3: 48000 Hz + 4: 44100 Hz + 5: 32000 Hz + 6: 24000 Hz + 7: 22050 Hz + 8: 16000 Hz + 9: 12000 Hz + 10: 11025 Hz + 11: 8000 Hz + 12: 7350 Hz + 13: Reserved + 14: Reserved + 15: frequency is written explicitly + */ + + let csdBuffer = Buffer.from(csd, 'hex'); + let b = csdBuffer[1]; + b &= 0b11100001; + let fi = sampleRate === AudioStreamingSamplerate.KHZ_8 ? 11 + : sampleRate === AudioStreamingSamplerate.KHZ_24 ? 6 : 8; + b |= (fi << 1); + csdBuffer[1] = b; + csd = csdBuffer.toString('hex').toUpperCase(); + + // rewrite the frequency index to actual negotiated value. + + + // Session description protocol message that FFmpeg will share with HomeKit. + // SDP messages tell the other side of the connection what we're expecting to receive. + // + // Parameters are: + // v protocol version - always 0. + // o originator and session identifier. + // s session description. + // c connection information.
+ // t timestamps for the start and end of the session. + // m media type - audio, adhering to RTP/AVP, payload type 110. + // b bandwidth information - application specific, 24k. + // a=rtpmap payload type 110 corresponds to an MP4 stream. + // a=fmtp for payload type 110, use these format parameters. + // a=crypto crypto suite to use for this session. + const sdpReturnAudio = [ + "v=0", + "o=- 0 0 IN " + sdpIpVersion + " 127.0.0.1", + "s=" + "HomeKit Audio Talkback", + "c=IN " + sdpIpVersion + " " + address, + "t=0 0", + "m=audio " + rtpPort + " RTP/AVP 110", + "b=AS:24", + "a=rtpmap:110 MPEG4-GENERIC/16000/1", + "a=fmtp:110 profile-level-id=1;mode=AAC-hbr;sizelength=13;indexlength=3;indexdeltalength=3; config=" + csd, + "a=crypto:1 AES_CM_128_HMAC_SHA1_80 inline:" + srtp.toString("base64") + ].join("\n"); + + const server = createServer(socket => { + socket.write(Buffer.from(sdpReturnAudio)); + socket.end(); + }); + const sdpServerPort = await listenZeroCluster(server); + + const ffmpegInput = { + inputArguments: [ + "-protocol_whitelist", "pipe,udp,rtp,file,crypto,tcp", + "-f", "sdp", + "-acodec", "libfdk_aac", '-ac', '1', + "-i", "tcp://127.0.0.1:" + sdpServerPort, + ] + }; + + return new HomeKitRtpSink(server, rtpPort, ffmpegInput); +} \ No newline at end of file diff --git a/plugins/homekit/src/types/camera.ts b/plugins/homekit/src/types/camera.ts index a9ab62ef1..657346298 100644 --- a/plugins/homekit/src/types/camera.ts +++ b/plugins/homekit/src/types/camera.ts @@ -1,6 +1,6 @@ import { Camera, FFMpegInput, MotionSensor, ScryptedDevice, ScryptedDeviceType, ScryptedInterface, ScryptedMimeTypes, VideoCamera, AudioSensor, Intercom } from '@scrypted/sdk' -import { addSupportedType, DummyDevice } from '../common' +import { addSupportedType, DummyDevice, HomeKitSession } from '../common' import { AudioStreamingCodec, AudioStreamingCodecType, AudioStreamingSamplerate, CameraController, CameraStreamingDelegate, CameraStreamingOptions, Characteristic, H264Level, 
H264Profile, PrepareStreamCallback, PrepareStreamRequest, PrepareStreamResponse, SnapshotRequest, SnapshotRequestCallback, SRTPCryptoSuites, StartStreamRequest, StreamingRequest, StreamRequestCallback, StreamRequestTypes } from '../hap'; import { makeAccessory } from './common'; @@ -16,9 +16,8 @@ import { AudioRecordingCodec, AudioRecordingCodecType, AudioRecordingSamplerate, import { startFFMPegFragmetedMP4Session } from '@scrypted/common/src/ffmpeg-mp4-parser-session'; import { ffmpegLogInitialOutput } from '../../../../common/src/ffmpeg-helper'; import throttle from 'lodash/throttle'; -import { RtpDemuxer } from '../rtp-demuxer'; -import { FFMpegRebroadcastSession } from '@scrypted/common/src/ffmpeg-rebroadcast'; -import { IntercomSession } from '../intercom'; +import { RtpDemuxer } from '../rtp/rtp-demuxer'; +import { HomeKitRtpSink, startRtpSink } from '../rtp/rtp-ffmpeg-input'; const { log, mediaManager, deviceManager } = sdk; @@ -130,7 +129,7 @@ addSupportedType({ probe(device: DummyDevice) { return device.interfaces.includes(ScryptedInterface.VideoCamera); }, - getAccessory(device: ScryptedDevice & VideoCamera & Camera & MotionSensor & AudioSensor & Intercom) { + getAccessory(device: ScryptedDevice & VideoCamera & Camera & MotionSensor & AudioSensor & Intercom, homekitSession: HomeKitSession) { interface Session { request: PrepareStreamRequest; videossrc: number; @@ -139,7 +138,8 @@ addSupportedType({ videoReturn: dgram.Socket; audioReturn: dgram.Socket; demuxer?: RtpDemuxer; - intercomSession?: Promise; + rtpSink?: HomeKitRtpSink; + targetAddress?: string; } const sessions = new Map(); @@ -158,10 +158,11 @@ addSupportedType({ session.cp?.kill(); session.videoReturn?.close(); session.audioReturn?.close(); + session.rtpSink?.destroy(); } const throttledTakePicture = throttle(async () => { - console.log('snapshot throttle fetch', device.name); + // console.log(device.name, 'throttled snapshot fetch'); const media = await device.takePicture(); const jpeg = 
await mediaManager.convertMediaObjectToBuffer(media, 'image/jpeg'); return jpeg; @@ -170,16 +171,30 @@ addSupportedType({ trailing: true, }); + function snapshotAll() { + for (const snapshotThrottle of homekitSession.snapshotThrottles.values()) { + snapshotThrottle(); + } + } + + homekitSession.snapshotThrottles.set(device.id, throttledTakePicture); + const delegate: CameraStreamingDelegate = { async handleSnapshotRequest(request: SnapshotRequest, callback: SnapshotRequestCallback) { try { + // console.log(device.name, 'snapshot request'); + // an idle Home.app will hit this endpoint every 10 seconds, and slow requests bog up the entire app. // avoid slow requests by prefetching every 9 seconds. if (device.interfaces.includes(ScryptedInterface.Camera)) { + // snapshots are requested en masse, so trigger them rather than wait for home to + // fetch everything serially. // this call is not a bug, to force lodash to take a picture on the trailing edge, // throttle must be called twice. - throttledTakePicture(); + snapshotAll(); + snapshotAll(); + callback(null, await throttledTakePicture()); return; } @@ -222,26 +237,6 @@ addSupportedType({ audioReturn, } - const audioKey = Buffer.concat([session.request.audio.srtp_key, session.request.audio.srtp_salt]); - - if (twoWayAudio) { - session.demuxer = new RtpDemuxer(device.name, console, audioReturn); - const intercom = new IntercomSession(device, socketType, request.targetAddress, audioKey); - // const ffmpegInput = await intercom.start(); - session.demuxer.on('rtp', (buffer: Buffer) => { - audioReturn.send(buffer, intercom.port); - }); - session.demuxer.once('rtcp', () => { - intercom.start().then(_ => { - session.demuxer.on('rtcp', (buffer: Buffer) => { - intercom.heartbeat(audioReturn, buffer); - }); - }); - - }); - // session.rtpRebroadcast = intercom.session; - } - sessions.set(request.sessionID, session); const response: PrepareStreamResponse = { @@ -297,7 +292,6 @@ addSupportedType({ callback(); - const videomtu =
188 * 3; const audiomtu = 188 * 1; @@ -380,23 +374,42 @@ addSupportedType({ console.log(args); - const cp = child_process.spawn(await mediaManager.getFFmpegPath(), args, { - // stdio: 'ignore', - }); + const cp = child_process.spawn(await mediaManager.getFFmpegPath(), args); ffmpegLogInitialOutput(console, cp); session.cp = cp; + + if (twoWayAudio) { + // const demuxer = await createRtpDemuxer(audioReturn, request.audio.srtp_key, request.audio.srtp_salt); + session.demuxer = new RtpDemuxer(device.name, console, session.audioReturn); + const socketType = session.request.addressVersion === 'ipv6' ? 'udp6' : 'udp4'; + + session.rtpSink = await startRtpSink(socketType, session.request.targetAddress, + audioKey, (request as StartStreamRequest).audio.sample_rate); + + session.demuxer.on('rtp', (buffer: Buffer) => { + session.audioReturn.send(buffer, session.rtpSink.rtpPort); + }); + + session.demuxer.on('rtcp', (buffer: Buffer) => { + session.rtpSink.heartbeat(session.audioReturn, buffer); + }); + + const mo = mediaManager.createFFmpegMediaObject(session.rtpSink.ffmpegInput); + device.startIntercom(mo); + } } catch (e) { - log.e(`stream failed ${e}`); console.error('streaming error', e); } }, }; const codecs: AudioStreamingCodec[] = []; - for (const type of [AudioStreamingCodecType.OPUS, AudioStreamingCodecType.AAC_ELD]) { - for (const samplerate of [AudioStreamingSamplerate.KHZ_8, AudioStreamingSamplerate.KHZ_16, AudioStreamingSamplerate.KHZ_24]) { + // multiple audio options can be provided but let's stick with AAC ELD 24k, + // that's what the talkback ffmpeg session in rtp-ffmpeg-input.ts will use.
+ for (const type of [AudioStreamingCodecType.AAC_ELD]) { + for (const samplerate of [AudioStreamingSamplerate.KHZ_24]) { codecs.push({ type, samplerate, diff --git a/plugins/homekit/src/types/doorbell.ts b/plugins/homekit/src/types/doorbell.ts index 1de597411..f1e7a4179 100644 --- a/plugins/homekit/src/types/doorbell.ts +++ b/plugins/homekit/src/types/doorbell.ts @@ -1,6 +1,6 @@ import { BinarySensor, ScryptedDevice, ScryptedDeviceType, ScryptedInterface } from '@scrypted/sdk' -import { addSupportedType, DummyDevice, supportedTypes } from '../common' +import { addSupportedType, DummyDevice, HomeKitSession, supportedTypes } from '../common' import { Characteristic, CharacteristicEventTypes, CharacteristicGetCallback, Service } from '../hap'; import { makeAccessory } from './common'; @@ -9,14 +9,14 @@ addSupportedType({ probe(device: DummyDevice): boolean { return device.interfaces.includes(ScryptedInterface.BinarySensor); }, - getAccessory: (device: ScryptedDevice & BinarySensor) => { + getAccessory: (device: ScryptedDevice & BinarySensor, homekitSession: HomeKitSession) => { const faux: DummyDevice = { interfaces: device.interfaces, type: device.type, }; faux.type = ScryptedDeviceType.Camera; const cameraCheck = supportedTypes[ScryptedInterface.Camera]; - const accessory = cameraCheck.probe(faux) ? cameraCheck.getAccessory(device) : makeAccessory(device); + const accessory = cameraCheck.probe(faux) ? 
cameraCheck.getAccessory(device, homekitSession) : makeAccessory(device); const service = accessory.addService(Service.Doorbell); device.listen({ diff --git a/plugins/homekit/src/types/mediaplayer.ts b/plugins/homekit/src/types/mediaplayer.ts index 9b4c08639..b4d3101cf 100644 --- a/plugins/homekit/src/types/mediaplayer.ts +++ b/plugins/homekit/src/types/mediaplayer.ts @@ -114,7 +114,7 @@ addSupportedType({ allowedIdentifiers.add(check.id); - const input = accessory.addService(Service.InputSource, `input-${check.id}`, check.name); + const input = accessory.addService(Service.InputSource, check.name, `input-${check.id}`); input.setCharacteristic(Characteristic.Identifier, check.id) .setCharacteristic(Characteristic.ConfiguredName, check.name) .setCharacteristic(Characteristic.IsConfigured, Characteristic.IsConfigured.CONFIGURED)