two-way audio improvements:

rename pcm_ulaw to pcm_mulaw per ffmpeg codec name
support transcode-free rtp forwarding of audio-only streams
onvif two-way audio codec negotiation with upstream
This commit is contained in:
Koushik Dutta
2023-10-19 14:00:36 -07:00
parent ed35811296
commit f07604de4c
7 changed files with 128 additions and 141 deletions

View File

@@ -149,7 +149,7 @@ export function parseFmtp(msection: string[]) {
const paramLine = fmtpLine.substring(firstSpace + 1);
const payloadType = parseInt(fmtp.split(':')[1]);
if (!fmtp || !paramLine || Number.isNaN( payloadType )) {
if (!fmtp || !paramLine || Number.isNaN(payloadType)) {
return;
}
@@ -170,28 +170,43 @@ export function parseFmtp(msection: string[]) {
}
export type MSection = ReturnType<typeof parseMSection>;
export type RTPMap = ReturnType<typeof parseRtpMap>;
export function parseRtpMap(mlineType: string, rtpmap: string) {
const match = rtpmap?.match(/a=rtpmap:([\d]+) (.*?)\/([\d]+)/);
const match = rtpmap?.match(/a=rtpmap:([\d]+) (.*?)\/([\d]+)(\/([\d]+))?/);
rtpmap = rtpmap?.toLowerCase();
let codec: string;
let ffmpegEncoder: string;
if (rtpmap?.includes('mpeg4')) {
codec = 'aac';
ffmpegEncoder = 'aac';
}
else if (rtpmap?.includes('opus')) {
codec = 'opus';
ffmpegEncoder = 'libopus';
}
else if (rtpmap?.includes('pcma')) {
codec = 'pcm_alaw';
ffmpegEncoder = 'pcm_alaw';
}
else if (rtpmap?.includes('pcmu')) {
codec = 'pcm_ulaw';
codec = 'pcm_mulaw';
ffmpegEncoder = 'pcm_mulaw';
}
else if (rtpmap?.includes('g726')) {
codec = 'g726';
// disabled since 48000 is non-compliant in ffmpeg and fails.
// ffmpegEncoder = 'g726';
}
else if (rtpmap?.includes('pcm')) {
codec = 'pcm';
}
else if (rtpmap?.includes('l16')) {
codec = 'pcm_s16be';
ffmpegEncoder = 'pcm_s16be';
}
else if (rtpmap?.includes('h264')) {
codec = 'h264';
}
@@ -207,8 +222,10 @@ export function parseRtpMap(mlineType: string, rtpmap: string) {
return {
line: rtpmap,
codec,
ffmpegEncoder,
rawCodec: match?.[2],
clock: parseInt(match?.[3]),
channels: parseInt(match?.[5]) || undefined,
payloadType: parseInt(match?.[1]),
}
}
@@ -220,9 +237,11 @@ export function parseMSection(msection: string[]) {
const mline = parseMLine(msection[0]);
const rawRtpmaps = msection.filter(line => line.startsWith(artpmap));
const rtpmaps = rawRtpmaps.map(line => parseRtpMap(mline.type, line));
const codec = parseRtpMap(mline.type, rawRtpmaps[0]).codec;
// if no rtp map is specified, pcm_alaw is used. parsing a null rtpmap is valid.
const rtpmap = parseRtpMap(mline.type, rawRtpmaps[0]);
const { codec } = rtpmap;
let direction: string;
for (const checkDirection of ['sendonly', 'sendrecv', 'recvonly', 'inactive']) {
const found = msection.find(line => line === 'a=' + checkDirection);
if (found) {
@@ -239,6 +258,7 @@ export function parseMSection(msection: string[]) {
contents: msection.join('\r\n'),
control,
codec,
rtpmap,
direction,
toSdp: () => {
return ret.lines.join('\r\n');

View File

@@ -401,7 +401,7 @@ class AmcrestCamera extends RtspSmartCamera implements VideoCameraConfiguration,
else if (audioCodec?.includes('g711a'))
audioCodec = 'pcm_alaw';
else if (audioCodec?.includes('g711u'))
audioCodec = 'pcm_ulaw';
audioCodec = 'pcm_mulaw';
else if (audioCodec?.includes('g711'))
audioCodec = 'pcm';

View File

@@ -8,65 +8,8 @@ import { nextSequenceNumber } from "../../homekit/src/types/camera/jitter-buffer
import { RtspSmartCamera } from "../../rtsp/src/rtsp";
import { startRtpForwarderProcess } from '../../webrtc/src/rtp-forwarders';
const { mediaManager } = sdk;
interface SupportedCodec {
ffmpegCodec: string;
sdpName: string;
}
const supportedCodecs: SupportedCodec[] = [];
function addSupportedCodec(ffmpegCodec: string, sdpName: string) {
supportedCodecs.push({
ffmpegCodec,
sdpName,
});
}
// a=rtpmap:97 L16/8000
// a=rtpmap:100 L16/16000
// a=rtpmap:101 L16/48000
// a=rtpmap:8 PCMA/8000
// a=rtpmap:102 PCMA/16000
// a=rtpmap:103 PCMA/48000
// a=rtpmap:0 PCMU/8000
// a=rtpmap:104 PCMU/16000
// a=rtpmap:105 PCMU/48000
// a=rtpmap:106 /0
// a=rtpmap:107 /0
// a=rtpmap:108 /0
// a=rtpmap:109 MPEG4-GENERIC/8000
// a=rtpmap:110 MPEG4-GENERIC/16000
// a=rtpmap:111 MPEG4-GENERIC/48000
// this order is irrelevant, the order of preference is the sdp.
addSupportedCodec('pcm_mulaw', 'PCMU');
addSupportedCodec('pcm_alaw', 'PCMA');
addSupportedCodec('pcm_s16be', 'L16');
addSupportedCodec('adpcm_g726', 'G726');
addSupportedCodec('aac', 'MPEG4-GENERIC');
interface CodecMatch {
payloadType: string;
sdpName: string;
sampleRate: string;
channels: string;
}
const codecRegex = /a=rtpmap:\s*(\d+) (.*?)\/(\d+)/g
function* parseCodecs(audioSection: string): Generator<CodecMatch> {
for (const match of audioSection.matchAll(codecRegex)) {
const [_, payloadType, sdpName, sampleRate, _skip, channels] = match;
yield {
payloadType,
sdpName,
sampleRate,
channels,
}
}
}
const Require = 'www.onvif.org/ver20/backchannel';
export class OnvifIntercom implements Intercom {
@@ -153,18 +96,10 @@ export class OnvifIntercom implements Intercom {
}
this.camera.console.log('backchannel transport', transportDict);
const availableCodecs = [...parseCodecs(audioBackchannel.contents)];
let match: CodecMatch;
let codec: SupportedCodec;
for (const supported of availableCodecs) {
codec = supportedCodecs.find(check => check.sdpName?.toLowerCase() === supported.sdpName.toLowerCase());
if (codec) {
match = supported;
break;
}
}
const availableMatches = audioBackchannel.rtpmaps.filter(rtpmap => rtpmap.ffmpegEncoder);
const defaultMatch = audioBackchannel.rtpmaps.find(rtpmap => rtpmap.ffmpegEncoder);
if (!match)
if (!defaultMatch)
throw new Error('no supported codec was found for back channel');
let ssrcBuffer: Buffer;
@@ -178,7 +113,7 @@ export class OnvifIntercom implements Intercom {
const ssrc = ssrcBuffer.readInt32BE(0);
const ssrcUnsigned = ssrcBuffer.readUint32BE(0);
const payloadType = parseInt(match.payloadType);
let { payloadType } = defaultMatch;
await intercomClient.play({
Require,
@@ -189,16 +124,25 @@ export class OnvifIntercom implements Intercom {
const forwarder = await startRtpForwarderProcess(this.camera.console, ffmpegInput, {
audio: {
onRtp: (rtp) => {
// if (true) {
// const p = RtpPacket.deSerialize(rtp);
// p.header.payloadType = payloadType;
// p.header.ssrc = ssrcUnsigned;
// p.header.marker = true;
// rtpServer.server.send(p.serialize(), serverRtp, ip);
// return;
// }
negotiate: async msection => {
const check = msection.rtpmap;
const channels = check.channels || 1;
return !!availableMatches.find(rtpmap => {
if (check.codec !== rtpmap.codec)
return false;
if (channels !== (rtpmap.channels || 1))
return false;
if (check.clock !== rtpmap.clock)
return false;
payloadType = check.payloadType;
// this default check should maybe be in sdp-utils.ts.
if (payloadType === undefined)
payloadType = 8;
return true;
});
},
onRtp: rtp => {
const p = RtpPacket.deSerialize(rtp);
if (!pending) {
@@ -206,7 +150,8 @@ export class OnvifIntercom implements Intercom {
return;
}
if (pending.payload.length + p.payload.length < 1024) {
const elapsedRtpTimeMs = Math.abs(pending.header.timestamp - p.header.timestamp) / 8000 * 1000;
if (elapsedRtpTimeMs <= 60) {
pending.payload = Buffer.concat([pending.payload, p.payload]);
return;
}
@@ -224,14 +169,14 @@ export class OnvifIntercom implements Intercom {
pending = p;
},
codecCopy: codec.ffmpegCodec,
codecCopy: 'ffmpeg',
payloadType,
ssrc,
packetSize: 1024,
encoderArguments: [
'-acodec', codec.ffmpegCodec,
'-ar', match.sampleRate,
'-ac', match.channels || '1',
'-acodec', defaultMatch.ffmpegEncoder,
'-ar', defaultMatch.clock.toString(),
'-ac', defaultMatch.channels?.toString() || '1',
],
}
});

View File

@@ -129,7 +129,7 @@ export async function createTrackForwarder(options: {
if (!maximumCompatibilityMode) {
let found: RTCRtpCodecParameters;
if (mediaStreamOptions?.audio?.codec === 'pcm_ulaw') {
if (mediaStreamOptions?.audio?.codec === 'pcm_mulaw') {
found = audioTransceiver.codecs.find(codec => codec.mimeType === 'audio/PCMU')
}
else if (mediaStreamOptions?.audio?.codec === 'pcm_alaw') {

View File

@@ -2,7 +2,7 @@ import { Deferred } from "@scrypted/common/src/deferred";
import { closeQuiet, createBindZero, listenZeroSingleClient } from "@scrypted/common/src/listen-cluster";
import { ffmpegLogInitialOutput, safeKillFFmpeg, safePrintFFmpegArguments } from "@scrypted/common/src/media-helpers";
import { RtspClient, RtspServer, RtspServerResponse, RtspStatusError } from "@scrypted/common/src/rtsp-server";
import { MSection, addTrackControls, parseSdp, replaceSectionPort } from "@scrypted/common/src/sdp-utils";
import { MSection, RTPMap, addTrackControls, parseSdp, replaceSectionPort } from "@scrypted/common/src/sdp-utils";
import sdk, { FFmpegInput } from "@scrypted/sdk";
import child_process, { ChildProcess } from 'child_process';
import dgram from 'dgram';
@@ -16,6 +16,7 @@ type StringWithAutocomplete<T> = T | (string & Record<never, never>);
export type RtpCodecCopy = StringWithAutocomplete<"copy">;
export interface RtpTrack {
negotiate?: (msection: MSection) => Promise<boolean>;
codecCopy?: RtpCodecCopy;
ffmpegDestination?: string;
packetSize?: number;
@@ -164,7 +165,7 @@ export async function startRtpForwarderProcess(console: Console, ffmpegInput: FF
if (ffmpegInput.url
&& isRtsp
&& isCodecCopy(videoCodec, ffmpegInput.mediaStreamOptions?.video?.codec)) {
&& (!video || isCodecCopy(videoCodec, ffmpegInput.mediaStreamOptions?.video?.codec))) {
// console.log('video codec matched:', rtpTracks.video.codecCopy);
@@ -178,30 +179,45 @@ export async function startRtpForwarderProcess(console: Console, ffmpegInput: FF
const describe = await rtspClient.describe();
rtspSdp = describe.body.toString();
const parsedSdp = parseSdp(rtspSdp);
const videoSection = parsedSdp.msections.find(msection => msection.type === 'video' && (msection.codec === videoCodec || videoCodec === 'copy'));
// maybe fallback to udp forwarding/transcoding?
if (!videoSection)
throw new Error(`advertised video codec ${videoCodec} not found in sdp.`);
if (!videoSection.codec) {
console.warn('Unable to determine sdpvideo codec? Please report this to @koush on Discord.');
console.warn(rtspSdp);
}
videoSectionDeferred.resolve(videoSection);
let videoSection: MSection;
let channel = 0;
await setupRtspClient(console, rtspClient, channel, videoSection, rtspClientForceTcp, createPacketDelivery(video));
channel += 2;
const audioSection = parsedSdp.msections.find(msection => msection.type === 'audio' && (msection.codec === audioCodec || audioCodec === 'copy'));
if (video) {
videoSection = parsedSdp.msections.find(msection => msection.type === 'video' && (msection.codec === videoCodec || videoCodec === 'copy'));
// maybe fallback to udp forwarding/transcoding?
if (!videoSection)
throw new Error(`advertised video codec ${videoCodec} not found in sdp.`);
console.log('a/v', videoCodec, audioCodec, 'found', videoSection.codec, audioSection?.codec);
if (!videoSection.codec) {
console.warn('Unable to determine sdpvideo codec? Please report this to @koush on Discord.');
console.warn(rtspSdp);
}
videoSectionDeferred.resolve(videoSection);
await setupRtspClient(console, rtspClient, channel, videoSection, rtspClientForceTcp, createPacketDelivery(video));
channel += 2;
}
else {
videoSectionDeferred.resolve(undefined);
}
const audioSections = parsedSdp.msections.filter(msection => msection.type === 'audio');
let audioSection = audioSections.find(msection => msection.codec === audioCodec || audioCodec === 'copy');
if (!audioSection) {
for (const check of audioSections) {
if (await audio.negotiate?.(check) === true) {
audioSection = check;
break;
}
}
}
console.log('a/v', videoCodec, audioCodec, 'found', videoSection?.codec, audioSection?.codec);
if (audio) {
if (audioSection
&& isCodecCopy(audioCodec, audioSection?.codec)) {
if (audioSection) {
// console.log('audio codec matched:', audio.codecCopy);
@@ -216,9 +232,7 @@ export async function startRtpForwarderProcess(console: Console, ffmpegInput: FF
// console.log('audio codec transcoding:', audio.codecCopy);
const newSdp = parseSdp(rtspSdp);
let audioSection = newSdp.msections.find(msection => msection.type === 'audio' && msection.codec === audioCodec)
if (!audioSection)
audioSection = newSdp.msections.find(msection => msection.type === 'audio');
const audioSection = newSdp.msections.find(msection => msection.type === 'audio');
if (!audioSection) {
delete rtpTracks.audio;

View File

@@ -50,6 +50,7 @@ export class ScryptedSessionControl implements RTCSessionControl {
const url = rtspTcpServer.url.replace('tcp:', 'rtsp:');
const ffmpegInput: FFmpegInput = {
container: 'rtsp',
url,
mediaStreamOptions: {
id: undefined,
@@ -65,38 +66,45 @@ export class ScryptedSessionControl implements RTCSessionControl {
const mo = await mediaManager.createFFmpegMediaObject(ffmpegInput);
await this.intercom.startIntercom(mo);
rtspTcpServer.clientPromise.then(async client => {
const sdpReturnAudio = [
"v=0",
"o=- 0 0 IN IP4 127.0.0.1",
"s=" + "WebRTC Audio Talkback",
"c=IN IP4 127.0.0.1",
"t=0 0",
"b=AS:24",
const client = await rtspTcpServer.clientPromise;
// HACK, this may not be opus
"m=audio 0 RTP/AVP 110",
"a=rtpmap:110 opus/48000/2",
"a=fmtp:101 minptime=10;useinbandfec=1",
const sdpReturnAudio = [
"v=0",
"o=- 0 0 IN IP4 127.0.0.1",
"s=" + "WebRTC Audio Talkback",
"c=IN IP4 127.0.0.1",
"t=0 0",
"m=audio 0 RTP/AVP 110",
"b=AS:24",
// HACK, this may not be opus
"a=rtpmap:110 opus/48000/2",
"a=fmtp:101 minptime=10;useinbandfec=1",
];
let sdp = sdpReturnAudio.join('\r\n');
sdp = createSdpInput(0, 0, sdp);
// "m=audio 0 RTP/AVP 0",
// "a=rtpmap:0 PCMU/8000",
// "m=audio 0 RTP/AVP 8",
// "a=rtpmap:8 PCMA/8000",
];
let sdp = sdpReturnAudio.join('\r\n');
sdp = createSdpInput(0, 0, sdp);
const rtspServer = new RtspServer(client, sdp);
this.rtspServer = rtspServer;
// rtspServer.console = console;
await rtspServer.handlePlayback();
const parsedSdp = parseSdp(rtspServer.sdp);
const audioTrack = parsedSdp.msections.find(msection => msection.type === 'audio').control;
const rtspServer = new RtspServer(client, sdp);
this.rtspServer = rtspServer;
rtspServer.console = console;
await rtspServer.handlePlayback();
const parsedSdp = parseSdp(rtspServer.sdp);
const audioTrack = parsedSdp.msections.find(msection => msection.type === 'audio').control;
track.onReceiveRtp.subscribe(rtpPacket => {
rtpPacket.header.payloadType = 110;
rtspServer.sendTrack(audioTrack, rtpPacket.serialize(), false);
track.onReceiveRtp.subscribe(rtpPacket => {
rtpPacket.header.payloadType = 110;
rtspServer.sendTrack(audioTrack, rtpPacket.serialize(), false);
});
});
await this.intercom.startIntercom(mo);
await rtspTcpServer.clientPromise;
return mo;
}

View File

@@ -50,7 +50,7 @@ export function getAudioCodec(outputCodecParameters: RTCRtpCodecParameters) {
}
if (outputCodecParameters.name === 'PCMU') {
return {
name: 'pcm_ulaw',
name: 'pcm_mulaw',
encoder: 'pcm_mulaw',
};
}
@@ -72,7 +72,7 @@ export function getFFmpegRtpAudioOutputArguments(inputCodec: string, outputCodec
ret.push(
'-acodec', encoder,
'-flags', '+global_header',
'-ar', '48k',
'-ar', `${outputCodecParameters.clockRate}`,
// choose a better bitrate? this is on the high end recommendation for voice.
'-b:a', '40k',
'-bufsize', '96k',