Преобразование буфера PCM в режиме реального времени в данные AAC для iOS с использованием Remote IO и Audio Convert Service
Я использую Remote IO для получения аудиобуфера от PCM, я хочу в режиме реального времени отправлять данные на Darwin Server через сотовую сеть (сеть 3G). Я выбираю формат AAC, так как есть статья от Фраунгофера под названием "Аудиосвязь на основе AAC-ELD на iOS A Руководство разработчика". Пример кода отлично работает. Звук записывается в формате LPCM, кодируется в AACELD и декодируется обратно в LPCM, и, наконец, немедленно выполняется воспроизведение, но это формат AACELD(Enhanced Low Delay). Когда я меняю формат с "kAudioFormatMPEG4AAC_ELD" на "kAudioFormatMPEG4AAC". Я слышу звук в течение 1 секунды, и звук застревает в течение следующих 1 секунды, и паттерн продолжается. И звук в два раза чаще, чем реальность, что означает, что звук в течение последней 1 секунды в реальном мире будет длиться только 0,5 секунды для воспроизведения. Затем я изменяю размер кадра сэмпла с 512 до 1024. Частота нормальная, но я слышу звук в течение 2 секунд, и он застревает в течение следующих 2 секунд, а паттерн продолжается... Я выяснил, что функция AudioConverterFillComplexBuffer не работает для 2 секунды, а затем хорошо работает в следующие 2 секунды. Я не знаю почему. Пожалуйста помоги. Заранее спасибо. Я действительно не сильно изменил код, просто изменил formatID и размер кадра образца с 512 на 1024. Статья здесь: http://www.full-hd-voice.com/content/dam/fullhdvoice/documents/iOS-ACE-AP-v2.pdf - ACE-v2.pdf
1. глобальные переменные
static AudioBuffer g_inputBuffer;
static AudioBuffer g_outputBuffer;
static AudioComponentInstance g_audioUnit;
static AudioUnitElement g_outputBus = 0;
static AudioUnitElement g_inputBus = 1;
static UInt32 g_outChannels = 2;
static UInt32 g_inChannels = 1;
static UInt32 g_frameSize = 1024;
static UInt32 g_inputByteSize = 0;
static UInt32 g_outputByteSize = 0;
static unsigned int g_initialized = 0;
static AACELDEncoder *g_encoder = NULL;
static AACELDDecoder *g_decoder = NULL;
static MagicCookie g_cookie;
/* Structure to keep the encoder configuration */
typedef struct EncoderProperties_
{
Float64 samplingRate;
UInt32 inChannels;
UInt32 outChannels;
UInt32 frameSize;
UInt32 bitrate;
} EncoderProperties;
/* Structure to keep the magic cookie */
typedef struct MagicCookie_
{
void *data;
int byteSize;
} MagicCookie;
/* Structure to keep one encoded AU */
typedef struct EncodedAudioBuffer_
{
UInt32 mChannels;
UInt32 mDataBytesSize;
void *data;
} EncodedAudioBuffer;
typedef struct DecoderProperties_
{
Float64 samplingRate;
UInt32 inChannels;
UInt32 outChannels;
UInt32 frameSize;
} DecoderProperties;
2.инициализировать аудио сессию и аудиоустройство и кодер и декодер
void InitAudioUnit()
{
/* Calculate the required input and output buffer sizes */
g_inputByteSize = g_frameSize * g_inChannels * sizeof(AudioSampleType);
g_outputByteSize = g_frameSize * g_outChannels * sizeof(AudioSampleType);
/* Initialize the I/O buffers */
g_inputBuffer.mNumberChannels = g_inChannels;
g_inputBuffer.mDataByteSize = g_inputByteSize;
if (g_initialized)
free(g_inputBuffer.mData);
g_inputBuffer.mData = malloc(sizeof(unsigned char)*g_inputByteSize);
memset(g_inputBuffer.mData, 0, g_inputByteSize);
g_outputBuffer.mNumberChannels = g_outChannels;
g_outputBuffer.mDataByteSize = g_outputByteSize;
if (g_initialized)
free(g_outputBuffer.mData);
g_outputBuffer.mData = malloc(sizeof(unsigned char)*g_outputByteSize);
memset(g_outputBuffer.mData, 0, g_outputByteSize);
g_initialized = 1;
/* Initialize the audio session */
AudioSessionInitialize(NULL, NULL, interruptionListener, NULL);
/* Activate the audio session */
AudioSessionSetActive(TRUE);
/* Enable recording for full-duplex I/O */
UInt32 audioCategory = kAudioSessionCategory_PlayAndRecord;
AudioSessionSetProperty(kAudioSessionProperty_AudioCategory,
sizeof(audioCategory),
&audioCategory);
/* Set the route change listener */
AudioSessionAddPropertyListener(kAudioSessionProperty_AudioRouteChange,
routeChangeListener,
NULL);
/* Set the preferred buffer time */
Float32 preferredBufferTime = 1024.0 / 44100.0;
AudioSessionSetProperty(kAudioSessionProperty_PreferredHardwareIOBufferDuration,
sizeof(preferredBufferTime),
&preferredBufferTime);
/* Setup the audio component for I/O */
AudioComponentDescription componentDesc;
memset(&componentDesc, 0, sizeof(componentDesc));
componentDesc.componentType = kAudioUnitType_Output;
componentDesc.componentSubType = kAudioUnitSubType_RemoteIO;
componentDesc.componentManufacturer = kAudioUnitManufacturer_Apple;
/* Find and create the audio component */
AudioComponent auComponent = AudioComponentFindNext(NULL, &componentDesc);
AudioComponentInstanceNew(auComponent, &g_audioUnit);
/* Enable the audio input */
UInt32 enableAudioInput = 1;
AudioUnitSetProperty(g_audioUnit,
kAudioOutputUnitProperty_EnableIO,
kAudioUnitScope_Input,
g_inputBus,
&enableAudioInput,
sizeof(enableAudioInput));
/* Setup the render callback */
AURenderCallbackStruct renderCallbackInfo;
renderCallbackInfo.inputProc = audioUnitRenderCallback;
renderCallbackInfo.inputProcRefCon = NULL;
AudioUnitSetProperty(g_audioUnit,
kAudioUnitProperty_SetRenderCallback,
kAudioUnitScope_Input,
g_outputBus,
&renderCallbackInfo,
sizeof(renderCallbackInfo));
/* Set the input and output audio stream formats */
AudioStreamBasicDescription audioFormat;
audioFormat.mSampleRate = 44100;
audioFormat.mFormatID = kAudioFormatLinearPCM;
audioFormat.mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked;
audioFormat.mFramesPerPacket = 1;
audioFormat.mBitsPerChannel = 8 * sizeof(AudioSampleType);
audioFormat.mChannelsPerFrame = g_inChannels;
audioFormat.mBytesPerFrame = audioFormat.mChannelsPerFrame * sizeof(AudioSampleType);
audioFormat.mBytesPerPacket = audioFormat.mBytesPerFrame;
AudioUnitSetProperty(g_audioUnit,
kAudioUnitProperty_StreamFormat,
kAudioUnitScope_Output,
g_inputBus,
&audioFormat,
sizeof(audioFormat));
audioFormat.mChannelsPerFrame = g_outChannels;
audioFormat.mBytesPerFrame = audioFormat.mChannelsPerFrame * sizeof(AudioSampleType);
audioFormat.mBytesPerPacket = audioFormat.mBytesPerFrame;
AudioUnitSetProperty(g_audioUnit,
kAudioUnitProperty_StreamFormat,
kAudioUnitScope_Input,
g_outputBus,
&audioFormat,
sizeof(audioFormat));
/* Initialize the ELD codec */
InitAACELD();
}
void InitAACELD()
{
EncoderProperties p;
p.samplingRate = 44100.0;
p.inChannels = 1;
p.outChannels = 1;
p.frameSize = 1024;
p.bitrate = 32000;
g_encoder = CreateAACELDEncoder();
InitAACELDEncoder(g_encoder, p, &g_cookie);
DecoderProperties dp;
dp.samplingRate = 44100.0;
dp.inChannels = 1;
dp.outChannels = 2;
dp.frameSize = p.frameSize;
g_decoder = CreateAACELDDecoder();
InitAACELDDecoder(g_decoder, dp, &g_cookie);
}
int InitAACELDEncoder(AACELDEncoder *encoder, EncoderProperties props, MagicCookie *outCookie)
{
/* Copy the provided encoder properties */
encoder->inChannels = props.inChannels;
encoder->outChannels = props.outChannels;
encoder->samplingRate = props.samplingRate;
encoder->frameSize = props.frameSize;
encoder->bitrate = props.bitrate;
/* Convenience macro to fill out the ASBD structure.
Available only when __cplusplus is defined! */
FillOutASBDForLPCM(encoder->sourceFormat,
encoder->samplingRate,
encoder->inChannels,
8*sizeof(AudioSampleType),
8*sizeof(AudioSampleType),
false,
false);
/* Set the format parameters for AAC-ELD encoding. */
encoder->destinationFormat.mFormatID = kAudioFormatMPEG4AAC;
encoder->destinationFormat.mChannelsPerFrame = encoder->outChannels;
encoder->destinationFormat.mSampleRate = encoder->samplingRate;
/* Get the size of the formatinfo structure */
UInt32 dataSize = sizeof(encoder->destinationFormat);
/* Request the propertie from CoreAudio */
AudioFormatGetProperty(kAudioFormatProperty_FormatInfo,
0,
NULL,
&dataSize,
&(encoder->destinationFormat));
/* Create a new audio converter */
AudioConverterNew(&(encoder->sourceFormat),
&(encoder->destinationFormat),
&(encoder->audioConverter));
if (!encoder->audioConverter)
{
return -1;
}
/* Try to set the desired output bitrate */
UInt32 outputBitrate = encoder->bitrate;
dataSize = sizeof(outputBitrate);
AudioConverterSetProperty(encoder->audioConverter,
kAudioConverterEncodeBitRate,
dataSize,
&outputBitrate);
/* Query the maximum possible output packet size */
if (encoder->destinationFormat.mBytesPerPacket == 0)
{
UInt32 maxOutputSizePerPacket = 0;
dataSize = sizeof(maxOutputSizePerPacket);
AudioConverterGetProperty(encoder->audioConverter,
kAudioConverterPropertyMaximumOutputPacketSize,
&dataSize,
&maxOutputSizePerPacket);
encoder->maxOutputPacketSize = maxOutputSizePerPacket;
}
else
{
encoder->maxOutputPacketSize = encoder->destinationFormat.mBytesPerPacket;
}
/* Fetch the Magic Cookie from the ELD implementation */
UInt32 cookieSize = 0;
AudioConverterGetPropertyInfo(encoder->audioConverter,
kAudioConverterCompressionMagicCookie,
&cookieSize,
NULL);
char* cookie = (char*)malloc(cookieSize*sizeof(char));
AudioConverterGetProperty(encoder->audioConverter,
kAudioConverterCompressionMagicCookie,
&cookieSize,
cookie);
outCookie->data = cookie;
outCookie->byteSize = cookieSize;
/* Prepare the temporary AU buffer for encoding */
encoder->encoderBuffer = malloc(encoder->maxOutputPacketSize);
return 0;
}
int InitAACELDDecoder(AACELDDecoder* decoder, DecoderProperties props, const MagicCookie *cookie)
{
/* Copy the provided decoder properties */
decoder->inChannels = props.inChannels;
decoder->outChannels = props.outChannels;
decoder->samplingRate = props.samplingRate;
decoder->frameSize = props.frameSize;
/* We will decode to LPCM */
FillOutASBDForLPCM(decoder->destinationFormat,
decoder->samplingRate,
decoder->outChannels,
8*sizeof(AudioSampleType),
8*sizeof(AudioSampleType),
false,
false);
/* from AAC-ELD, having the same sampling rate, but possibly a different channel configuration */
decoder->sourceFormat.mFormatID = kAudioFormatMPEG4AAC;
decoder->sourceFormat.mChannelsPerFrame = decoder->inChannels;
decoder->sourceFormat.mSampleRate = decoder->samplingRate;
/* Get the rest of the format info */
UInt32 dataSize = sizeof(decoder->sourceFormat);
AudioFormatGetProperty(kAudioFormatProperty_FormatInfo,
0,
NULL,
&dataSize,
&(decoder->sourceFormat));
/* Create a new AudioConverter instance for the conversion AAC-ELD -> LPCM */
AudioConverterNew(&(decoder->sourceFormat),
&(decoder->destinationFormat),
&(decoder->audioConverter));
if (!decoder->audioConverter)
{
return -1;
}
/* Check for variable output packet size */
if (decoder->destinationFormat.mBytesPerPacket == 0)
{
UInt32 maxOutputSizePerPacket = 0;
dataSize = sizeof(maxOutputSizePerPacket);
AudioConverterGetProperty(decoder->audioConverter,
kAudioConverterPropertyMaximumOutputPacketSize,
&dataSize,
&maxOutputSizePerPacket);
decoder->maxOutputPacketSize = maxOutputSizePerPacket;
}
else
{
decoder->maxOutputPacketSize = decoder->destinationFormat.mBytesPerPacket;
}
/* Set the corresponding encoder cookie */
AudioConverterSetProperty(decoder->audioConverter,
kAudioConverterDecompressionMagicCookie,
cookie->byteSize,
cookie->data);
return 0;
}
3. Обратный звонок и кодировщик и декодер
static OSStatus audioUnitRenderCallback(void *inRefCon,
AudioUnitRenderActionFlags *ioActionFlags,
const AudioTimeStamp *inTimeStamp,
UInt32 inBusNumber,
UInt32 inNumberOfFrames,
AudioBufferList *ioData)
{
/* Get the input samples */
AudioUnitRender(g_audioUnit,
ioActionFlags,
inTimeStamp,
g_inputBus,
inNumberOfFrames,
ioData);
/* Copy to global input buffer */
memcpy(g_inputBuffer.mData, ioData->mBuffers[0].mData, g_inputBuffer.mDataByteSize);
/* Encode with AudioConverter */
EncodedAudioBuffer encodedAU;
EncodeAACELD(g_encoder, &g_inputBuffer, &encodedAU);
/* Decode with AudioConverter */
g_outputBuffer.mDataByteSize = g_outputByteSize;
DecodeAACELD(g_decoder, &encodedAU, &g_outputBuffer);
/* Copy output samples to Audio Units' IO buffer */
ioData->mBuffers[0].mNumberChannels = g_outputBuffer.mNumberChannels;
ioData->mBuffers[0].mDataByteSize = g_outputBuffer.mDataByteSize;
memcpy(ioData->mBuffers[0].mData, g_outputBuffer.mData, g_outputBuffer.mDataByteSize);
return noErr;
}
static OSStatus encodeProc(AudioConverterRef inAudioConverter,
UInt32 *ioNumberDataPackets,
AudioBufferList *ioData,
AudioStreamPacketDescription **outDataPacketDescription,
void *inUserData)
{
/* Get the current encoder state from the inUserData parameter */
AACELDEncoder *encoder = (AACELDEncoder*) inUserData;
/* Compute the maximum number of output packets */
UInt32 maxPackets = encoder->bytesToEncode / encoder->sourceFormat.mBytesPerPacket;
if (*ioNumberDataPackets > maxPackets)
{
/* If requested number of packets is bigger, adjust */
*ioNumberDataPackets = maxPackets;
}
/* Check to make sure we have only one audio buffer */
if (ioData->mNumberBuffers != 1)
{
return 1;
}
/* Set the data to be encoded */
ioData->mBuffers[0].mDataByteSize = encoder->currentSampleBuffer->mDataByteSize;
ioData->mBuffers[0].mData = encoder->currentSampleBuffer->mData;
ioData->mBuffers[0].mNumberChannels = encoder->currentSampleBuffer->mNumberChannels;
if (outDataPacketDescription)
{
*outDataPacketDescription = NULL;
}
if (encoder->bytesToEncode == 0)
{
// We are currently out of data but want to keep on processing
// See Apple Technical Q&A QA1317
return 1;
}
encoder->bytesToEncode = 0;
return noErr;
}
int EncodeAACELD(AACELDEncoder *encoder, AudioBuffer *inSamples, EncodedAudioBuffer *outData)
{
/* Clear the encoder buffer */
memset(encoder->encoderBuffer, 0, sizeof(encoder->maxOutputPacketSize));
/* Keep a reference to the samples that should be encoded */
encoder->currentSampleBuffer = inSamples;
encoder->bytesToEncode = inSamples->mDataByteSize;
UInt32 numOutputDataPackets = 1;
AudioStreamPacketDescription outPacketDesc[1];
/* Create the output buffer list */
AudioBufferList outBufferList;
outBufferList.mNumberBuffers = 1;
outBufferList.mBuffers[0].mNumberChannels = encoder->outChannels;
outBufferList.mBuffers[0].mDataByteSize = encoder->maxOutputPacketSize;
outBufferList.mBuffers[0].mData = encoder->encoderBuffer;
/* Start the encoding process */
OSStatus status = AudioConverterFillComplexBuffer(encoder->audioConverter,
encodeProc,
encoder,
&numOutputDataPackets,
&outBufferList,
outPacketDesc);
if (status != noErr)
{
return -1;
}
/* Set the ouput data */
outData->mChannels = encoder->outChannels;
outData->data = encoder->encoderBuffer;
outData->mDataBytesSize = outPacketDesc[0].mDataByteSize;
return 0;
}
static OSStatus decodeProc(AudioConverterRef inAudioConverter,
UInt32 *ioNumberDataPackets,
AudioBufferList *ioData,
AudioStreamPacketDescription **outDataPacketDescription,
void *inUserData)
{
/* Get the current decoder state from the inUserData parameter */
AACELDDecoder *decoder = (AACELDDecoder*)inUserData;
/* Compute the maximum number of output packets */
UInt32 maxPackets = decoder->bytesToDecode / decoder->maxOutputPacketSize;
if (*ioNumberDataPackets > maxPackets)
{
/* If requested number of packets is bigger, adjust */
*ioNumberDataPackets = maxPackets;
}
/* If there is data to be decoded, set it accordingly */
if (decoder->bytesToDecode)
{
ioData->mBuffers[0].mData = decoder->decodeBuffer;
ioData->mBuffers[0].mDataByteSize = decoder->bytesToDecode;
ioData->mBuffers[0].mNumberChannels = decoder->inChannels;
}
/* And set the packet description */
if (outDataPacketDescription)
{
decoder->packetDesc[0].mStartOffset = 0;
decoder->packetDesc[0].mVariableFramesInPacket = 0;
decoder->packetDesc[0].mDataByteSize = decoder->bytesToDecode;
(*outDataPacketDescription) = decoder->packetDesc;
}
if (decoder->bytesToDecode == 0)
{
// We are currently out of data but want to keep on processing
// See Apple Technical Q&A QA1317
return 1;
}
decoder->bytesToDecode = 0;
return noErr;
}
int DecodeAACELD(AACELDDecoder* decoder, EncodedAudioBuffer *inData, AudioBuffer *outSamples)
{
OSStatus status = noErr;
/* Keep a reference to the samples that should be decoded */
decoder->decodeBuffer = inData->data;
decoder->bytesToDecode = inData->mDataBytesSize;
UInt32 outBufferMaxSizeBytes = decoder->frameSize * decoder->outChannels * sizeof(AudioSampleType);
assert(outSamples->mDataByteSize <= outBufferMaxSizeBytes);
UInt32 numOutputDataPackets = outBufferMaxSizeBytes / decoder->maxOutputPacketSize;
/* Output packet stream are 512 LPCM samples */
AudioStreamPacketDescription outputPacketDesc[1024];
/* Create the output buffer list */
AudioBufferList outBufferList;
outBufferList.mNumberBuffers = 1;
outBufferList.mBuffers[0].mNumberChannels = decoder->outChannels;
outBufferList.mBuffers[0].mDataByteSize = outSamples->mDataByteSize;
outBufferList.mBuffers[0].mData = outSamples->mData;
/* Start the decoding process */
status = AudioConverterFillComplexBuffer(decoder->audioConverter,
decodeProc,
decoder,
&numOutputDataPackets,
&outBufferList,
outputPacketDesc);
if (noErr != status)
{
return -1;
}
return 0;
}