Преобразование буфера PCM в режиме реального времени в данные AAC для iOS с использованием Remote IO и Audio Convert Service

Я использую Remote IO для получения аудиобуфера от PCM, я хочу в режиме реального времени отправлять данные на Darwin Server через сотовую сеть (сеть 3G). Я выбираю формат AAC, так как есть статья от Фраунгофера под названием "Аудиосвязь на основе AAC-ELD на iOS A Руководство разработчика". Пример кода отлично работает. Звук записывается в формате LPCM, кодируется в AACELD и декодируется обратно в LPCM, и, наконец, немедленно выполняется воспроизведение, но это формат AACELD(Enhanced Low Delay). Когда я меняю формат с "kAudioFormatMPEG4AAC_ELD" на "kAudioFormatMPEG4AAC". Я слышу звук в течение 1 секунды, и звук застревает в течение следующих 1 секунды, и паттерн продолжается. И звук в два раза чаще, чем реальность, что означает, что звук в течение последней 1 секунды в реальном мире будет длиться только 0,5 секунды для воспроизведения. Затем я изменяю размер кадра сэмпла с 512 до 1024. Частота нормальная, но я слышу звук в течение 2 секунд, и он застревает в течение следующих 2 секунд, а паттерн продолжается... Я выяснил, что функция AudioConverterFillComplexBuffer не работает для 2 секунды, а затем хорошо работает в следующие 2 секунды. Я не знаю почему. Пожалуйста помоги. Заранее спасибо. Я действительно не сильно изменил код, просто изменил formatID и размер кадра образца с 512 на 1024. Статья здесь: http://www.full-hd-voice.com/content/dam/fullhdvoice/documents/iOS-ACE-AP-v2.pdf - ACE-v2.pdf

1. глобальные переменные

static AudioBuffer            g_inputBuffer;
static AudioBuffer            g_outputBuffer;
static AudioComponentInstance g_audioUnit;
static AudioUnitElement       g_outputBus      = 0; 
static AudioUnitElement       g_inputBus       = 1;
static UInt32                 g_outChannels    = 2;
static UInt32                 g_inChannels     = 1;
static UInt32                 g_frameSize      = 1024;
static UInt32                 g_inputByteSize  = 0; 
static UInt32                 g_outputByteSize = 0; 
static unsigned int           g_initialized    = 0;
static AACELDEncoder         *g_encoder        = NULL;
static AACELDDecoder         *g_decoder        = NULL;
static MagicCookie            g_cookie;

/* Structure to keep the encoder configuration */
typedef struct EncoderProperties_
{
  Float64 samplingRate;
  UInt32  inChannels;
  UInt32  outChannels;
  UInt32  frameSize;
  UInt32  bitrate;
} EncoderProperties;

/* Structure to keep the magic cookie */
typedef struct MagicCookie_
{
  void *data;
  int byteSize;
} MagicCookie;

/* Structure to keep one encoded AU */
typedef struct EncodedAudioBuffer_
{
  UInt32 mChannels;
  UInt32 mDataBytesSize;
  void *data;
} EncodedAudioBuffer;

typedef struct DecoderProperties_
{
  Float64 samplingRate;
  UInt32  inChannels;
  UInt32  outChannels;
  UInt32  frameSize;
} DecoderProperties;

2.инициализировать аудио сессию и аудиоустройство и кодер и декодер

void InitAudioUnit()

{
  /* Calculate the required input and output buffer sizes */
  g_inputByteSize  = g_frameSize * g_inChannels  * sizeof(AudioSampleType);
  g_outputByteSize = g_frameSize * g_outChannels * sizeof(AudioSampleType);

  /* Initialize the I/O buffers */
  g_inputBuffer.mNumberChannels = g_inChannels;
  g_inputBuffer.mDataByteSize   = g_inputByteSize;

  if (g_initialized)
    free(g_inputBuffer.mData);
  g_inputBuffer.mData           = malloc(sizeof(unsigned char)*g_inputByteSize);
  memset(g_inputBuffer.mData, 0, g_inputByteSize);

  g_outputBuffer.mNumberChannels = g_outChannels;
  g_outputBuffer.mDataByteSize   = g_outputByteSize;
  if (g_initialized)
    free(g_outputBuffer.mData);
  g_outputBuffer.mData           = malloc(sizeof(unsigned char)*g_outputByteSize);
  memset(g_outputBuffer.mData, 0, g_outputByteSize);
  g_initialized = 1;

  /* Initialize the audio session */
  AudioSessionInitialize(NULL, NULL, interruptionListener, NULL);
  /* Activate the audio session */
  AudioSessionSetActive(TRUE);

  /* Enable recording for full-duplex I/O */
  UInt32 audioCategory = kAudioSessionCategory_PlayAndRecord;
  AudioSessionSetProperty(kAudioSessionProperty_AudioCategory, 
                          sizeof(audioCategory), 
                          &audioCategory);
  /* Set the route change listener */
  AudioSessionAddPropertyListener(kAudioSessionProperty_AudioRouteChange, 
                                  routeChangeListener, 
                                  NULL);

  /* Set the preferred buffer time */
  Float32 preferredBufferTime = 1024.0 / 44100.0;
  AudioSessionSetProperty(kAudioSessionProperty_PreferredHardwareIOBufferDuration, 
                          sizeof(preferredBufferTime), 
                          &preferredBufferTime);

  /* Setup the audio component for I/O */
  AudioComponentDescription componentDesc;
  memset(&componentDesc, 0, sizeof(componentDesc));

  componentDesc.componentType         = kAudioUnitType_Output;
  componentDesc.componentSubType      = kAudioUnitSubType_RemoteIO; 
  componentDesc.componentManufacturer = kAudioUnitManufacturer_Apple;

  /* Find and create the audio component */
  AudioComponent auComponent = AudioComponentFindNext(NULL, &componentDesc);
  AudioComponentInstanceNew(auComponent, &g_audioUnit);

  /* Enable the audio input */
  UInt32 enableAudioInput = 1;
  AudioUnitSetProperty(g_audioUnit, 
                       kAudioOutputUnitProperty_EnableIO, 
                       kAudioUnitScope_Input, 
                       g_inputBus, 
                       &enableAudioInput, 
                       sizeof(enableAudioInput));

  /* Setup the render callback */
  AURenderCallbackStruct renderCallbackInfo;
  renderCallbackInfo.inputProc       = audioUnitRenderCallback;
  renderCallbackInfo.inputProcRefCon = NULL;
  AudioUnitSetProperty(g_audioUnit, 
                       kAudioUnitProperty_SetRenderCallback, 
                       kAudioUnitScope_Input, 
                       g_outputBus, 
                       &renderCallbackInfo, 
                       sizeof(renderCallbackInfo));

  /* Set the input and output audio stream formats */
  AudioStreamBasicDescription audioFormat;
  audioFormat.mSampleRate       = 44100;
  audioFormat.mFormatID         = kAudioFormatLinearPCM;
  audioFormat.mFormatFlags      = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked;
  audioFormat.mFramesPerPacket  = 1;
  audioFormat.mBitsPerChannel   = 8 * sizeof(AudioSampleType);
  audioFormat.mChannelsPerFrame = g_inChannels;
  audioFormat.mBytesPerFrame    = audioFormat.mChannelsPerFrame * sizeof(AudioSampleType);
  audioFormat.mBytesPerPacket   = audioFormat.mBytesPerFrame;

  AudioUnitSetProperty(g_audioUnit, 
                       kAudioUnitProperty_StreamFormat, 
                       kAudioUnitScope_Output, 
                       g_inputBus, 
                       &audioFormat, 
                       sizeof(audioFormat));

  audioFormat.mChannelsPerFrame = g_outChannels;
  audioFormat.mBytesPerFrame    = audioFormat.mChannelsPerFrame * sizeof(AudioSampleType);
  audioFormat.mBytesPerPacket   = audioFormat.mBytesPerFrame;

  AudioUnitSetProperty(g_audioUnit, 
                       kAudioUnitProperty_StreamFormat, 
                       kAudioUnitScope_Input, 
                       g_outputBus, 
                       &audioFormat, 
                       sizeof(audioFormat));

  /* Initialize the ELD codec */
  InitAACELD();
}

void InitAACELD()
{
  EncoderProperties p;
  p.samplingRate = 44100.0;
  p.inChannels   = 1;
  p.outChannels  = 1;
  p.frameSize    = 1024;
  p.bitrate      = 32000;

  g_encoder = CreateAACELDEncoder();
  InitAACELDEncoder(g_encoder, p, &g_cookie);

  DecoderProperties dp;
  dp.samplingRate = 44100.0;
  dp.inChannels   = 1;
  dp.outChannels  = 2;
  dp.frameSize    = p.frameSize;

  g_decoder = CreateAACELDDecoder();
  InitAACELDDecoder(g_decoder, dp, &g_cookie);
}

int InitAACELDEncoder(AACELDEncoder *encoder, EncoderProperties props, MagicCookie *outCookie)
{
  /* Copy the provided encoder properties */
  encoder->inChannels   = props.inChannels;
  encoder->outChannels  = props.outChannels;
  encoder->samplingRate = props.samplingRate;
  encoder->frameSize    = props.frameSize;
  encoder->bitrate      = props.bitrate;

  /* Convenience macro to fill out the ASBD structure.
     Available only when __cplusplus is defined! */
  FillOutASBDForLPCM(encoder->sourceFormat, 
                     encoder->samplingRate, 
                     encoder->inChannels, 
                     8*sizeof(AudioSampleType), 
                     8*sizeof(AudioSampleType), 
                     false, 
                     false);

  /* Set the format parameters for AAC-ELD encoding. */
  encoder->destinationFormat.mFormatID         = kAudioFormatMPEG4AAC;
  encoder->destinationFormat.mChannelsPerFrame = encoder->outChannels;
  encoder->destinationFormat.mSampleRate       = encoder->samplingRate;

  /* Get the size of the formatinfo structure */
  UInt32 dataSize = sizeof(encoder->destinationFormat);

  /* Request the propertie from CoreAudio */
  AudioFormatGetProperty(kAudioFormatProperty_FormatInfo, 
                         0, 
                         NULL, 
                         &dataSize, 
                         &(encoder->destinationFormat));

  /* Create a new audio converter */
  AudioConverterNew(&(encoder->sourceFormat), 
                    &(encoder->destinationFormat), 
                    &(encoder->audioConverter));

  if (!encoder->audioConverter)
  {
    return -1;
  }

  /* Try to set the desired output bitrate */
  UInt32 outputBitrate = encoder->bitrate;
  dataSize = sizeof(outputBitrate);

  AudioConverterSetProperty(encoder->audioConverter, 
                            kAudioConverterEncodeBitRate, 
                            dataSize, 
                            &outputBitrate);

  /* Query the maximum possible output packet size */
  if (encoder->destinationFormat.mBytesPerPacket == 0) 
  {
    UInt32 maxOutputSizePerPacket = 0;
    dataSize = sizeof(maxOutputSizePerPacket);
    AudioConverterGetProperty(encoder->audioConverter, 
                              kAudioConverterPropertyMaximumOutputPacketSize, 
                              &dataSize, 
                              &maxOutputSizePerPacket);
    encoder->maxOutputPacketSize = maxOutputSizePerPacket;
  }
  else
  {
    encoder->maxOutputPacketSize = encoder->destinationFormat.mBytesPerPacket;
  }

  /* Fetch the Magic Cookie from the ELD implementation */
  UInt32 cookieSize = 0;
  AudioConverterGetPropertyInfo(encoder->audioConverter, 
                                kAudioConverterCompressionMagicCookie, 
                                &cookieSize, 
                                NULL);

  char* cookie = (char*)malloc(cookieSize*sizeof(char));
  AudioConverterGetProperty(encoder->audioConverter, 
                            kAudioConverterCompressionMagicCookie, 
                            &cookieSize, 
                            cookie);

  outCookie->data     = cookie;
  outCookie->byteSize = cookieSize;

  /* Prepare the temporary AU buffer for encoding */
  encoder->encoderBuffer = malloc(encoder->maxOutputPacketSize);

  return 0;
}

int InitAACELDDecoder(AACELDDecoder* decoder, DecoderProperties props, const MagicCookie *cookie)
{
  /* Copy the provided decoder properties */
  decoder->inChannels   = props.inChannels;
  decoder->outChannels  = props.outChannels;
  decoder->samplingRate = props.samplingRate;
  decoder->frameSize    = props.frameSize;

  /* We will decode to LPCM */
  FillOutASBDForLPCM(decoder->destinationFormat, 
                     decoder->samplingRate, 
                     decoder->outChannels, 
                     8*sizeof(AudioSampleType), 
                     8*sizeof(AudioSampleType), 
                     false, 
                     false);

  /* from AAC-ELD, having the same sampling rate, but possibly a different channel configuration */
  decoder->sourceFormat.mFormatID         = kAudioFormatMPEG4AAC;
  decoder->sourceFormat.mChannelsPerFrame = decoder->inChannels;
  decoder->sourceFormat.mSampleRate       = decoder->samplingRate;

  /* Get the rest of the format info */
  UInt32 dataSize = sizeof(decoder->sourceFormat);
  AudioFormatGetProperty(kAudioFormatProperty_FormatInfo, 
                         0, 
                         NULL, 
                         &dataSize, 
                         &(decoder->sourceFormat));

  /* Create a new AudioConverter instance for the conversion AAC-ELD -> LPCM */
  AudioConverterNew(&(decoder->sourceFormat), 
                    &(decoder->destinationFormat), 
                    &(decoder->audioConverter));

  if (!decoder->audioConverter)
  {
    return -1;
  }

  /* Check for variable output packet size */
  if (decoder->destinationFormat.mBytesPerPacket == 0) 
  {
    UInt32 maxOutputSizePerPacket = 0;
    dataSize = sizeof(maxOutputSizePerPacket);
    AudioConverterGetProperty(decoder->audioConverter,
                              kAudioConverterPropertyMaximumOutputPacketSize, 
                              &dataSize, 
                              &maxOutputSizePerPacket);
    decoder->maxOutputPacketSize = maxOutputSizePerPacket;
  }
  else
  {
    decoder->maxOutputPacketSize = decoder->destinationFormat.mBytesPerPacket;
  }

  /* Set the corresponding encoder cookie */
  AudioConverterSetProperty(decoder->audioConverter, 
                            kAudioConverterDecompressionMagicCookie, 
                            cookie->byteSize, 
                            cookie->data);

  return 0;
}

3. Обратный звонок и кодировщик и декодер

static OSStatus audioUnitRenderCallback(void                       *inRefCon, 
                                        AudioUnitRenderActionFlags *ioActionFlags,
                                        const AudioTimeStamp       *inTimeStamp,
                                        UInt32                      inBusNumber,
                                        UInt32                      inNumberOfFrames,
                                        AudioBufferList            *ioData)
{

  /* Get the input samples */
  AudioUnitRender(g_audioUnit,
                  ioActionFlags,
                  inTimeStamp,
                  g_inputBus,
                  inNumberOfFrames,
                  ioData);

  /* Copy to global input buffer */
  memcpy(g_inputBuffer.mData, ioData->mBuffers[0].mData, g_inputBuffer.mDataByteSize);

  /* Encode with AudioConverter */
  EncodedAudioBuffer encodedAU;
  EncodeAACELD(g_encoder, &g_inputBuffer, &encodedAU);

  /* Decode with AudioConverter */
  g_outputBuffer.mDataByteSize = g_outputByteSize;
  DecodeAACELD(g_decoder, &encodedAU, &g_outputBuffer);

  /* Copy output samples to Audio Units' IO buffer */
  ioData->mBuffers[0].mNumberChannels = g_outputBuffer.mNumberChannels;
  ioData->mBuffers[0].mDataByteSize   = g_outputBuffer.mDataByteSize;
  memcpy(ioData->mBuffers[0].mData, g_outputBuffer.mData, g_outputBuffer.mDataByteSize); 

  return noErr;
}

static OSStatus encodeProc(AudioConverterRef inAudioConverter, 
                           UInt32 *ioNumberDataPackets, 
                           AudioBufferList *ioData, 
                           AudioStreamPacketDescription **outDataPacketDescription, 
                           void *inUserData)
{
  /* Get the current encoder state from the inUserData parameter */
  AACELDEncoder *encoder = (AACELDEncoder*) inUserData;

  /* Compute the maximum number of output packets */
  UInt32 maxPackets = encoder->bytesToEncode / encoder->sourceFormat.mBytesPerPacket;

  if (*ioNumberDataPackets > maxPackets)
  {
    /* If requested number of packets is bigger, adjust */
    *ioNumberDataPackets = maxPackets;
  }

  /* Check to make sure we have only one audio buffer */
  if (ioData->mNumberBuffers != 1)
  {
    return 1;
  }

  /* Set the data to be encoded */
  ioData->mBuffers[0].mDataByteSize   = encoder->currentSampleBuffer->mDataByteSize;
  ioData->mBuffers[0].mData           = encoder->currentSampleBuffer->mData;
  ioData->mBuffers[0].mNumberChannels = encoder->currentSampleBuffer->mNumberChannels;

  if (outDataPacketDescription)
  {
    *outDataPacketDescription = NULL;
  }

  if (encoder->bytesToEncode == 0)
  {
    // We are currently out of data but want to keep on processing 
    // See Apple Technical Q&A QA1317
    return 1; 
  }

  encoder->bytesToEncode = 0;


  return noErr;
}


int EncodeAACELD(AACELDEncoder *encoder, AudioBuffer *inSamples, EncodedAudioBuffer *outData)
{
  /* Clear the encoder buffer */
  memset(encoder->encoderBuffer, 0, sizeof(encoder->maxOutputPacketSize));

  /* Keep a reference to the samples that should be encoded */
  encoder->currentSampleBuffer = inSamples;
  encoder->bytesToEncode       = inSamples->mDataByteSize;

  UInt32 numOutputDataPackets = 1;

  AudioStreamPacketDescription outPacketDesc[1];

  /* Create the output buffer list */
  AudioBufferList outBufferList;
  outBufferList.mNumberBuffers = 1;
  outBufferList.mBuffers[0].mNumberChannels = encoder->outChannels;
  outBufferList.mBuffers[0].mDataByteSize   = encoder->maxOutputPacketSize;
  outBufferList.mBuffers[0].mData           = encoder->encoderBuffer;

  /* Start the encoding process */
  OSStatus status = AudioConverterFillComplexBuffer(encoder->audioConverter,
                                                    encodeProc, 
                                                    encoder, 
                                                    &numOutputDataPackets, 
                                                    &outBufferList, 
                                                    outPacketDesc);

  if (status != noErr)
  {
    return -1;
  }

  /* Set the ouput data */
  outData->mChannels      = encoder->outChannels;
  outData->data           = encoder->encoderBuffer;
  outData->mDataBytesSize = outPacketDesc[0].mDataByteSize;

  return 0;
}


static OSStatus decodeProc(AudioConverterRef inAudioConverter, 
                           UInt32 *ioNumberDataPackets, 
                           AudioBufferList *ioData, 
                           AudioStreamPacketDescription **outDataPacketDescription, 
                           void *inUserData)
{
  /* Get the current decoder state from the inUserData parameter */
  AACELDDecoder *decoder = (AACELDDecoder*)inUserData;

  /* Compute the maximum number of output packets */
  UInt32 maxPackets = decoder->bytesToDecode / decoder->maxOutputPacketSize;

  if (*ioNumberDataPackets > maxPackets)
  {
    /* If requested number of packets is bigger, adjust */
    *ioNumberDataPackets = maxPackets;
  }

  /* If there is data to be decoded, set it accordingly */
  if (decoder->bytesToDecode)
  {
    ioData->mBuffers[0].mData           = decoder->decodeBuffer;
    ioData->mBuffers[0].mDataByteSize   = decoder->bytesToDecode;
    ioData->mBuffers[0].mNumberChannels = decoder->inChannels;
  } 

  /* And set the packet description */
  if (outDataPacketDescription)
  {
    decoder->packetDesc[0].mStartOffset            = 0;
    decoder->packetDesc[0].mVariableFramesInPacket = 0;
    decoder->packetDesc[0].mDataByteSize           = decoder->bytesToDecode;

    (*outDataPacketDescription) = decoder->packetDesc;
  }

  if (decoder->bytesToDecode == 0)
  {
    // We are currently out of data but want to keep on processing 
    // See Apple Technical Q&A QA1317
    return 1;
  }

  decoder->bytesToDecode = 0;

  return noErr;
}

int DecodeAACELD(AACELDDecoder* decoder, EncodedAudioBuffer *inData, AudioBuffer *outSamples)
{
  OSStatus status = noErr;

  /* Keep a reference to the samples that should be decoded */
  decoder->decodeBuffer  = inData->data;
  decoder->bytesToDecode = inData->mDataBytesSize;

  UInt32 outBufferMaxSizeBytes = decoder->frameSize * decoder->outChannels * sizeof(AudioSampleType);

  assert(outSamples->mDataByteSize <= outBufferMaxSizeBytes);

  UInt32 numOutputDataPackets = outBufferMaxSizeBytes / decoder->maxOutputPacketSize;

  /* Output packet stream are 512 LPCM samples */
  AudioStreamPacketDescription outputPacketDesc[1024];

  /* Create the output buffer list */
  AudioBufferList outBufferList;
  outBufferList.mNumberBuffers = 1;
  outBufferList.mBuffers[0].mNumberChannels = decoder->outChannels;
  outBufferList.mBuffers[0].mDataByteSize   = outSamples->mDataByteSize;
  outBufferList.mBuffers[0].mData           = outSamples->mData;

  /* Start the decoding process */
  status = AudioConverterFillComplexBuffer(decoder->audioConverter, 
                                           decodeProc, 
                                           decoder, 
                                           &numOutputDataPackets, 
                                           &outBufferList, 
                                           outputPacketDesc);

  if (noErr != status)
  {
    return -1;
  }

  return 0;
}

0 ответов

Другие вопросы по тегам