Custom Audio Element Problem

paddlesteamer
Posts: 5
Joined: Thu Apr 01, 2021 2:52 pm

Custom Audio Element Problem

Postby paddlesteamer » Thu Apr 01, 2021 3:07 pm

Hello,

I'm trying to write my own audio element. The board I'm using is LyraT v4.3. I created a pipeline i2s_stream_reader -> my_element -> i2s_stream_writer. In the process function of 'my_element', if I forward the samples without doing anything with them, I can hear my voice from the headphones without a problem. If I multiply the samples with 0.5 before forwarding them to i2s_stream_writer, all I hear is very loud noise. Aren't the samples provided to my process function raw audio samples? Should I add some kind of decode element between i2s_stream_reader and my_element?

Here is my code for reference:
  1. #include <string.h>
  2. #include "board.h"
  3. #include "esp_log.h"
  4. #include "audio_pipeline.h"
  5. #include "audio_element.h"
  6. #include "i2s_stream.h"
  7.  
  8. static const char *TAG = "ELEMENTEXAMPLE";
  9.  
  10. static esp_err_t el_open(audio_element_handle_t self) {
  11.     ESP_LOGI(TAG, "open");
  12.     return ESP_OK;
  13. }
  14.  
  15. static int el_process(audio_element_handle_t self, char *buf, int len) {
  16.     int rsize = audio_element_input(self, buf, len);
  17.  
  18.     for (int i =0; i<rsize; i+=4) {
  19.         // convert bytes to samples
  20.         uint16_t lSample = ((((uint16_t) buf[i]) << 8) & 0xFF00) + buf[i+1];
  21.         uint16_t rSample = ((((uint16_t) buf[i+2]) << 8) & 0xFF00) + buf[i+3];
  22.  
  23.         // if i comment out these two lines it works as expected
  24.         lSample *= 0.5;
  25.         rSample *= 0.5;
  26.  
  27.         // convert samples to byte
  28.         buf[i] = (char) (((lSample & 0xFF00) >> 8) & 0xFF);
  29.         buf[i+1] = (char) (lSample & 0xFF);
  30.  
  31.         buf[i+2] = (char) (((rSample & 0xFF00) >> 8) & 0xFF);
  32.         buf[i+3] = (char) (rSample & 0xFF);
  33.     }
  34.  
  35.     rsize = audio_element_output(self, buf, rsize);
  36.  
  37.     return rsize;
  38. }
  39.  
  40. void app_main() {
  41.     audio_pipeline_handle_t pipeline;
  42.     audio_element_handle_t i2s_stream_reader, i2s_stream_writer, my_el;
  43.  
  44.     esp_log_level_set("*", ESP_LOG_INFO);
  45.  
  46.     ESP_LOGI(TAG, "[1.0] Start codec chip");
  47.     audio_board_handle_t board_handle = audio_board_init();
  48.     audio_hal_ctrl_codec(board_handle->audio_hal, AUDIO_HAL_CODEC_MODE_BOTH, AUDIO_HAL_CTRL_START);
  49.     audio_hal_set_volume(board_handle->audio_hal, 100);
  50.    
  51.     ESP_LOGI(TAG, "[2.0] Create i2s stream to read audio data from codec chip");
  52.     i2s_stream_cfg_t i2sr_cfg = I2S_STREAM_CFG_DEFAULT();
  53.     i2sr_cfg.type = AUDIO_STREAM_READER;
  54.     i2s_stream_reader = i2s_stream_init(&i2sr_cfg);    
  55.  
  56.     ESP_LOGI(TAG, "[3.0] Create i2s stream to write audio data to codec chip");
  57.     i2s_stream_cfg_t i2sw_cfg = I2S_STREAM_CFG_DEFAULT();
  58.     i2sw_cfg.type = AUDIO_STREAM_WRITER;
  59.     i2s_stream_writer = i2s_stream_init(&i2sw_cfg);
  60.  
  61.     ESP_LOGI(TAG, "[4.0] Init Element");
  62.     audio_element_cfg_t cfg = DEFAULT_AUDIO_ELEMENT_CONFIG();
  63.     cfg.open = el_open;
  64.     cfg.process = el_process;
  65.     cfg.tag = "my_el";
  66.  
  67.     my_el = audio_element_init(&cfg);
  68.  
  69.     ESP_LOGI(TAG, "[5.0] Create pipeline");
  70.     audio_pipeline_cfg_t pipeline_cfg = DEFAULT_AUDIO_PIPELINE_CONFIG();
  71.     pipeline = audio_pipeline_init(&pipeline_cfg);
  72.     mem_assert(pipeline);
  73.  
  74.     audio_pipeline_register(pipeline, my_el, "my_el");
  75.     audio_pipeline_register(pipeline, i2s_stream_reader, "i2sr");
  76.     audio_pipeline_register(pipeline, i2s_stream_writer, "i2sw");
  77.  
  78.     const char *link_tag[3] = {"i2sr", "my_el", "i2sw"};
  79.     audio_pipeline_link(pipeline, link_tag, 3);
  80.  
  81.     ESP_LOGI(TAG, "[6.0] Run pipeline");
  82.     audio_pipeline_run(pipeline);
  83.  
  84.     ESP_LOGI(TAG, "[ * ] Starting ...");
  85.     while (1) {
  86.         vTaskDelay(2000);
  87.     }
  88. }
P.S: I have checked out a similar post but the example code shared in that post doesn't change the samples so it doesn't answer my question.

paddlesteamer
Posts: 5
Joined: Thu Apr 01, 2021 2:52 pm

Re: Custom Audio Element Problem

Postby paddlesteamer » Thu Apr 01, 2021 8:18 pm

Ok, there were many things wrong with this code. Here is the correct version:
  1. #include <string.h>
  2. #include "board.h"
  3. #include "esp_log.h"
  4. #include "audio_pipeline.h"
  5. #include "audio_element.h"
  6. #include "i2s_stream.h"
  7.  
  8. static const char *TAG = "ELEMENTEXAMPLE";
  9.  
  10. static esp_err_t el_open(audio_element_handle_t self) {
  11.     ESP_LOGI(TAG, "open");
  12.     return ESP_OK;
  13. }
  14. static float convertToF32(uint16_t sample) {
  15.     float s = (float) sample;
  16.  
  17.     s -= 32768.0;
  18.     if (s > 0.0) {
  19.         return s / 32767.0;
  20.     }
  21.  
  22.     return s / 32768.0;
  23. }
  24.  
  25. static uint16_t convertToUInt16(float sample) {
  26.     if (sample > 0.0) {
  27.         sample *= 32767.0;
  28.     } else {
  29.         sample *= 32768.0;
  30.     }
  31.  
  32.     sample += 32768.0;
  33.  
  34.     return (uint16_t) sample;
  35. }
  36.  
  37.  
  38. static int el_process(audio_element_handle_t self, char *buf, int len) {
  39.     int rsize = audio_element_input(self, buf, len);
  40.  
  41.     if (len != rsize || (rsize % 4) != 0) {
  42.         ESP_LOGW(TAG, "unexpected rsize: %d, len: %d", rsize, len);
  43.     }
  44.  
  45.     uint16_t lSample, rSample;
  46.     char * lSamplep = (char *) &lSample;
  47.     char * rSamplep = (char *) &rSample;
  48.     for (int i =0; i<rsize; i+=4) {
  49.         rSamplep[0] = buf[i];
  50.         rSamplep[1] = buf[i+1];
  51.  
  52.         lSamplep[0] = buf[i+2];
  53.         lSamplep[1] = buf[i+3];
  54.  
  55.         float a = convertToF32(rSample);
  56.         a *= 0.5;
  57.         rSample = convertToUInt16(a);
  58.  
  59.         buf[i] = rSamplep[0];
  60.         buf[i+1] = rSamplep[1];
  61.  
  62.         buf[i+2] = lSamplep[0];
  63.         buf[i+3] = lSamplep[1];
  64.     }
  65.  
  66.     rsize = audio_element_output(self, buf, rsize);
  67.  
  68.     return rsize;
  69. }
  70.  
  71. void app_main() {
  72.     audio_pipeline_handle_t pipeline;
  73.     audio_element_handle_t i2s_stream_reader, i2s_stream_writer, my_el;
  74.  
  75.     esp_log_level_set("*", ESP_LOG_INFO);
  76.  
  77.     ESP_LOGI(TAG, "[1.0] Start codec chip");
  78.     audio_board_handle_t board_handle = audio_board_init();
  79.     audio_hal_ctrl_codec(board_handle->audio_hal, AUDIO_HAL_CODEC_MODE_BOTH, AUDIO_HAL_CTRL_START);
  80.    
  81.     ESP_LOGI(TAG, "[2.0] Create i2s stream to read audio data from codec chip");
  82.     i2s_stream_cfg_t i2sr_cfg = I2S_STREAM_CFG_DEFAULT();
  83.     i2sr_cfg.i2s_config.communication_format = I2S_COMM_FORMAT_I2S_MSB;
  84.     i2sr_cfg.type = AUDIO_STREAM_READER;
  85.     i2s_stream_reader = i2s_stream_init(&i2sr_cfg);    
  86.  
  87.     ESP_LOGI(TAG, "[3.0] Create i2s stream to write audio data to codec chip");
  88.     i2s_stream_cfg_t i2sw_cfg = I2S_STREAM_CFG_DEFAULT();
  89.     i2sw_cfg.i2s_config.communication_format = I2S_COMM_FORMAT_I2S_MSB;
  90.     i2sw_cfg.type = AUDIO_STREAM_WRITER;
  91.     i2s_stream_writer = i2s_stream_init(&i2sw_cfg);
  92.  
  93.     ESP_LOGI(TAG, "[4.0] Init Element");
  94.     audio_element_cfg_t cfg = DEFAULT_AUDIO_ELEMENT_CONFIG();
  95.     cfg.open = el_open;
  96.     cfg.process = el_process;
  97.     cfg.tag = "my_el";
  98.  
  99.     my_el = audio_element_init(&cfg);
  100.  
  101.     ESP_LOGI(TAG, "[5.0] Create pipeline");
  102.     audio_pipeline_cfg_t pipeline_cfg = DEFAULT_AUDIO_PIPELINE_CONFIG();
  103.     pipeline = audio_pipeline_init(&pipeline_cfg);
  104.     mem_assert(pipeline);
  105.  
  106.     audio_pipeline_register(pipeline, my_el, "my_el");
  107.     audio_pipeline_register(pipeline, i2s_stream_reader, "i2sr");
  108.     audio_pipeline_register(pipeline, i2s_stream_writer, "i2sw");
  109.  
  110.     const char *link_tag[3] = {"i2sr", "my_el", "i2sw"};
  111.     audio_pipeline_link(pipeline, link_tag, 3);
  112.  
  113.     ESP_LOGI(TAG, "[6.0] Run pipeline");
  114.     audio_pipeline_run(pipeline);
  115.  
  116.     ESP_LOGI(TAG, "[ * ] Starting ...");
  117.     while (1) {
  118.         vTaskDelay(2000);
  119.     }
  120. }
Here I can hear my voice on the left channel but on the right channel there is a very loud noise.

paddlesteamer
Posts: 5
Joined: Thu Apr 01, 2021 2:52 pm

Re: Custom Audio Element Problem

Postby paddlesteamer » Thu Apr 01, 2021 8:36 pm

Ok, the problem is caused by `uint16_t`. Converting it to `int16_t` solved the problem. Here is the corrected part:
  1. static int el_process(audio_element_handle_t self, char *buf, int len) {
  2.     int rsize = audio_element_input(self, buf, len);
  3.  
  4.     if (len != rsize || (rsize % 4) != 0) {
  5.         ESP_LOGW(TAG, "unexpected rsize: %d, len: %d", rsize, len);
  6.     }
  7.  
  8.     int16_t lSample, rSample;
  9.     char * lSamplep = (char *) &lSample;
  10.     char * rSamplep = (char *) &rSample;
  11.     for (int i =0; i<rsize; i+=4) {
  12.         rSamplep[0] = buf[i];
  13.         rSamplep[1] = buf[i+1];
  14.  
  15.         lSamplep[0] = buf[i+2];
  16.         lSamplep[1] = buf[i+3];
  17.  
  18.         float a = convertToF32(rSample);
  19.         a *= 0.5;
  20.         rSample = convertToInt16(a);
  21.  
  22.         buf[i] = rSamplep[0];
  23.         buf[i+1] = rSamplep[1];
  24.  
  25.         buf[i+2] = lSamplep[0];
  26.         buf[i+3] = lSamplep[1];
  27.     }
  28.  
  29.     rsize = audio_element_output(self, buf, rsize);
  30.  
  31.     return rsize;
  32. }


Note that the int16-to-float conversion is not required; I will be using floats in my own code, which is why it is used here.

mike84
Posts: 4
Joined: Sat Jun 05, 2021 7:43 am

Re: Custom Audio Element Problem

Postby mike84 » Wed Jun 23, 2021 7:54 am

paddlesteamer wrote:
Thu Apr 01, 2021 8:36 pm
Ok, the problem is caused by `uint16_t`. Converting it to `int16_t` solved the problem. Here is the corrected part:

Code: Select all

/**
 * Process callback of the custom element: reads one buffer of interleaved
 * 16-bit stereo samples from the upstream element, halves the amplitude of
 * the sample the original post calls "right" (first two bytes of every
 * 4-byte frame) and forwards the buffer downstream.
 *
 * @param self  handle of this audio element
 * @param buf   working buffer; filled by audio_element_input()
 * @param len   number of bytes requested from audio_element_input()
 * @return      the result of audio_element_output(), or the non-positive
 *              result of audio_element_input() when nothing was read
 */
static int el_process(audio_element_handle_t self, char *buf, int len) {
    int rsize = audio_element_input(self, buf, len);

    // A non-positive result carries no sample data (ESP-ADF uses negative
    // values as status codes), so return it as-is instead of passing it to
    // audio_element_output() as a byte count.
    if (rsize <= 0) {
        return rsize;
    }

    if (len != rsize || (rsize % 4) != 0) {
        ESP_LOGW(TAG, "unexpected rsize: %d, len: %d", rsize, len);
    }

    int16_t rSample;
    char *rSamplep = (char *) &rSample;

    // Walk whole 4-byte frames only: with "i < rsize" a read that is not a
    // multiple of 4 would access bytes beyond the data that was read.
    for (int i = 0; i + 4 <= rsize; i += 4) {
        // Copy the two bytes of the sample in memory order (no byte swap).
        rSamplep[0] = buf[i];
        rSamplep[1] = buf[i+1];

        float a = convertToF32(rSample);
        a *= 0.5;
        rSample = convertToInt16(a);

        buf[i] = rSamplep[0];
        buf[i+1] = rSamplep[1];

        // buf[i+2] and buf[i+3] (the other channel) are forwarded unchanged.
    }

    return audio_element_output(self, buf, rsize);
}
Note that int16 to float conversion is not required, I will be using floats in my code that's what it's used here.
Hello paddlesteamer!

I want to make an ESP32 based device. The input signal will be fed via the I2S interface (num_0) to the ESP32. It will be for example a regular mems microphone.

Next, I want to do audio processing in ESP32. Sound processing will be to encrypt it, for example AES (in the AES library I found, it is encrypted in groups of 16 elements). This library takes 2-3 ms to encrypt 16 elements. If the buffer has 64 elements, then it will take about 12 ms to encrypt.

After encryption, I want to output sound also on the I2S interface (num_0 or num_1).
I have written this code, and it works in principle, but as soon as I do anything between the calls to i2s_read_bytes and i2s_write_bytes, the sound starts to break up. As an experiment, I simply inserted a delay of 6 ms or more.
Here is my code:

Code: Select all

#include <driver/i2s.h> 
// Value assigned to .sample_rate in the I2S driver configuration.
const int sample_rate = 16000; // 44100 
// Result of the most recent i2s_driver_install() / i2s_set_pin() call.
esp_err_t err;
 
// Size in bytes of the transfer buffer below; also the amount requested per i2s_read_bytes() call.
uint16_t buf_len = 32; // 1024
// Zero-initialised transfer buffer shared by the read and write calls.
// NOTE(review): the calloc() result is not checked for NULL.
uint8_t *buf = (uint8_t*) calloc(buf_len, sizeof(uint8_t));
// NOTE(review): not referenced anywhere in the code shown.
int bytes_written = 0;
 
// Arduino entry point: opens the serial console, then installs the I2S
// driver and assigns its pins, with short pauses between the steps.
void setup() {
  Serial.begin(500000);
  delay(100);

  Serial.println(" ");
  Serial.println("Setup I2S ...");
  delay(500);

  // Driver first, pin routing second.
  i2s_install();
  i2s_setpin();

  delay(500);
}
 
// One pass of the pass-through: block until the I2S input delivers data,
// wait a fixed time, then write the same bytes to the I2S output.
void loop() {
  int bytes_read = 0;
  while (bytes_read == 0) {
    bytes_read = i2s_read_bytes(I2S_NUM_0, buf, buf_len, portMAX_DELAY);
  }

  // i2s_read_bytes() reports failure with a negative value. Skip this pass
  // rather than passing that value to i2s_write_bytes() as a byte count.
  if (bytes_read < 0) {
    return;
  }

  // Stand-in for the per-buffer processing time (the experiment described
  // in the post).
  delay(6);

  i2s_write_bytes(I2S_NUM_0, buf, bytes_read, portMAX_DELAY);
}
 
// Installs the I2S driver on port 0 using the configuration below.
// Stores the driver's result in the global `err`; on failure it prints the
// error code and never returns.
void i2s_install() {
  const i2s_config_t i2s_config = {
    .mode = i2s_mode_t(I2S_MODE_MASTER | I2S_MODE_TX | I2S_MODE_RX),
    .sample_rate = sample_rate,
    .bits_per_sample = i2s_bits_per_sample_t(16),
    .channel_format = I2S_CHANNEL_FMT_ONLY_LEFT,
    .communication_format = i2s_comm_format_t(I2S_COMM_FORMAT_I2S | I2S_COMM_FORMAT_I2S_MSB),
    .intr_alloc_flags = ESP_INTR_FLAG_LEVEL1,  // default interrupt priority
    .dma_buf_count = 16,                       // alternative noted by the author: 32
    .dma_buf_len = 64,                         // alternative noted by the author: 32 * 2
  };

  err = i2s_driver_install(I2S_NUM_0, &i2s_config, 0, NULL);
  if (err == ESP_OK) {
    Serial.println("I2S driver installed.");
    return;
  }

  // Installation failed: report the code and halt here.
  Serial.printf("Failed installing driver_1: %d\n", err);
  for (;;) {
  }
}
 
// Assigns the GPIO numbers below to I2S port 0.
// Stores the driver's result in the global `err`; on failure it prints the
// error code and never returns.
void i2s_setpin() {
  const i2s_pin_config_t pin_config = {
    .bck_io_num = 26,
    .ws_io_num = 25,
    .data_out_num = 22,
    .data_in_num = 33
  };

  err = i2s_set_pin(I2S_NUM_0, &pin_config);
  if (err == ESP_OK) {
    Serial.println("I2S pins installed.");
    Serial.println(" ");
    return;
  }

  // Pin assignment failed: report the code and halt here.
  Serial.printf("Failed setting pin_1: %d\n", err);
  for (;;) {
  }
}
Processing takes 10-15 ms for buf_len = 64. For experiment, I try setting delay (12).
If you set delay (12) and buf_len = 1024, then everything is fine. If you set buf_len = 64, then the sound is wheezing at the DAC output.

But here's one thing. The larger the buffer (buf_len) in this case, the longer it will take to process and, accordingly, it will no longer be 10-15 ms.
Measuring with millis(), I found that the AES encryption library takes about 3 ms on average to process 16 buffer elements. Because of this, buf_len cannot be set to a large value.

Can you tell me what to do next?

paddlesteamer
Posts: 5
Joined: Thu Apr 01, 2021 2:52 pm

Re: Custom Audio Element Problem

Postby paddlesteamer » Thu Jun 24, 2021 10:13 am

Hello,

I think your approach is not real-time. In the loop function, you first call `i2s_read_bytes` then wait 6 ms, then you send them to i2s output, and then, again you read the next batch. The latter batch you read is 6 ms late. I don't know Arduino-style coding on esp32 but there should be a method where you can register a callback function which will be called every time there are samples ready on i2s input. In your callback function, you should read bytes from i2s input to a buffer, forward them immediately to i2s output, and only after that you should run the encryption process on the buffer. If you really want to wait for AES encryption to complete before you output samples , then you should have an output buffer instead of outputting read bytes immediately, you should output bytes from the beginning of the output buffer and append the read bytes to the output buffer after the encryption process is finished.

mike84
Posts: 4
Joined: Sat Jun 05, 2021 7:43 am

Re: Custom Audio Element Problem

Postby mike84 » Thu Jun 24, 2021 12:41 pm

paddlesteamer wrote:
Thu Jun 24, 2021 10:13 am
Hello,

I think your approach is not real-time. In the loop function, you first call `i2s_read_bytes` then wait 6 ms, then you send them to i2s output, and then, again you read the next batch. The latter batch you read is 6 ms late. I don't know Arduino-style coding on esp32 but there should be a method where you can register a callback function which will be called every time there are samples ready on i2s input. In your callback function, you should read bytes from i2s input to a buffer, forward them immediately to i2s output, and only after that you should run the encryption process on the buffer. If you really want to wait for AES encryption to complete before you output samples , then you should have an output buffer instead of outputting read bytes immediately, you should output bytes from the beginning of the output buffer and append the read bytes to the output buffer after the encryption process is finished.
Paddlesteamer! Thank you very much for your reply!
Please, can you show at least a little example in the code how I need to do it?

So, I need to create one (or maybe several) intermediate buffers?

Can you add a few lines to my code to tell me how to do it?

chitarrastonata
Posts: 1
Joined: Sat Feb 12, 2022 1:38 pm

Re: Custom Audio Element Problem

Postby chitarrastonata » Sat Feb 12, 2022 1:45 pm

Ciao paddlesteamer,
the easiest way to handle a signed value is to use a signed variable!
You are using an unsigned one, which makes everything more complicated.
Try this sample code; it implements a very simple echo:

#define ECHO_LEN (1024*32)

// Per-channel delay lines holding the last ECHO_LEN output samples.
int16_t echoBufR[ECHO_LEN];
int16_t echoBufL[ECHO_LEN];
// Common read/write position in both delay lines; ECHO_LEN (32768) fits in uint16_t.
uint16_t echoPtr = 0;

// Clamps a widened sample to the int16_t range so a loud echo clips instead of wrapping around.
static int16_t echo_clip16(int32_t v) {
    if (v > INT16_MAX) {
        return INT16_MAX;
    }
    if (v < INT16_MIN) {
        return INT16_MIN;
    }
    return (int16_t) v;
}

/**
 * Process callback implementing a simple feedback echo on interleaved
 * 16-bit stereo samples: each output sample is the input sample plus half
 * of the output sample produced ECHO_LEN frames earlier.
 *
 * @param self  handle of this audio element
 * @param buf   working buffer; filled by audio_element_input()
 * @param len   number of bytes requested from audio_element_input()
 * @return      the result of audio_element_output(), or the non-positive
 *              result of audio_element_input() when nothing was read
 */
static int el_process(audio_element_handle_t self, char *buf, int len) {
    int rsize = audio_element_input(self, buf, len);

    // A non-positive result carries no sample data (ESP-ADF uses negative
    // values as status codes), so return it as-is instead of passing it to
    // audio_element_output() as a byte count.
    if (rsize <= 0) {
        return rsize;
    }

    if (len != rsize || (rsize % 4) != 0) {
        ESP_LOGW(TAG, "unexpected rsize: %d, len: %d", rsize, len);
    }

    // Walk whole 4-byte frames only, so a read that is not a multiple of 4
    // never accesses bytes beyond the data that was read.
    for (int i = 0; i + 4 <= rsize; i += 4) {
        // NOTE(review): assumes buf is suitably aligned for int16_t access — confirm.
        int16_t rSample = *((int16_t*)(&buf[i+0]));
        int16_t lSample = *((int16_t*)(&buf[i+2]));

        // ECHO: mix in 32 bits, then clamp. Adding directly in int16_t wraps
        // on overflow, and the wrapped value would be stored back into the
        // delay line and repeated.
        rSample = echo_clip16((int32_t) rSample + (echoBufR[echoPtr] / 2));
        lSample = echo_clip16((int32_t) lSample + (echoBufL[echoPtr] / 2));
        echoBufR[echoPtr] = rSample;
        echoBufL[echoPtr] = lSample;
        if (++echoPtr >= ECHO_LEN) {
            echoPtr = 0;
        }

        *((int16_t*)(&buf[i+0])) = rSample;
        *((int16_t*)(&buf[i+2])) = lSample;
    }

    return audio_element_output(self, buf, rsize);
}

Who is online

Users browsing this forum: No registered users and 48 guests