【ESP32S3 Sense接入百度在线语音识别】_数据分析

视频地址：

esp32s3 sense接入百度在线语音识别

目前这是我使用的esp32s3官方硬件👍👍👍（小小的身材有大大的力量）只需要35元加摄像头麦克风79元，后期我会整理相关专栏进行arduino系统学习😘😘😘。有需要可以购买xiao开发板💕💕💕，seeedxiao esp32s3 sense硬件购买地址：https://s.click.taobao.com/lekazrt
在这里插入图片描述

1. 前言

在这里插入图片描述

使用seeed xiao esp32s3 sense开发板接入百度智能云实现在线语音识别。自带麦克风模块用做语音输入，通过串口发送字符“1”来控制数据的采集和上传。
在这里插入图片描述

2. 操作流程

2.1 创建语音识别应用

登录百度云账号，选择语音识别
官网地址：https://ai.baidu.com/tech/speech
在这里插入图片描述
新用户可以直接领取资源，也可付费接入，创建应用。

根据创建应用生成的api key和secret key来获取token，创建好应用，点管理应用，会有api key和secret key，如下图应用创建成功

2.2 api秘钥创建

点击在线调试
在这里插入图片描述
按照如下顺序选择

有了api key和secret key就可以得到token，下面附上esp32进行get请求得到token的代码

在这里插入图片描述

access_token对应的值就是可用的token了。每次申请的token有效期为30天，过期需要重新申请，也可以申请多个。因此不用每次都调用获取token的程序，申请一次可以使用30天，定期更新即可。

3. json语音接入

采集数据后，通过post请求发送到api。数据上传的post方式有2种：json格式和raw格式。
在这里插入图片描述

3.1 json格式

这里介绍使用json格式上传的方式，下图为json格式上传的一些必要的参数说明
在这里插入图片描述

3.2 esp32s3 sense接入代码

图中对数据类型和内容说的很明确了，只需要按照这个格式打包好数据然后发送就行，下面是esp32的具体实现代码。

#include <arduino.h>
#include "base64.h"
#include "wifi.h"
#include "httpclient.h"
#include "cjson.h"
#include <i2s.h>
#include <arduinojson.h>

// #define key 4             // port 0
// #define adc 2             // port 39
// #define led 15            // port 2
const int buttonpin = 1;  // the number of the pushbutton pin
const int ledpin = 21;    // the number of the led pin
httpclient http_client;
// 1. replace with your network credentials
const char* ssid = "j09 502";
const char* password = "qwertyuiop111";
hw_timer_t* timer = null;

// Number of 16-bit samples captured per recording. The original author's note:
// 16000 samples collected by the 8 kHz timer configured in setup(), i.e. about
// 2 seconds of recording. To record longer, enlarge this buffer and the
// data_json buffer below.
#define data_len 16000
uint16_t adc_data[data_len];

// Hand-shake flags shared between loop() and the timer isr. They are written
// in interrupt context and polled in a busy-wait loop, so they must be
// volatile to guarantee the compiler re-reads them on every iteration.
volatile uint8_t adc_start_flag = 0;     // set by loop() to start a capture
volatile uint8_t adc_complete_flag = 0;  // set by the isr when adc_data is full


// Holds the json request body. base64 encoding grows the audio to 4/3 of its
// raw size, so this must be sized with headroom to avoid overrunning it.
char data_json[45000];


void iram_attr ontimer();
void gain_token(void);
// One-time initialisation: serial port, status led, the on-board pdm
// microphone (via i2s), wi-fi, and the sampling timer (configured but left
// stopped until a recording is requested in loop()).
void setup() {

  //serial.begin(921600);
  serial.begin(115200);
  // pinmode(adc, analog);
  // pinmode(buttonpin, input_pullup);
  pinmode(ledpin, output);
  // start i2s at 16 khz with 16-bits per sample
  i2s.setallpins(-1, 42, 41, -1, -1);
  if (!i2s.begin(pdm_mono_mode, 16000, 16)) {
    serial.println("failed to initialize i2s!");
    while (1)
      ;  // do nothing
  }

  // Poll the connection state, giving up after 75 attempts.
  uint8_t count = 0;
  wifi.mode(wifi_sta);
  wifi.begin(ssid, password);
  while (wifi.status() != wl_connected) {
    serial.print(".");
    count++;
    if (count >= 75) {
      break;
    }
    vtaskdelay(200);
  }
  // Report the real outcome: the original printed "success" even after the
  // retry limit was hit.
  if (wifi.status() == wl_connected) {
    serial.printf("\r\n-- wifi connect success! --\r\n");
  } else {
    serial.printf("\r\n-- wifi connect fail! --\r\n");
  }

  // gain_token();

  timer = timerbegin(0, 80, true);    // 80 MHz clock / 80 prescaler -> 1 MHz tick
  timeralarmwrite(timer, 125, true);  // alarm every 125 ticks -> 8 kHz interrupt rate
  timerattachinterrupt(timer, &ontimer, true);
  timeralarmenable(timer);
  timerstop(timer);  // keep it paused until loop() starts a recording
}


uint32_t time1, time2;  // only referenced by the commented-out timing code in loop()

// Main loop: when the character '1' arrives on the serial port, record one
// buffer of audio, wrap it in the json request format, post it to the speech
// api and print the recognised text.
void loop() {

  if (serial.available() > 0)  // a byte is waiting on the serial port
  {
    if (serial.read() == '1') {
      serial.printf("start recognition\r\n\r\n");
      digitalwrite(ledpin, high);
      adc_start_flag = 1;
      timerstart(timer);

      // time1=micros();
      while (!adc_complete_flag)  // wait until the isr has filled adc_data
      {
        ets_delay_us(10);
      }
      // time2=micros()-time1;

      timerstop(timer);
      adc_complete_flag = 0;  // clear the flag for the next recording

      digitalwrite(ledpin, low);

      // serial.printf("time:%d\r\n",time2);  // print elapsed time


      // Build the request body by hand with strcat.
      memset(data_json, '\0', sizeof(data_json));  // clear the whole buffer
      strcat(data_json, "{");
      strcat(data_json, "\"format\":\"pcm\",");
      strcat(data_json, "\"rate\":16000,");       // sample rate; the api only accepts 16000 or 8000
      strcat(data_json, "\"dev_pid\":1537,");     // mandarin chinese
      strcat(data_json, "\"channel\":1,");        // mono
      strcat(data_json, "\"cuid\":\"666666\",");  // client id: any string, ideally unique
      strcat(data_json, "\"token\":\"24.8f6133335e191.2592000.1713789066.282335-57722200\",");  // replace with your own token
      // "len" is the raw audio size in bytes (before base64). Derive it from
      // the buffer so it cannot drift out of sync when data_len changes.
      sprintf(data_json + strlen(data_json), "\"len\":%u,", (unsigned)sizeof(adc_data));
      strcat(data_json, "\"speech\":\"");
      strcat(data_json, base64::encode((uint8_t*)adc_data, sizeof(adc_data)).c_str());  // base64-encoded audio
      strcat(data_json, "\"");
      strcat(data_json, "}");
      // serial.println(data_json);


      http_client.settimeout(5000);
      http_client.begin("http://vop.baidu.com/server_api");  //https://vop.baidu.com/pro_api
      http_client.addheader("content-type", "application/json");
      int httpcode = http_client.post(data_json);

      if (httpcode == http_code_ok) {
        string response = http_client.getstring();
        serial.println(response);
        // parse json response
        dynamicjsondocument jsondoc(512);
        if (deserializejson(jsondoc, response)) {
          // A non-zero result means the body was not valid json.
          serial.println("failed to parse recognition response");
        } else {
          // Take the first element of the "result" array and print it.
          string outputtext = jsondoc["result"][0];
          serial.println(outputtext);
        }
      } else if (httpcode > 0) {
        // The server answered, but not with 200.
        serial.printf("[http] post... unexpected status: %d\n", httpcode);
      } else {
        // Negative codes are client-side errors (connection, timeout, ...).
        serial.printf("[http] post... failed, error: %s\n", http_client.errortostring(httpcode).c_str());
      }
      // Always release the connection, not only on success.
      http_client.end();

      // while (!digitalread(buttonpin))
      //   ;
      serial.printf("recognition complete\r\n");
    }
  }
  vtaskdelay(1);
}


uint32_t num = 0;                                      // next write index into adc_data
portmux_type timermux = portmux_initializer_unlocked;  // guards the isr critical section

// Timer interrupt handler. While a capture is active it stores one sample per
// invocation into adc_data; once data_len samples are stored it raises
// adc_complete_flag, clears adc_start_flag and rewinds the write index.
// NOTE(review): i2s.read() is called from interrupt context inside a critical
// section - confirm that this is safe on the target core.
void iram_attr ontimer() {
  portenter_critical_isr(&timermux);

  // Nothing to do unless loop() has requested a capture.
  if (adc_start_flag != 1) {
    portexit_critical_isr(&timermux);
    return;
  }

  //serial.println("");
  // adc_data[num] = analogread(adc);
  adc_data[num] = i2s.read();
  num++;

  const bool buffer_full = (num >= data_len);
  if (buffer_full) {
    adc_complete_flag = 1;
    adc_start_flag = 0;
    num = 0;
    //serial.println(complete_flag);
  }

  portexit_critical_isr(&timermux);
}



// Disabled helper, kept for reference: requests an access token and prints
// the raw response on the serial port.
// void gain_token(void)  // fetch the access token
// {
//   int httpcode;
//   // note: replace your_apikey and your_secretkey in the url below with your own api key and secret key
//   http_client.begin("https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=your_apikey&client_secret=your_secretkey");
//   httpcode = http_client.get();
//   if (httpcode > 0) {
//     if (httpcode == http_code_ok) {
//       string payload = http_client.getstring();
//       serial.println(payload);
//     }
//   } else {
//     serial.printf("[http] get... failed, error: %s\n", http_client.errortostring(httpcode).c_str());
//   }
//   http_client.end();
// }

3.3 esp32接入代码

使用esp32接入百度智能云实现在线语音识别，max9814麦克风模块接gpio2用做语音输入，一个按键接gpio3来控制数据的采集和上传

#include <arduino.h>
#include "base64.h"
#include "wifi.h"
#include "httpclient.h"
#include "cjson.h"
#include <arduinojson.h>

// 1. adjust the push-button and max9814 pins
#define key 3
#define adc 2
// 2. fill in your baidu speech account details: https://console.bce.baidu.com/ai/?fromai=1#/ai/speech/app/list
const int dev_pid = 1537;
const char *cuid = "577200";
const char *client_id = "bxl2ys33b7xw5xdq";
const char *client_secret = "pb2ziw2nch2untc9wpk4ekx";
string token;
// global state
httpclient http_client;
hw_timer_t *timer = null;
const int recordtimeseconds = 3;  // recording length in seconds
const int adc_data_len = 16000 * recordtimeseconds;  // number of 16-bit samples per recording
// Raw audio bytes (2 per sample) scaled by 1.4 to leave room for the base64
// expansion and the json fields around it.
const int data_json_len = adc_data_len * 2 * 1.4;
uint16_t *adc_data;  // sample buffer, allocated in setup()
char *data_json;     // json request body, allocated in setup()
// Hand-shake flags shared between loop() and the timer isr. They are written
// in interrupt context and polled in a busy-wait loop, so they must be
// volatile to guarantee the compiler re-reads them on every iteration.
volatile uint8_t adc_start_flag = 0;
volatile uint8_t adc_complete_flag = 0;
uint32_t num = 0;  // next write index into adc_data
portmux_type timermux = portmux_initializer_unlocked;

// forward declarations
void iram_attr ontimer();
string gaintoken();
void assemblejson(string token);
void sendtostt();

// One-time initialisation: serial port, pins, wi-fi, access token, sampling
// timer (left stopped) and the two psram buffers used for recording and for
// the json request body.
void setup() {
  serial.begin(115200);
  pinmode(adc, analog);
  pinmode(key, input_pullup);
  wifi.disconnect(true);
  // 3. fill in your wi-fi ssid and password
  wifi.begin("j09 502", "qwertyuiop111");  
  while (wifi.status() != wl_connected) {
    serial.print(".");
    vtaskdelay(200);
  }
  serial.println("\n-- wifi connect success! --");
  token = gaintoken();
  timer = timerbegin(0, 40, true);
  timeralarmwrite(timer, 125, true);
  timerattachinterrupt(timer, &ontimer, true);
  timeralarmenable(timer);
  timerstop(timer);  // keep it paused until loop() starts a recording

  // allocate both buffers from psram
  adc_data = (uint16_t *)ps_malloc(adc_data_len * sizeof(uint16_t));
  if (!adc_data) {
    serial.println("failed to allocate memory for adc_data");
  }

  data_json = (char *)ps_malloc(data_json_len * sizeof(char));  // adjust the size as needed
  if (!data_json) {
    serial.println("failed to allocate memory for data_json");
  }

  // Without both buffers the isr and assemblejson() would write through a
  // null pointer, so stop here instead of continuing into loop().
  if (!adc_data || !data_json) {
    while (1) {
      vtaskdelay(1000);
    }
  }
}

uint32_t time1, time2;  // not referenced by the code in this sketch

// Main loop: when the key pin reads 1 on two reads taken 10 ms apart, record
// one buffer of audio, build the json request and send it for recognition.
// NOTE(review): key is configured as input_pullup in setup(), so an idle pin
// presumably reads 1 - confirm the button wiring really drives the pin high
// when pressed, otherwise a recording is triggered on every pass.
void loop() {
  // First read: bail out unless the pin is high.
  if (digitalread(key) != 1) {
    return;
  }

  // Debounce: the pin must still be high after 10 ms.
  delay(10);
  if (digitalread(key) != 1) {
    return;
  }

  serial.println("开始录音");
  adc_start_flag = 1;
  timerstart(timer);

  // Busy-wait until the isr reports that the sample buffer is full.
  while (!adc_complete_flag) {
    ets_delay_us(10);
  }

  serial.println("录音结束");
  timerstop(timer);
  adc_complete_flag = 0;

  assemblejson(token);
  sendtostt();

  // while (!digitalread(key));
  serial.println("recognition complete");
}

// Builds the json request body for the speech api in data_json.
//
// token: access token placed in the "token" field.
//
// The "dev_pid" and "cuid" fields come from the user-editable globals of the
// same name (the original hard-coded separate literals, so editing the globals
// had no effect). All writes are bounded by data_json_len; if the buffers are
// missing or the body would not fit, data_json is left as an empty string and
// a message is printed.
void assemblejson(string token) {
  if (!data_json || !adc_data) {
    serial.println("assemblejson: buffers not allocated");
    return;
  }

  // "len" is the raw audio size in bytes, not the base64 length.
  const int speech_bytes = adc_data_len * sizeof(uint16_t);

  // Everything up to and including the opening quote of "speech".
  int used = snprintf(data_json, data_json_len,
                      "{\"format\":\"pcm\",\"rate\":16000,\"dev_pid\":%d,\"channel\":1,"
                      "\"cuid\":\"%s\",\"token\":\"%s\",\"len\":%d,\"speech\":\"",
                      dev_pid, cuid, token.c_str(), speech_bytes);
  if (used < 0 || used >= data_json_len) {
    data_json[0] = '\0';
    serial.println("assemblejson: json header does not fit in data_json");
    return;
  }

  string encoded = base64::encode((uint8_t *)adc_data, speech_bytes);
  const size_t encoded_len = strlen(encoded.c_str());

  // Need room for the audio, the closing "\"}" and the terminating nul.
  if ((size_t)used + encoded_len + 3 > (size_t)data_json_len) {
    data_json[0] = '\0';
    serial.println("assemblejson: encoded audio does not fit in data_json");
    return;
  }

  memcpy(data_json + used, encoded.c_str(), encoded_len);
  memcpy(data_json + used + encoded_len, "\"}", 3);  // 3 bytes: includes the nul
}

// Posts the json body held in data_json to the speech api and prints the
// response body on success, or a diagnostic otherwise.
void sendtostt() {
  http_client.begin("http://vop.baidu.com/server_api");
  http_client.addheader("content-type", "application/json");
  int httpcode = http_client.post(data_json);

  if (httpcode == http_code_ok) {
    string payload = http_client.getstring();
    serial.println(payload);
  } else if (httpcode > 0) {
    // The server answered with a non-200 status; the original dropped this
    // case silently.
    serial.printf("[http] post returned unexpected status: %d\n", httpcode);
  } else {
    // Negative codes are client-side errors (connection, timeout, ...).
    serial.printf("[http] post failed, error: %s\n", http_client.errortostring(httpcode).c_str());
  }
  http_client.end();
}


// Timer interrupt handler. While a capture is active it stores one adc
// reading per invocation into adc_data; once adc_data_len samples are stored
// it raises adc_complete_flag, clears adc_start_flag and rewinds the index.
// NOTE(review): analogread() is called from interrupt context inside a
// critical section - confirm that this is safe on the target core.
void iram_attr ontimer() {
  portenter_critical_isr(&timermux);

  // Nothing to do unless loop() has requested a capture.
  if (adc_start_flag != 1) {
    portexit_critical_isr(&timermux);
    return;
  }

  // serial.println("");
  adc_data[num] = analogread(adc);
  num++;

  const bool buffer_full = (num >= adc_data_len);
  if (buffer_full) {
    adc_complete_flag = 1;
    adc_start_flag = 0;
    num = 0;
    // serial.println(complete_flag);
  }

  portexit_critical_isr(&timermux);
}

// Requests an access token using client_id / client_secret and returns it.
//
// Returns the value of the "access_token" field of the response, or an empty
// string when the request fails, the server does not answer with 200, the
// body is not valid json, or the field is absent.
string gaintoken() {
  httpclient http;
  string token;
  string url = string("https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=") + client_id + "&client_secret=" + client_secret;

  http.begin(url);
  int httpcode = http.get();

  if (httpcode == http_code_ok) {
    string payload = http.getstring();
    dynamicjsondocument doc(1024);
    if (deserializejson(doc, payload)) {
      // A non-zero result means the body could not be parsed.
      serial.println("failed to parse token response");
    } else {
      // Fall back to "" when the field is missing instead of converting a
      // null value to text.
      token = doc["access_token"] | "";
      serial.println(token);
    }
  } else if (httpcode > 0) {
    // The server answered, but with an error status (e.g. bad credentials).
    serial.printf("token request returned unexpected status: %d\n", httpcode);
  } else {
    serial.println("error on http request for token");
  }
  http.end();
  return token;
}

上面代码是将数据拼接成要求的json的格式并通过post的方式发送到请求api，并接收打印返回的数据消息。使用的定时器设置成8k频率定时采集音频数据，上面代码中并未展示，后面会附上完整代码。
esp32是有json库的，在 “cjson.h” 头文件中，但是我没有用，因为我发现数据太长时不知道为啥会出现莫名其妙的错误，也没深究，就使用函数strcat()将数据拼接成规定的格式，好使，就是写的时候麻烦一些，但问题不大。
post发送数据有一个固定头部：content-type:application/json，post前需要设置一下。

4. 接收数据

参考以下烧录配置，
在这里插入图片描述

串口输入字符“1”并按回车键，然后有2s录音时间。等待百度在线语音识别返回，在上一步的代码中实现了接收数据，这里列一下返回的数据。

22:04:58.854 -> start recognition
22:04:58.854 -> 
22:05:01.558 -> {"corpus_no":"7349559668823131804","err_msg":"success.","err_no":0,"result":["你好。"],"sn":"922395388061711202708"}
22:05:01.558 -> 
22:05:01.558 -> 你好。
22:05:01.558 -> recognition complete
22:08:46.838 -> start recognition
22:08:46.838 -> 
22:08:49.809 -> {"corpus_no":"7349560648200206506","err_msg":"success.","err_no":0,"result":["你知道百度吗？"],"sn":"497775464181711202936"}
22:08:49.809 -> 
22:08:49.809 -> 你知道百度吗？
22:08:49.809 -> recognition complete
22:08:54.218 -> start recognition
22:08:54.218 -> 
22:08:57.084 -> {"corpus_no":"7349560678205790969","err_msg":"success.","err_no":0,"result":["我喜欢小黄人。"],"sn":"748488478211711202943"}
22:08:57.084 -> 
22:08:57.084 -> 我喜欢小黄人。
22:08:57.084 -> recognition complete

在这里插入图片描述

数据发送成功则会返回正确的识别数据，当然声音信号不好时返回的语音识别也会不准确。

5. 总结

本文使用seeed xiao esp32s3 sense开发板接入百度智能云实现在线语音识别。自带麦克风模块用做语音输入，通过串口发送字符“1”来控制数据的采集和上传。从而实现对外部世界进行感知，充分认识这个有机与无机的环境，科学地合理地进行创作和发挥效益，然后为人类社会发展贡献一点微薄之力。🤣🤣🤣

参考文献：esp32接入百度智能云语音识别，实现在线语音识别