关注问题本身是值得长期去做的事情,想法一点都不重要。

微信小程序通过讯飞实现语音听写

1. 加载需要的库

// https://cryptojs.gitbook.io/docs/
const CryptoJS = require('crypto-js');
/**
 * Hand-rolled Base64 codec for JavaScript strings.
 *
 * `encode` converts the string to UTF-8 bytes first and then Base64-encodes
 * those bytes; `decode` reverses both steps. Intermediate "byte strings" hold
 * one byte (0x00-0xFF) per UTF-16 code unit.
 */
const Base64 = {
  // The 64 alphabet characters followed by the padding character '=' (index 64).
  _keyStr: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=",

  /**
   * Base64-encodes the UTF-8 representation of `str`.
   *
   * @param str Text to encode.
   * @returns Padded Base64 text ("" for empty input).
   */
  encode(str: string): string {
    const bytes = this._utf16to8(str);
    let output = "";
    for (let i = 0; i < bytes.length; i += 3) {
      const b1 = bytes.charCodeAt(i);
      // charCodeAt past the end yields NaN; bitwise operators treat NaN as 0.
      const b2 = bytes.charCodeAt(i + 1);
      const b3 = bytes.charCodeAt(i + 2);
      const e1 = b1 >> 2;
      const e2 = ((b1 & 3) << 4) | (b2 >> 4);
      let e3 = ((b2 & 15) << 2) | (b3 >> 6);
      let e4 = b3 & 63;
      if (isNaN(b2)) {
        // Only one byte left in this group: emit "==".
        e3 = 64;
        e4 = 64;
      } else if (isNaN(b3)) {
        // Two bytes left in this group: emit "=".
        e4 = 64;
      }
      output +=
        this._keyStr.charAt(e1) +
        this._keyStr.charAt(e2) +
        this._keyStr.charAt(e3) +
        this._keyStr.charAt(e4);
    }
    return output;
  },

  /**
   * Decodes Base64 text produced by `encode` back into the original string.
   * Characters outside the Base64 alphabet are ignored, and missing trailing
   * padding is tolerated.
   *
   * @param input Base64 text.
   * @returns The decoded string ("" for empty input).
   */
  decode(input: string): string {
    const clean = input.replace(/[^A-Za-z0-9\+\/\=]/g, "");
    // A position past the end is treated as padding so that unpadded input
    // does not decode to an extra garbage character.
    const sextet = (pos: number): number =>
      pos < clean.length ? this._keyStr.indexOf(clean.charAt(pos)) : 64;

    let bytes = "";
    for (let i = 0; i < clean.length; i += 4) {
      const e1 = sextet(i);
      const e2 = sextet(i + 1);
      const e3 = sextet(i + 2);
      const e4 = sextet(i + 3);
      bytes += String.fromCharCode((e1 << 2) | (e2 >> 4));
      if (e3 !== 64) {
        bytes += String.fromCharCode(((e2 & 15) << 4) | (e3 >> 2));
      }
      if (e4 !== 64) {
        bytes += String.fromCharCode(((e3 & 3) << 6) | e4);
      }
    }
    return this._utf8to16(bytes);
  },

  /**
   * Converts a UTF-16 string into a UTF-8 byte string.
   *
   * Valid surrogate pairs become a single 4-byte sequence; a lone surrogate is
   * encoded as a 3-byte sequence, and U+0000 as the two bytes 0xC0 0x80 (both
   * as in the previous implementation).
   *
   * @param str Text to convert.
   * @returns One character per UTF-8 byte.
   */
  _utf16to8(str: string): string {
    let out = "";
    for (let i = 0; i < str.length; i++) {
      const c = str.charCodeAt(i);

      // High surrogate followed by a low surrogate: one supplementary code point.
      if (c >= 0xD800 && c <= 0xDBFF && i + 1 < str.length) {
        const low = str.charCodeAt(i + 1);
        if (low >= 0xDC00 && low <= 0xDFFF) {
          const cp = 0x10000 + ((c - 0xD800) << 10) + (low - 0xDC00);
          out += String.fromCharCode(
            0xF0 | (cp >> 18),
            0x80 | ((cp >> 12) & 0x3F),
            0x80 | ((cp >> 6) & 0x3F),
            0x80 | (cp & 0x3F)
          );
          i++; // the low surrogate has been consumed as well
          continue;
        }
      }

      if (c >= 0x0001 && c <= 0x007F) {
        out += str.charAt(i);
      } else if (c > 0x07FF) {
        out += String.fromCharCode(
          0xE0 | ((c >> 12) & 0x0F),
          0x80 | ((c >> 6) & 0x3F),
          0x80 | (c & 0x3F)
        );
      } else {
        out += String.fromCharCode(
          0xC0 | ((c >> 6) & 0x1F),
          0x80 | (c & 0x3F)
        );
      }
    }
    return out;
  },

  /**
   * Converts a UTF-8 byte string back into a UTF-16 string.
   *
   * Continuation bytes are not validated. Stray continuation bytes and
   * out-of-range 4-byte sequences are skipped.
   *
   * @param str One character per UTF-8 byte.
   * @returns The decoded text.
   */
  _utf8to16(str: string): string {
    let out = "";
    let i = 0;
    while (i < str.length) {
      const c = str.charCodeAt(i++);
      switch (c >> 4) {
        // 0xxxxxxx: single-byte (ASCII) character, including 0x70-0x7F.
        case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
          out += String.fromCharCode(c);
          break;
        // 110xxxxx 10xxxxxx
        case 12: case 13: {
          const b2 = str.charCodeAt(i++);
          out += String.fromCharCode(((c & 0x1F) << 6) | (b2 & 0x3F));
          break;
        }
        // 1110xxxx 10xxxxxx 10xxxxxx
        case 14: {
          const b2 = str.charCodeAt(i++);
          const b3 = str.charCodeAt(i++);
          out += String.fromCharCode(
            ((c & 0x0F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F)
          );
          break;
        }
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: emitted as a surrogate pair.
        case 15: {
          const b2 = str.charCodeAt(i++);
          const b3 = str.charCodeAt(i++);
          const b4 = str.charCodeAt(i++);
          const cp =
            ((c & 0x07) << 18) |
            ((b2 & 0x3F) << 12) |
            ((b3 & 0x3F) << 6) |
            (b4 & 0x3F);
          if (cp >= 0x10000 && cp <= 0x10FFFF) {
            const offset = cp - 0x10000;
            out += String.fromCharCode(
              0xD800 + (offset >> 10),
              0xDC00 + (offset & 0x3FF)
            );
          }
          break;
        }
        // 10xxxxxx (or a value above 0xFF): not a valid lead byte, skip it.
        default:
          break;
      }
    }
    return out;
  }
}

2. 初始化全局变量

// Recorder manager used by the recording callbacks and by record()/recordEnd().
const rm: any = wx.getRecorderManager();

// Socket task of the speech WebSocket; unassigned until a connection is opened.
let wxst: any;

// Audio frame state: the listeners in this file use 0 before the first frame
// is sent, 1 while streaming and 2 once recording has stopped.
let status = 0;

// Recognition results, stored by the sequence number (`sn`) of each result.
let iatResult: any = [];

3. 在 onLoad 中注册录音管理器的事件监听

// Recorder event listeners. They stream recorded PCM frames to the speech
// WebSocket (`wxst`) and send the closing frame when recording stops.
rm.onStart(() => { // fired when recording starts
      // Reset the frame state and drop results left over from the last session.
      status = 0;
      iatResult = [];
      console.log('录音开始');
    });
    rm.onError((res:any) => { // recorder error callback
      console.log('录音错误',res);
    });
    rm.onStop(() => { // fired when recording ends
      console.log('录音结束');
      status = 2;
      wx.hideLoading();
      // Nothing to send when no socket exists yet or it is not open.
      if (!wxst || wxst.readyState !== 1) {
        return;
      }
      // Closing frame: status 2 with empty audio. The declared sample rate now
      // matches the 16000 used by every other frame (it used to say 8000).
      wxst.send({
        data: JSON.stringify({
          "data": {
            "status": 2,
            "audio": "",
            "format": "audio/L16;rate=16000",
            "encoding": "raw"
          }
        })
      });
    });
    rm.onFrameRecorded((res:any) => { // fired for every recorded frame
      // Frames are dropped while no socket exists or it is not open.
      if (!wxst || wxst.readyState !== 1) {
        return;
      }
      const { frameBuffer } = res;
      // Byte view over the frame buffer, converted to Base64 for the JSON payload.
      const pcmBytes = new Int8Array(frameBuffer);
      const base64 = wx.arrayBufferToBase64(pcmBytes);
      switch (status) {
        case 0:
          // First frame: carries the app id and the recognition parameters.
          status = 1;
          wxst.send({
            data: JSON.stringify({
              "common": {
                // NOTE(review): hard-coded application id; consider loading it from configuration.
                "app_id": "ac9ed282"
              },
              "business": {
                "language": "zh_cn",
                "domain": "iat",
                "accent": "mandarin",
                "dwa": "wpgs",
                "ptt": 1,
                "vad_eos": 1000
              },
              "data": {
                "status": 0,
                "format": "audio/L16;rate=16000",
                "encoding": "raw",
                "audio": base64
              }
            })
          });
          break;
        case 1:
          // Intermediate frame: audio only.
          wxst.send({
            data: JSON.stringify({
              "data": {
                "status": 1,
                "format": "audio/L16;rate=16000",
                "encoding": "raw",
                "audio": base64
              }
            })
          });
          break;
        default:
          // status 2: recording has stopped, late frames are ignored.
          break;
      }
    })

4. 生成带鉴权参数的 WebSocket 连接 URL

  /**
   * Builds the signed WebSocket URL for the dictation endpoint.
   *
   * The string "host / date / request-line" is signed with HMAC-SHA256 using
   * the API secret. The Base64 signature is embedded in an authorization
   * string, which is itself Base64-encoded and passed as a query parameter
   * together with the date and host.
   *
   * @returns The wss:// URL including the authorization, date and host parameters.
   */
  getUrl(){
    const host = 'iat-api.xfyun.cn'
    const path = '/v2/iat'
    const apiKey = '5d88a82aa6******' // replace with your own API key
    const apiSecret = 'ODdjMjUwY*****' // replace with your own API secret
    const date = new Date().toUTCString()
    const algorithm = 'hmac-sha256'
    const headers = 'host date request-line'

    // The signed text must use exactly the host, date and request line sent below.
    const signatureOrigin = `host: ${host}\ndate: ${date}\nGET ${path} HTTP/1.1`
    const signatureSha = CryptoJS.HmacSHA256(signatureOrigin, apiSecret)
    const signature = CryptoJS.enc.Base64.stringify(signatureSha)

    const authorizationOrigin = `api_key="${apiKey}", algorithm="${algorithm}", headers="${headers}", signature="${signature}"`
    const authorization = Base64.encode(authorizationOrigin)

    // Query values are escaped with encodeURIComponent: the Base64 text may
    // contain '+', '/' and '=', and the date contains ',' and ':', none of
    // which encodeURI escapes.
    const query = [
      `authorization=${encodeURIComponent(authorization)}`,
      `date=${encodeURIComponent(date)}`,
      `host=${encodeURIComponent(host)}`
    ].join('&')
    return `wss://${host}${path}?${query}`;
  }

5. 语音输入执行

/**
 * Starts a dictation session: opens the speech WebSocket, starts the recorder
 * and writes the recognized text into `this.data.info`, appended to the text
 * that was there when the session started (kept in `this.data.temp`).
 */
record(){
    this.setData({temp:this.data.info});
    wx.showLoading({
      title: '请说话',
    })
    wxst = wx.connectSocket({ // open the WebSocket connection
      url: this.getUrl(),
      success: ()=> {
        // Start recording: 16 kHz mono PCM, up to 60 s.
        rm.start({
          duration: 1000 * 60,
          sampleRate: 16000,
          numberOfChannels: 1,
          encodeBitRate: 48000,
          format: 'PCM',
          frameSize: 5
        });
      }
    });
    wxst.onOpen(()=> {
      console.log('WebSocket 连接成功');
      wxst.onMessage((res:any)=> {
        console.log(res.data);
        const data = JSON.parse(res.data);

        if (data.code !== 0) {
          console.log("error code " + data.code + ", reason " + data.message)
          // The session cannot continue after a server-side error; release the socket.
          wxst.close();
          return
        }

        const result = data.data && data.data.result;
        if (result) {
          // A "rpl" result replaces every earlier result in the inclusive
          // range rg[0]..rg[1]. Clear the range before storing the current
          // result so the current one is never wiped.
          if (result.pgs === 'rpl' && Array.isArray(result.rg)) {
            for (let sn = result.rg[0]; sn <= result.rg[1]; sn++) {
              iatResult[sn] = null
            }
          }
          iatResult[result.sn] = result

          // Rebuild the full text from every result that is still live.
          let str = "";
          iatResult.forEach((item:any) => {
            if (item != null) {
              item.ws.forEach((ws:any) => {
                ws.cw.forEach((cw:any) => {
                  str += cw.w;
                })
              })
            }
          })
          this.setData({
            info: this.data.temp+str // intermediate recognition result
          })
        }

        if (data.data && data.data.status === 2) {
          // status 2 marks the final result: everything has been returned,
          // so the connection can be closed.
          wxst.close();
        }
      });
      wxst.onClose(()=> {
        console.log('WebSocket 关闭成功');
      });
    });

    wxst.onError((res:any)=> {
      wx.showToast({
        title: '语音服务异常',
        icon: 'error',
        duration: 2000
      })
      console.log('WebSocket 错误',res.errMsg);
    });
  }

6. 长按松开时结束语音输入

  /**
   * Ends the voice input: stops the recorder 500 ms after it is called.
   */
  recordEnd(){
    const stopDelayMs = 500;
    setTimeout(() => rm.stop(), stopDelayMs);
  }

7. 最后在 wxml 模板里绑定事件即可

bindlongpress="record"  bindtouchend="recordEnd"

最后附上讯飞的文档

https://www.xfyun.cn/doc/asr/voicedictation/API.html#%E6%8E%A5%E5%8F%A3%E8%AF%B4%E6%98%8E