文章目录

 

概要

整个过程是从获取权限,到录音,到语音识别

语音识别 技术栈是用了 speechRecognizer,文档:https://developer.huawei.com/consumer/cn/doc/harmonyos-references/hms-ai-speechrecognizer#section184134193313

录音 技术栈用的 audio.AudioCapturer,文档:https://developer.huawei.com/consumer/cn/doc/harmonyos-references/arkts-apis-audio-audiocapturer

整体技术流程

  1. 校验当前设备是否具备该语音识别能力:
     canIUse('SystemCapability.AI.SpeechRecognizer') 
    
  2. 创建语音识别引擎
  3. 给语音识别引擎设置监听(onResult识别结果含过程 onComplete识别完成)
  4. 识别引擎开始
  5. 创建音频采集器
  6. 准备录音,监听readData事件 获取采集的声音 给识别引擎写入声音
  7. 音频采集器开始

技术细节

1,创建语音识别引擎

 asrEngine: speechRecognizer.SpeechRecognitionEngine | null = null
// 创建语音识别引擎
  this.asrEngine = await speechRecognizer.createEngine({
	  language: 'zh-CN',
	  online: 1,
	  extraParams: { "locate": "CN", "recognizerMode": "long" }
  })

2,给语音识别引擎设置监听(onResult识别结果含过程 onComplete识别完成)

 // 创建回调对象
   this.asrEngine.setListener({
     onStart(sessionId: string, eventMessage: string) {
       console.info(`onStart, sessionId: ${sessionId} eventMessage: ${eventMessage}`)
       this.voiceState = VoiceState.VOICING
     },
     // 事件回调
     onEvent(sessionId: string, eventCode: number, eventMessage: string) {
       console.info(`onEvent, sessionId: ${sessionId} eventCode: ${eventCode} eventMessage: ${eventMessage}`)
     },
     // 识别结果回调,包括中间结果和最终结果
     onResult: (sessionId: string, result: speechRecognizer.SpeechRecognitionResult) => {
       Logger.info(result, '识别结果')
       this.keyword = result.result
       this.onChange(this.keyword)
     },
     // 识别完成回调
     onComplete(sessionId: string, eventMessage: string) {
       console.info(`onComplete, sessionId: ${sessionId} eventMessage: ${eventMessage}`)
       this.voiceState = VoiceState.DEFAULT
     },
     // 错误回调,错误码通过本方法返回
     // 返回错误码1002200002,开始识别失败,重复启动startListening方法时触发
     // 更多错误码请参考错误码参考
     onError(sessionId: string, errorCode: number, errorMessage: string) {
       console.error(`onError, sessionId: ${sessionId} errorCode: ${errorCode} errorMessage: ${errorMessage}`)
       this.voiceState = VoiceState.VOICEOVER
       this.onComplete(this.keyword) // 通知父 完成识别 且结果给你
       this.keyword = ''
     }
   })
3,识别引擎开始
 // 开始识别
    this.asrEngine.startListening({
      sessionId: this.sessionId,
      audioInfo: {
        audioType: 'pcm',
        sampleRate: 16000,
        soundChannel: 1,
        sampleBit: 16
      }
    })
4,创建音频采集器
// 创建音频采集器
    this.audioCapturer = await audio.createAudioCapturer({
      streamInfo: {
        samplingRate: audio.AudioSamplingRate.SAMPLE_RATE_16000, // 采样率。
        channels: audio.AudioChannel.CHANNEL_2, // 通道。
        sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE, // 采样格式。
        encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW // 编码格式。
      },
      capturerInfo: {
        source: audio.SourceType.SOURCE_TYPE_MIC, // 音源类型:Mic音频源。根据业务场景配置,参考SourceType。
        capturerFlags: 0 // 音频采集器标志。
      }
    });
5,准备录音,监听readData事件 获取采集的声音 给识别引擎写入声音
async startRecord() {
    this.startEngine()
    //1,配置音频采集参数并创建audioCapturer实例
    this.audioCapturer = await audio.createAudioCapturer({
      streamInfo: {
        samplingRate: audio.AudioSamplingRate.SAMPLE_RATE_16000, // 采样率。
        channels: audio.AudioChannel.CHANNEL_1, // 通道。
        sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE, // 采样格式。
        encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW // 编码格式。
      },
      capturerInfo: {
        source: audio.SourceType.SOURCE_TYPE_MIC, // 音源类型:Mic音频源。根据业务场景配置,参考SourceType。
        capturerFlags: 0 // 音频采集器标志。
      }
    })
    //2,监听
    this.audioCapturer?.on('readData', (buffer) => {
    //获取采集的声音 给识别引擎写入声音
      this.asrEngine?.writeAudio(this.sessionId, new Uint8Array(buffer))
    })
    this.voiceState = VoiceState.VOICING

  }
6,音频采集器开始
// 开始采集
    await this.audioCapturer.start()

完整 demo 案例(其中部分业务相关的变量可按需删除)

import { audio } from '@kit.AudioKit'
import { speechRecognizer } from '@kit.CoreSpeechKit'
import { Logger, permissionPlugin } from 'basic'
import { promptAction, window } from '@kit.ArkUI'
import { fileIo } from '@kit.CoreFileKit'
import { BusinessError } from '@kit.BasicServicesKit'

export enum VoiceState {
  DEFAULT,
  VOICING,
  VOICEOVER
}

@ComponentV2
export struct AudioSearchComp {
  @Local voiceState: VoiceState = VoiceState.DEFAULT
  @Local keyword: string = ''
  audioCapturer: audio.AudioCapturer | null = null
  asrEngine: speechRecognizer.SpeechRecognitionEngine | null = null
  sessionId: string = Date.now().toString();
  @Event onChange: (keyword: string) => void = () => {
  }
  @Event onComplete: (keyword: string) => void = () => {
  }

  async startRecord() {
    this.startEngine()
    //1,配置音频采集参数并创建audioCapturer实例
    this.audioCapturer = await audio.createAudioCapturer({
      streamInfo: {
        samplingRate: audio.AudioSamplingRate.SAMPLE_RATE_16000, // 采样率。
        channels: audio.AudioChannel.CHANNEL_1, // 通道。
        sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE, // 采样格式。
        encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW // 编码格式。
      },
      capturerInfo: {
        source: audio.SourceType.SOURCE_TYPE_MIC, // 音源类型:Mic音频源。根据业务场景配置,参考SourceType。
        capturerFlags: 0 // 音频采集器标志。
      }
    })
    //2,采集
    this.audioCapturer?.on('readData', (buffer) => {
    //获取采集的声音 给识别引擎写入声音
      this.asrEngine?.writeAudio(this.sessionId, new Uint8Array(buffer))
    })
    // 开始采集
    this.audioCapturer.start()
    this.voiceState = VoiceState.VOICING
  }

  async startEngine() {
    if (!canIUse('SystemCapability.AI.SpeechRecognizer')) {
      promptAction.showToast({ message: "当前设备不具备这个能力哦" })
    }
    // 创建语音识别引擎
    this.asrEngine = await speechRecognizer.createEngine({
      language: 'zh-CN',
      online: 1,
      extraParams: { "locate": "CN", "recognizerMode": "long" }
    })

    // 创建回调对象
    this.asrEngine.setListener({
      onStart(sessionId: string, eventMessage: string) {
        console.info(`onStart, sessionId: ${sessionId} eventMessage: ${eventMessage}`)
        this.voiceState = VoiceState.VOICING
      },
      // 事件回调
      onEvent(sessionId: string, eventCode: number, eventMessage: string) {
        console.info(`onEvent, sessionId: ${sessionId} eventCode: ${eventCode} eventMessage: ${eventMessage}`)
      },
      // 识别结果回调,包括中间结果和最终结果
      onResult: (sessionId: string, result: speechRecognizer.SpeechRecognitionResult) => {
        Logger.info(result, '识别结果')
        this.keyword = result.result
        this.onChange(this.keyword)
      },
      // 识别完成回调
      onComplete(sessionId: string, eventMessage: string) {
        console.info(`onComplete, sessionId: ${sessionId} eventMessage: ${eventMessage}`)
        this.voiceState = VoiceState.DEFAULT
      },
      // 错误回调,错误码通过本方法返回
      // 返回错误码1002200002,开始识别失败,重复启动startListening方法时触发
      // 更多错误码请参考错误码参考
      onError(sessionId: string, errorCode: number, errorMessage: string) {
        console.error(`onError, sessionId: ${sessionId} errorCode: ${errorCode} errorMessage: ${errorMessage}`)
        this.voiceState = VoiceState.VOICEOVER
        this.onComplete(this.keyword) // 通知父 完成识别 且结果给你
        this.keyword = ''
      }
    })

    // 开始监听
    this.asrEngine.startListening({
      sessionId: this.sessionId,
      audioInfo: {
        audioType: 'pcm',
        sampleRate: 16000,
        soundChannel: 1,
        sampleBit: 16
      }
    })
  }

  async closeRecord() {
    this.onComplete(this.keyword.replace(/[^\p{L}\p{N}\s]/gu, ''))
    this.keyword = ""
    this.voiceState = VoiceState.DEFAULT
    this.audioCapturer?.stop()
    this.audioCapturer?.release()
    this.audioCapturer = null
    this.asrEngine!.finish(this.sessionId);
    this.asrEngine!.shutdown()
    this.asrEngine = null
    this.sessionId = Date.now().toString() // 重新初始化sessionId

  }

  // 其他略
  aboutToAppear(): void {
    // 语音搜索组件加载时,首次向用户申请麦克风权限
    permissionPlugin.requestPermissions(['ohos.permission.MICROPHONE'])
  }

  build() {
    Column() {
      if (this.voiceState !== VoiceState.DEFAULT) {
        Column({ space: 16 }) {
          if (this.voiceState === VoiceState.VOICING) {
            Text('请说,我在聆听...')
              .fontSize(14)
          } else if (this.voiceState === VoiceState.VOICEOVER && this.keyword === '') {
            Text('未检测到语音,请长按按钮重试')
              .fontSize(14)
          }
          Text() {
            Span('你可以这样说:')
            Span('太阳眼镜/冬款连衣裙')
              .fontColor($r('[basic].color.gray'))
          }
          .fontSize(12)
        }
        .justifyContent(FlexAlign.Center)
        .height(150)
      }
      Blank()
      Button() {
        Row({ space: 4 }) {
          Image($r('sys.media.ohos_ic_public_voice'))
            .width(16)
            .aspectRatio(1)
            .fillColor($r('[basic].color.white'))
          if (this.voiceState === VoiceState.VOICING) {
            Text('松开立即搜索')
              .fontSize(14)
              .fontColor($r('[basic].color.white'))
          } else {
            Text('长按语音搜索')
              .fontSize(14)
              .fontColor($r('[basic].color.white'))
          }
        }
      }
      .padding({ left: 12, right: 12 })
      .height(36)
      .linearGradient({
        angle: 135,
        colors: [[$r('[basic].color.linear_begin'), 0], [$r('[basic].color.linear_end'), 1]]
      })
      .margin({ bottom: 16 })
      .gesture(LongPressGesture({
        // repeat: true,
        // fingers: 2
      })
        .onAction(() => {
          this.startRecord()
        })
        .onActionEnd(() => {
          this.closeRecord()
        })
        .onActionCancel(() => {
          this.closeRecord()
        })
      )
    }
    .layoutWeight(1)
    .width('100%')
    .backgroundImage($r('app.media.search_bg'))
    .backgroundImageSize(ImageSize.Contain)
    .backgroundImagePosition(Alignment.Bottom)
    .onVisibleAreaChange([0, 1], () => {
      this.keyword = ''
      this.voiceState = VoiceState.DEFAULT
    })
  }
}
Logo

讨论HarmonyOS开发技术,专注于API与组件、DevEco Studio、测试、元服务和应用上架分发等。

更多推荐