HarmonyOS5/ArkTs speechRecognizer语音识别,AudioCapturer音频采集/录音
·
文章目录
概要
整个过程是从获取权限,到录音,到语音识别
语音识别 技术栈是用了 speechRecognizer,文档:https://developer.huawei.com/consumer/cn/doc/harmonyos-references/hms-ai-speechrecognizer#section184134193313
录音 技术栈用的 audio.AudioCapturer,文档:https://developer.huawei.com/consumer/cn/doc/harmonyos-references/arkts-apis-audio-audiocapturer
整体技术流程
- 校验当前设备是否具备该语音识别能力:
canIUse('SystemCapability.AI.SpeechRecognizer') // 注意:syscap 名称区分大小写,必须以 SystemCapability 开头
- 创建语音识别引擎
- 给语音识别引擎设置监听(onResult识别结果含过程 onComplete识别完成)
- 识别引擎开始
- 创建音频采集器
- 准备录音,监听readData事件 获取采集的声音 给识别引擎写入声音
- 音频采集器开始
技术细节
1,创建语音识别引擎
asrEngine: speechRecognizer.SpeechRecognitionEngine | null = null
// 创建语音识别引擎
this.asrEngine = await speechRecognizer.createEngine({
language: 'zh-CN',
online: 1,
extraParams: { "locate": "CN", "recognizerMode": "long" }
})
2,给语音识别引擎设置监听(onResult识别结果含过程 onComplete识别完成)
// 创建回调对象
this.asrEngine.setListener({
onStart(sessionId: string, eventMessage: string) {
console.info(`onStart, sessionId: ${sessionId} eventMessage: ${eventMessage}`)
this.voiceState = VoiceState.VOICING
},
// 事件回调
onEvent(sessionId: string, eventCode: number, eventMessage: string) {
console.info(`onEvent, sessionId: ${sessionId} eventCode: ${eventCode} eventMessage: ${eventMessage}`)
},
// 识别结果回调,包括中间结果和最终结果
onResult: (sessionId: string, result: speechRecognizer.SpeechRecognitionResult) => {
Logger.info(result, '识别结果')
this.keyword = result.result
this.onChange(this.keyword)
},
// 识别完成回调
onComplete(sessionId: string, eventMessage: string) {
console.info(`onComplete, sessionId: ${sessionId} eventMessage: ${eventMessage}`)
this.voiceState = VoiceState.DEFAULT
},
// 错误回调,错误码通过本方法返回
// 返回错误码1002200002,开始识别失败,重复启动startListening方法时触发
// 更多错误码请参考错误码参考
onError(sessionId: string, errorCode: number, errorMessage: string) {
console.error(`onError, sessionId: ${sessionId} errorCode: ${errorCode} errorMessage: ${errorMessage}`)
this.voiceState = VoiceState.VOICEOVER
this.onComplete(this.keyword) // 通知父 完成识别 且结果给你
this.keyword = ''
}
})
- 识别引擎开始
// 开始识别
this.asrEngine.startListening({
sessionId: this.sessionId,
audioInfo: {
audioType: 'pcm',
sampleRate: 16000,
soundChannel: 1,
sampleBit: 16
}
})
- 创建音频采集器
// 创建音频采集器
this.audioCapturer = await audio.createAudioCapturer({
streamInfo: {
samplingRate: audio.AudioSamplingRate.SAMPLE_RATE_16000, // 采样率。
channels: audio.AudioChannel.CHANNEL_1, // 通道。注意:需与 startListening 的 soundChannel: 1(单声道)保持一致
sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE, // 采样格式。
encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW // 编码格式。
},
capturerInfo: {
source: audio.SourceType.SOURCE_TYPE_MIC, // 音源类型:Mic音频源。根据业务场景配置,参考SourceType。
capturerFlags: 0 // 音频采集器标志。
}
});
- 准备录音,监听readData事件 获取采集的声音 给识别引擎写入声音
async startRecord() {
this.startEngine()
//1,配置音频采集参数并创建audioCapturer实例
this.audioCapturer = await audio.createAudioCapturer({
streamInfo: {
samplingRate: audio.AudioSamplingRate.SAMPLE_RATE_16000, // 采样率。
channels: audio.AudioChannel.CHANNEL_1, // 通道。
sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE, // 采样格式。
encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW // 编码格式。
},
capturerInfo: {
source: audio.SourceType.SOURCE_TYPE_MIC, // 音源类型:Mic音频源。根据业务场景配置,参考SourceType。
capturerFlags: 0 // 音频采集器标志。
}
})
//2,监听
this.audioCapturer?.on('readData', (buffer) => {
//获取采集的声音 给识别引擎写入声音
this.asrEngine?.writeAudio(this.sessionId, new Uint8Array(buffer))
})
this.voiceState = VoiceState.VOICING
}
- 音频采集器开始
// 开始采集
await this.audioCapturer.start()
完整 demo 案例,其中部分业务相关的变量可按需删除
import { audio } from '@kit.AudioKit'
import { speechRecognizer } from '@kit.CoreSpeechKit'
import { Logger, permissionPlugin } from 'basic'
import { promptAction, window } from '@kit.ArkUI'
import { fileIo } from '@kit.CoreFileKit'
import { BusinessError } from '@kit.BasicServicesKit'
// UI states of the voice-search flow.
export enum VoiceState {
DEFAULT,   // idle — no recording/recognition in progress
VOICING,   // actively capturing audio and recognizing speech
VOICEOVER  // recognition ended (set on engine error); result may be empty
}
@ComponentV2
export struct AudioSearchComp {
// Current UI state of the voice-search flow.
@Local voiceState: VoiceState = VoiceState.DEFAULT
// Latest recognized text (intermediate or final).
@Local keyword: string = ''
audioCapturer: audio.AudioCapturer | null = null
asrEngine: speechRecognizer.SpeechRecognitionEngine | null = null
// One session id per recognition round; regenerated in closeRecord().
sessionId: string = Date.now().toString();
// Notifies the parent of intermediate recognition results.
@Event onChange: (keyword: string) => void = () => {
}
// Notifies the parent that recognition finished, carrying the final keyword.
@Event onComplete: (keyword: string) => void = () => {
}

/**
 * Starts one recognition round: boots the ASR engine, then creates the
 * audio capturer and pipes every captured PCM buffer into the engine.
 */
async startRecord() {
  // FIX: await the engine setup so startListening() has been issued before
  // any audio frames are written (the original fired it without awaiting,
  // racing the capturer against engine initialization).
  await this.startEngine()
  if (!this.asrEngine) {
    // Engine unavailable (e.g. device lacks the syscap) — do not record.
    return
  }
  // 1. Create the capturer with parameters matching startListening():
  //    16 kHz, mono, 16-bit little-endian raw PCM.
  this.audioCapturer = await audio.createAudioCapturer({
    streamInfo: {
      samplingRate: audio.AudioSamplingRate.SAMPLE_RATE_16000, // sample rate
      channels: audio.AudioChannel.CHANNEL_1, // mono — must match soundChannel: 1
      sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE, // 16-bit LE samples
      encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW // raw PCM, no container
    },
    capturerInfo: {
      source: audio.SourceType.SOURCE_TYPE_MIC, // microphone audio source
      capturerFlags: 0 // plain capturer, no special flags
    }
  })
  // 2. Forward each captured buffer to the recognition engine.
  this.audioCapturer.on('readData', (buffer) => {
    this.asrEngine?.writeAudio(this.sessionId, new Uint8Array(buffer))
  })
  // 3. Start capturing. FIX: await so a start failure surfaces here
  //    instead of being silently dropped (original did not await).
  await this.audioCapturer.start()
  this.voiceState = VoiceState.VOICING
}

/**
 * Creates the speech-recognition engine, registers the result listeners
 * and opens a listening session. Shows a toast and bails out when the
 * device lacks the SpeechRecognizer system capability.
 */
async startEngine() {
  if (!canIUse('SystemCapability.AI.SpeechRecognizer')) {
    promptAction.showToast({ message: "当前设备不具备这个能力哦" })
    // FIX: the original fell through and tried to create the engine anyway.
    return
  }
  this.asrEngine = await speechRecognizer.createEngine({
    language: 'zh-CN',
    online: 1, // per HMS docs this field is mandatory; 1 is the supported mode
    extraParams: { "locate": "CN", "recognizerMode": "long" } // long-form dictation
  })
  // FIX: every callback is an arrow function so `this` is the component.
  // The original used method shorthand for onStart/onComplete/onError,
  // where `this.voiceState` would not resolve to the component instance.
  this.asrEngine.setListener({
    onStart: (sessionId: string, eventMessage: string) => {
      console.info(`onStart, sessionId: ${sessionId} eventMessage: ${eventMessage}`)
      this.voiceState = VoiceState.VOICING
    },
    // Generic engine event callback (diagnostics only).
    onEvent: (sessionId: string, eventCode: number, eventMessage: string) => {
      console.info(`onEvent, sessionId: ${sessionId} eventCode: ${eventCode} eventMessage: ${eventMessage}`)
    },
    // Recognition results, both intermediate and final.
    onResult: (sessionId: string, result: speechRecognizer.SpeechRecognitionResult) => {
      Logger.info(result, '识别结果')
      this.keyword = result.result
      this.onChange(this.keyword)
    },
    // Recognition completed normally.
    onComplete: (sessionId: string, eventMessage: string) => {
      console.info(`onComplete, sessionId: ${sessionId} eventMessage: ${eventMessage}`)
      this.voiceState = VoiceState.DEFAULT
    },
    // Error callback. Code 1002200002 means startListening was called
    // while a session is already running; see the error-code reference.
    onError: (sessionId: string, errorCode: number, errorMessage: string) => {
      console.error(`onError, sessionId: ${sessionId} errorCode: ${errorCode} errorMessage: ${errorMessage}`)
      this.voiceState = VoiceState.VOICEOVER
      this.onComplete(this.keyword) // hand the (possibly empty) result to the parent
      this.keyword = ''
    }
  })
  // Open the listening session; audio parameters must match the capturer.
  this.asrEngine.startListening({
    sessionId: this.sessionId,
    audioInfo: {
      audioType: 'pcm',
      sampleRate: 16000,
      soundChannel: 1,
      sampleBit: 16
    }
  })
}

/**
 * Ends the round: reports the final keyword (punctuation stripped),
 * releases the capturer and the engine, and resets the session id.
 */
async closeRecord() {
  // Strip everything that is not a letter, digit or whitespace before
  // handing the keyword to the parent.
  this.onComplete(this.keyword.replace(/[^\p{L}\p{N}\s]/gu, ''))
  this.keyword = ""
  this.voiceState = VoiceState.DEFAULT
  // FIX: await stop/release so teardown finishes in order before nulling.
  await this.audioCapturer?.stop()
  await this.audioCapturer?.release()
  this.audioCapturer = null
  // FIX: optional chaining instead of `!` — closeRecord can run when the
  // engine was never created (missing capability, or a long-press that
  // was cancelled before startEngine resolved); `!` would throw here.
  this.asrEngine?.finish(this.sessionId)
  this.asrEngine?.shutdown()
  this.asrEngine = null
  this.sessionId = Date.now().toString() // fresh session id for the next round
}

aboutToAppear(): void {
  // Request the microphone permission the first time the component loads.
  permissionPlugin.requestPermissions(['ohos.permission.MICROPHONE'])
}

build() {
  Column() {
    // Hint panel shown only while a round is active or just ended.
    if (this.voiceState !== VoiceState.DEFAULT) {
      Column({ space: 16 }) {
        if (this.voiceState === VoiceState.VOICING) {
          Text('请说,我在聆听...')
            .fontSize(14)
        } else if (this.voiceState === VoiceState.VOICEOVER && this.keyword === '') {
          Text('未检测到语音,请长按按钮重试')
            .fontSize(14)
        }
        Text() {
          Span('你可以这样说:')
          Span('太阳眼镜/冬款连衣裙')
            .fontColor($r('[basic].color.gray'))
        }
        .fontSize(12)
      }
      .justifyContent(FlexAlign.Center)
      .height(150)
    }
    Blank()
    // Long-press to record, release (or cancel) to finish and search.
    Button() {
      Row({ space: 4 }) {
        Image($r('sys.media.ohos_ic_public_voice'))
          .width(16)
          .aspectRatio(1)
          .fillColor($r('[basic].color.white'))
        if (this.voiceState === VoiceState.VOICING) {
          Text('松开立即搜索')
            .fontSize(14)
            .fontColor($r('[basic].color.white'))
        } else {
          Text('长按语音搜索')
            .fontSize(14)
            .fontColor($r('[basic].color.white'))
        }
      }
    }
    .padding({ left: 12, right: 12 })
    .height(36)
    .linearGradient({
      angle: 135,
      colors: [[$r('[basic].color.linear_begin'), 0], [$r('[basic].color.linear_end'), 1]]
    })
    .margin({ bottom: 16 })
    .gesture(LongPressGesture({
      // repeat: true,
      // fingers: 2
    })
      .onAction(() => {
        this.startRecord()
      })
      .onActionEnd(() => {
        this.closeRecord()
      })
      .onActionCancel(() => {
        this.closeRecord()
      })
    )
  }
  .layoutWeight(1)
  .width('100%')
  .backgroundImage($r('app.media.search_bg'))
  .backgroundImageSize(ImageSize.Contain)
  .backgroundImagePosition(Alignment.Bottom)
  .onVisibleAreaChange([0, 1], () => {
    // Reset transient state whenever visibility flips.
    this.keyword = ''
    this.voiceState = VoiceState.DEFAULT
  })
}
}
更多推荐
所有评论(0)