简介:本文详细解析Web Speech API的语音合成与识别功能实现,通过代码示例和场景分析,帮助开发者快速构建网页端语音交互应用,涵盖基础用法、优化策略及跨浏览器兼容方案。
Web Speech API 是W3C推出的浏览器原生语音交互标准,包含语音合成(Speech Synthesis)和语音识别(Speech Recognition)两大核心模块。该API自2012年提出以来,已获得Chrome、Edge、Firefox、Safari等主流浏览器的广泛支持,无需任何插件即可实现跨平台语音功能。
Web Speech API采用异步事件驱动模型,通过SpeechSynthesis和SpeechRecognition两个主接口分别处理语音输出和输入。其设计遵循渐进增强原则,在不支持的浏览器中可优雅降级为文本交互。关键对象包括:
SpeechSynthesisUtterance(语音内容单元)、SpeechSynthesis(控制器)、SpeechRecognition(识别控制器)、SpeechGrammar(语法规则)
// Grab the browser's speech-synthesis controller.
const synth = window.speechSynthesis;

// Build the utterance carrying the text to be spoken.
const utterance = new SpeechSynthesisUtterance('您好,欢迎使用语音合成功能');

// Tune voice parameters before queueing.
utterance.rate = 1.0;     // speaking rate (0.1-10)
utterance.pitch = 1.0;    // pitch (0-2)
utterance.volume = 1.0;   // volume (0-1)
utterance.lang = 'zh-CN'; // Mandarin Chinese

// Queue the utterance for playback.
synth.speak(utterance);
// Log every voice currently available to the synthesizer.
function listVoices() {
  const voices = synth.getVoices();
  console.log('可用语音:', voices.map(v => `${v.name} (${v.lang})`));
}

// Bug fix: the original assigned this handler *inside* listVoices, so it was
// never registered until the function was called manually. Voices load
// asynchronously in some browsers (notably Chrome), so register it once here.
synth.onvoiceschanged = listVoices;

/**
 * Prefer a Mandarin (zh-CN) voice, falling back to the first available voice.
 * May return undefined if the voice list has not loaded yet.
 */
function getChineseVoice() {
  const voices = synth.getVoices();
  return voices.find(v => v.lang.includes('zh-CN')) || voices[0];
}
// Toggle between pausing and resuming the current utterance.
// Bug fix: the original tested the return value of synth.pause() (always
// undefined), so the ternary could never reach resume(); test the
// synthesizer's `paused` flag instead. The unused `isPaused` local is removed.
document.getElementById('pauseBtn').addEventListener('click', () => {
  synth.paused ? synth.resume() : synth.pause();
});

// Discard the current utterance and clear the pending queue.
document.getElementById('cancelBtn').addEventListener('click', () => {
  synth.cancel();
});
使用 error 事件处理语音合成失败;通过 lang 属性实现国际化。
// Bail out early when neither the prefixed nor the standard API exists.
if (!('webkitSpeechRecognition' in window) && !('SpeechRecognition' in window)) {
  alert('您的浏览器不支持语音识别');
  throw new Error('SpeechRecognition not supported');
}

// Resolve the constructor, preferring the unprefixed standard name.
const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
const recognition = new SpeechRecognition();

// Recognition settings: one-shot session, interim results, Mandarin.
recognition.continuous = false;
recognition.interimResults = true;
recognition.lang = 'zh-CN';

// Begin capturing audio.
recognition.start();

// Assemble the transcript from every result received so far.
recognition.onresult = (event) => {
  const transcript = Array.from(event.results)
    .map(result => result[0].transcript)
    .join('');
  console.log('识别结果:', transcript);

  // Publish only once the most recent result has stabilized.
  if (event.results[event.results.length - 1].isFinal) {
    document.getElementById('output').textContent = transcript;
  }
};
// Constrain recognition to a fixed command vocabulary via a JSGF grammar.
const grammar = `#JSGF V1.0;grammar commands;public <command> = 打开 | 关闭 | 保存 | 取消;`;

// Bug fix: Chrome exposes only the prefixed webkitSpeechGrammarList, so
// `new window.SpeechGrammarList()` throws there; fall back to the prefix.
const SpeechGrammarListCtor = window.SpeechGrammarList || window.webkitSpeechGrammarList;
const speechGrammarList = new SpeechGrammarListCtor();
speechGrammarList.addFromString(grammar, 1); // weight 1
recognition.grammars = speechGrammarList;
// Lifecycle logging for the recognition session.
recognition.onstart = () => console.log('识别开始');
recognition.onend = () => console.log('识别结束');
recognition.onerror = (event) => console.error('识别错误:', event.error);

// Restart recognition on demand: abort whatever is running, then start anew.
document.getElementById('toggleBtn').addEventListener('click', () => {
  recognition.abort(); // stop the in-flight session
  recognition.start(); // kick off a fresh one
});
通过 maxAlternatives 获取多个候选识别结果;设置 continuous=true 实现长语音连续识别;利用 interimResults 显示中间结果。
/**
 * Locate the SpeechRecognition constructor under any known vendor prefix.
 * Checks the unprefixed standard name first, then legacy prefixes.
 * @returns {Function|null} The constructor, or null when unsupported.
 */
function getSpeechRecognition() {
  for (const prefix of ['', 'webkit', 'moz', 'ms', 'o']) {
    const ctor = window[`${prefix}SpeechRecognition`];
    if (ctor) {
      return ctor;
    }
  }
  return null;
}
// Fallback path: hide the speech UI and reveal the text-only alternative
// when no SpeechRecognition implementation is available.
if (!getSpeechRecognition()) {
  document.getElementById('speechUI').style.display = 'none';
  document.getElementById('fallbackUI').style.display = 'block';
  // A third-party web SDK could be wired in here as a substitute.
}
<!-- Demo UI: speech-synthesis controls plus a recognition result panel. -->
<div id="app">
  <h1>语音交互演示</h1>
  <div class="section">
    <h2>语音合成</h2>
    <textarea id="synthesisText" rows="3" placeholder="输入要合成的文本"></textarea>
    <button id="speakBtn">播放语音</button>
    <button id="pauseBtn">暂停</button>
    <button id="cancelBtn">停止</button>
    <select id="voiceSelect"></select>
  </div>
  <div class="section">
    <h2>语音识别</h2>
    <button id="startBtn">开始识别</button>
    <button id="stopBtn">停止识别</button>
    <div id="recognitionResult" class="result-box"></div>
  </div>
</div>
document.addEventListener('DOMContentLoaded', () => {
  // ---- Speech synthesis setup ----
  const synth = window.speechSynthesis;
  const voiceSelect = document.getElementById('voiceSelect');
  const speakBtn = document.getElementById('speakBtn');
  const pauseBtn = document.getElementById('pauseBtn');
  const cancelBtn = document.getElementById('cancelBtn');
  const synthesisText = document.getElementById('synthesisText');

  // Bug fix: `voices` was assigned without declaration, creating an implicit
  // global (a ReferenceError in strict mode). Declare it in this scope.
  let voices = [];

  // Populate the <select> with Chinese and English voices.
  function populateVoiceList() {
    voices = synth.getVoices();
    voiceSelect.innerHTML = voices
      .filter(v => v.lang.includes('zh') || v.lang.includes('en'))
      .map(v => `<option value="${v.name}">${v.name} (${v.lang})</option>`)
      .join('');
  }
  populateVoiceList();
  // Some browsers (Chrome) load voices asynchronously; repopulate on arrival.
  if (typeof speechSynthesis.onvoiceschanged !== 'undefined') {
    speechSynthesis.onvoiceschanged = populateVoiceList;
  }

  // Speak the textarea content with the selected (or default Chinese) voice.
  speakBtn.addEventListener('click', () => {
    const selectedVoice = voices.find(v => v.name === voiceSelect.value);
    const utterance = new SpeechSynthesisUtterance(synthesisText.value);
    utterance.voice = selectedVoice || getChineseVoice();
    synth.speak(utterance);
  });
  pauseBtn.addEventListener('click', () => {
    synth.paused ? synth.resume() : synth.pause();
  });
  cancelBtn.addEventListener('click', () => {
    synth.cancel();
  });

  // ---- Speech recognition setup ----
  const SpeechRecognition = getSpeechRecognition();
  if (!SpeechRecognition) {
    alert('您的浏览器不支持语音识别功能');
    return;
  }
  const recognition = new SpeechRecognition();
  recognition.continuous = false;
  recognition.interimResults = true;
  recognition.lang = 'zh-CN';

  const startBtn = document.getElementById('startBtn');
  const stopBtn = document.getElementById('stopBtn');
  const resultBox = document.getElementById('recognitionResult');

  // Bug fix: `finalTranscript` was block-scoped inside onresult but read in
  // onend, which threw a ReferenceError. Share it across both handlers.
  let finalTranscript = '';

  startBtn.addEventListener('click', () => {
    finalTranscript = ''; // reset per session
    recognition.start();
    resultBox.textContent = '正在聆听...';
  });
  stopBtn.addEventListener('click', () => {
    recognition.stop();
  });

  // Split incoming results into interim (still changing) and final text.
  recognition.onresult = (event) => {
    let interimTranscript = '';
    finalTranscript = '';
    for (let i = event.resultIndex; i < event.results.length; i++) {
      const transcript = event.results[i][0].transcript;
      if (event.results[i].isFinal) {
        finalTranscript += transcript;
      } else {
        interimTranscript += transcript;
      }
    }
    resultBox.innerHTML = `<div class="interim">${interimTranscript}</div><div class="final">${finalTranscript}</div>`;
  };
  recognition.onerror = (event) => {
    resultBox.textContent = `错误: ${event.error}`;
  };
  recognition.onend = () => {
    if (!finalTranscript) {
      resultBox.textContent = '识别已结束';
    }
  };
});
用户体验设计:
性能优化:
合理设置识别参数(如 maxAlternatives)。安全考虑:
测试策略:
扩展方向:
处理浏览器前缀差异(webkitSpeechRecognition vs SpeechRecognition)。
// Warm up the speech engine: speaking and immediately cancelling an empty
// utterance prompts the browser to initialize its voice library early.
function preloadVoices() {
  const warmup = new SpeechSynthesisUtterance('');
  synth.speak(warmup);
  synth.cancel();
}
使用 SpeechGrammar 限制词汇范围;设置 lang 参数匹配用户口音;采用 continuous=false 进行短语音识别。Web Speech API 扩展:
与其他Web API集成:
标准化进展:
通过系统掌握Web Speech API的实现方法,开发者可以轻松为网页应用添加强大的语音交互功能,显著提升用户体验和可访问性。本文提供的完整实现方案和优化策略,可作为实际项目开发的可靠参考。