JavaScript/jQuery Speech To Text

The DevExtreme JavaScript SpeechToText component allows you to integrate voice input into your DevExtreme-powered app. The component implements the Web Speech API SpeechRecognition interface and supports custom speech recognizers.

Also available for:

ASP.NET Core, ASP.NET MVC 5

Backend API

// Demo wiring: creates the SpeechToText button, the text area that receives
// the recognized text, and the option editors that reconfigure the button.
$(() => {
  const START_HINT = 'Start voice recognition';
  const STOP_HINT = 'Stop voice recognition';

  // Either 'initial' or 'listening'; read when the display mode switches to 'Custom'.
  let recognitionState = 'initial';

  // The "Type" editor follows the recognition state only in 'Custom' display mode.
  function shouldUpdateType() {
    return displayModeBox.option('value') === 'Custom';
  }

  // True when the browser exposes a (possibly prefixed) SpeechRecognition constructor.
  function isSpeechRecognitionSupported() {
    return Boolean(window.SpeechRecognition || window.webkitSpeechRecognition);
  }

  // Records the new state, swaps the button hint and, in 'Custom' display mode,
  // mirrors the state in the "Type" editor.
  function switchRecognitionState(component, isListening) {
    recognitionState = isListening ? 'listening' : 'initial';
    component.option('hint', isListening ? STOP_HINT : START_HINT);

    if (shouldUpdateType()) {
      typeBox.option('value', isListening ? 'Danger' : 'Default');
    }
  }

  const speechToText = $('#speech-to-text')
    .dxSpeechToText({
      type: 'default',
      hint: START_HINT,
      speechRecognitionConfig: {
        interimResults: true,
        continuous: false,
      },
      onStartClick(e) {
        if (!isSpeechRecognitionSupported()) {
          DevExpress.ui.notify({
            message: 'The browser does not support Web Speech API (SpeechRecognition).',
            type: 'error',
            displayTime: 7000,
            position: 'bottom center',
            width: 'auto',
          });
          return;
        }

        switchRecognitionState(e.component, true);
      },
      onEnd(e) {
        switchRecognitionState(e.component, false);
      },
      onResult(e) {
        // Element 0 of every result is trimmed and the pieces are joined with spaces.
        const transcripts = Object.values(e.event.results)
          .map((alternatives) => alternatives[0].transcript.trim());

        textArea.option('value', transcripts.join(' '));
      },
    })
    .dxSpeechToText('instance');

  const textArea = $('#text-area')
    .dxTextArea({
      width: 360,
      height: 120,
      placeholder: 'Recognized text will appear here...',
      inputAttr: { 'aria-label': 'Recognized Text' },
      onValueChanged(e) {
        // The "Clear" button is enabled only while there is text to clear.
        clearButton.option('disabled', !e.value);
      },
    })
    .dxTextArea('instance');

  const clearButton = $('#clear-button')
    .dxButton({
      text: 'Clear',
      disabled: true,
      onClick() {
        textArea.option('value', '');
      },
    })
    .dxButton('instance');

  const displayModeBox = $('#display-mode')
    .dxSelectBox({
      items: ['Icon Only', 'Text and Icon', 'Custom'],
      value: 'Icon Only',
      inputAttr: { 'aria-label': 'Display Mode' },
      onValueChanged({ value: mode }) {
        const $button = speechToText.$element();
        const isCustomMode = mode === 'Custom';
        const showsText = mode === 'Text and Icon';

        // 'Custom' mode controls styling mode and type itself, so both editors are locked.
        stylingModeBox.option('disabled', isCustomMode);
        typeBox.option('disabled', isCustomMode);
        $button.removeClass('custom-button');

        speechToText.option(
          showsText
            ? { startText: 'Dictate', stopText: 'Stop' }
            : { startText: '', stopText: '' },
        );

        if (!isCustomMode) {
          return;
        }

        stylingModeBox.option('value', 'Contained');
        typeBox.option('value', recognitionState === 'initial' ? 'Default' : 'Danger');
        $button.addClass('custom-button');
      },
    })
    .dxSelectBox('instance');

  const stylingModeBox = $('#styling-mode')
    .dxSelectBox({
      items: ['Contained', 'Outlined', 'Text'],
      value: 'Contained',
      inputAttr: { 'aria-label': 'Styling Mode' },
      onValueChanged(e) {
        // Item captions are capitalized; the option is set to the lowercase form.
        speechToText.option('stylingMode', e.value.toLowerCase());
      },
    })
    .dxSelectBox('instance');

  const typeBox = $('#type')
    .dxSelectBox({
      items: ['Normal', 'Success', 'Default', 'Danger'],
      value: 'Default',
      inputAttr: { 'aria-label': 'Type' },
      onValueChanged(e) {
        speechToText.option('type', e.value.toLowerCase());
      },
    })
    .dxSelectBox('instance');

  $('#disabled').dxSwitch({
    onValueChanged(e) {
      speechToText.option('disabled', e.value);
    },
  });

  $('#language').dxSelectBox({
    items: ['Auto-detect', 'English', 'Spanish', 'French', 'German'],
    value: 'Auto-detect',
    inputAttr: { 'aria-label': 'Language' },
    onValueChanged(e) {
      // langMap translates the caption into the value stored in speechRecognitionConfig.lang.
      speechToText.option('speechRecognitionConfig.lang', langMap[e.value]);
    },
  });

  $('#interim-results').dxSwitch({
    value: true,
    onValueChanged(e) {
      speechToText.option('speechRecognitionConfig.interimResults', e.value);
    },
  });

  $('#continuous-recognition').dxSwitch({
    onValueChanged(e) {
      speechToText.option('speechRecognitionConfig.continuous', e.value);
    },
  });

  $('#animation').dxSwitch({
    value: true,
    onValueChanged(e) {
      // The class is present while the switch is off; styles.css sets `animation: none` for it.
      speechToText.$element().toggleClass('animation-disabled', !e.value);
    },
  });
});

<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
    <title>DevExtreme Demo</title>
    <meta http-equiv="X-UA-Compatible" content="IE=edge" />
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=5.0" />

    <!-- jQuery from the CDN; the inline script writes a tag for the local copy when window.jQuery is missing. -->
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
    <script>window.jQuery || document.write(decodeURIComponent('%3Cscript src="js/jquery.min.js"%3E%3C/script%3E'))</script>

    <!-- DevExtreme theme and library. -->
    <link rel="stylesheet" type="text/css" href="https://cdn3.devexpress.com/jslib/25.2.7/css/dx.light.css" />
    <script src="js/dx.all.js?v=25.2.7"></script>

    <!-- Demo files: data.js is listed before index.js, which reads langMap from it. -->
    <link rel="stylesheet" type="text/css" href="styles.css" />
    <script src="data.js"></script>
    <script src="index.js"></script>
</head>
<body class="dx-viewport">
    <div class="demo-container">
        <!-- Left side: the SpeechToText button, the recognized text and the "Clear" button. -->
        <div class="speech-to-text-container">
            <span>Use voice recognition (speech to text)</span>
            <div id="speech-to-text"></div>
            <div id="text-area"></div>
            <div id="clear-button"></div>
        </div>

        <!-- Right side: editors that reconfigure the SpeechToText button. -->
        <div class="options">
            <div class="caption">Options</div>
            <div class="option">
                <div>Display Mode</div>
                <div id="display-mode"></div>
            </div>
            <div class="option">
                <div>Styling Mode</div>
                <div id="styling-mode"></div>
            </div>
            <div class="option">
                <div>Type</div>
                <div id="type"></div>
            </div>
            <div class="switch">
                <div id="disabled"></div>
                <span>Disabled</span>
            </div>
            <div class="option-separator"></div>
            <div class="option">
                <div>Language</div>
                <div id="language"></div>
            </div>
            <div class="switch">
                <div id="interim-results"></div>
                <span>Interim Results</span>
            </div>
            <div class="switch">
                <div id="continuous-recognition"></div>
                <span>Continuous Recognition</span>
            </div>
            <div class="option-separator"></div>
            <div class="switch">
                <div id="animation"></div>
                <span>Animation</span>
            </div>
        </div>
    </div>
</body>
</html>

/* Page layout: demo area and options panel side by side. */
.demo-container {
    display: flex;
    gap: 20px;
    height: 640px;
}

/* Demo area: children stacked vertically and centered on both axes. */
.speech-to-text-container {
    display: flex;
    flex-direction: column;
    row-gap: 16px;
    flex-grow: 1;
    align-items: center;
    justify-content: center;
}

#text-area {
    margin-top: 16px;
}

/* Options panel: fixed width, never shrinks. */
.options {
    display: flex;
    flex-direction: column;
    flex-shrink: 0;
    width: 300px;
    box-sizing: border-box;
    padding: 20px;
    background-color: rgba(191, 191, 191, 0.15);
    gap: 16px;
}

.caption {
    font-weight: 500;
    font-size: 18px;
}

/* Label above editor. */
.option {
    display: flex;
    flex-direction: column;
    row-gap: 4px;
}

/* Switch followed by its label on one line. */
.switch {
    display: flex;
    align-items: center;
    column-gap: 8px;
}

.option-separator {
    border-bottom: 1px solid var(--dx-color-border);
}

/* Class toggled by the "Animation" switch in index.js. */
#speech-to-text.animation-disabled {
    animation: none;
}

/* Class added by index.js in the 'Custom' display mode. */
#speech-to-text.custom-button {
    border-radius: 2rem;
}

// Lookup from the captions of the "Language" select box to the value that
// index.js writes to speechRecognitionConfig.lang. 'Auto-detect' maps to an
// empty string.
const langMap = {
  'Auto-detect': '',
  English: 'en-US',
  Spanish: 'es-ES',
  French: 'fr-FR',
  German: 'de-DE',
};

You can integrate JavaScript SpeechToText with any text input, including editors available in the DevExtreme product line. Simply set the input component's value property to the transcribed text. JavaScript SpeechToText returns transcribed text in the onResult handler as a user speaks. When speech stops, the component calls the onEnd handler and switches from the "listening" state to the initial state. JavaScript SpeechToText implements state-dependent icons (startIcon/stopIcon), text (startText/stopText), and click handlers (onStartClick/onStopClick).

For a complete overview of JavaScript SpeechToText options (including Web Speech API options), refer to the following topic: JavaScript SpeechToText API Reference.