Dictation Application

This application records user speech using the <record> tag, converts it to text through an external Webreco API call, and logs the dictated text.

Try It!
  1. Set your Application URL to http://studio.247-inc.net/library2/code/ex-122/index.vxml
  2. Call the number shown in VXML Tools page
  3. Sign in, and play!

  • Record User Speech.
    The application prompts the user to speak. The audio is recorded using the <record> tag and is played back to the user for confirmation.

  • Using Webreco API.
    The Webreco API is invoked using the <data> tag with the required parameters.
    The following configurable variables can be modified to use the Webreco API:

    webrecoUrl Specifies the URL of the Webreco service to use. ex: 'https://webreco.in.tellme.com/reco'
    grammar1 Specifies the grammar to be used. ex: 'http://gs1.tm-dev.reco.tellme.com/websearch'
    clientId Specifies the Webreco authentication ID. ex: 'tm-dev'
    sharedKey Specifies the Webreco authentication shared key.

    A MIME body containing the recorded audio is sent in the Webreco API request.

  • Parsing EMMA response.
    The response from the above API is in EMMA format, which needs to be parsed to get the dictation text. DictUtil.js has functions to parse the EMMA result.

The VoiceXML code for this example follows:

<?xml version="1.0"?>
<vxml version="2.1" xmlns="http://www.w3.org/2001/vxml">

<!-- App Purpose : Dictation service to enable speech to text conversion -->

<!--
  NOTE(review): the listing this document was recovered from had lost its
  closing tags and the <catch>, <filled> and <else/> wrappers. They are
  restored here from the surrounding comments and content; confirm the
  structure against the deployed index.vxml.
-->

<script src="js/hmac-sha256.js" />
<script src="js/enc-base64-min.js" />
<script src="js/lib/xml/XMLObject.js" />
<script src="js/DictUtil.js" />
<!-- Set these variables with webreco url,  authentication id/key and grammar to be used. -->
<var name="clientId" expr="'tm-dev'"/>
<var name="sharedKey" expr="'***************'"/>
<var name="webrecoUrl" expr="'https://webreco.in.tellme.com/reco'"/>
<var name="grammar1" expr="'http://gs1.tm-dev.reco.tellme.com/websearch'" />

<!-- Holds the recording so it can be submitted through the <data> namelist. -->
<var name="audio" />
   <!-- Entry form: plays the welcome message, then jumps to the recording form. -->
   <form id="main">
      <block name="block1">
        <audio src=""> Welcome to dictation sample </audio>
        <goto next="#record_user_audio" />
      </block>
   </form>

   <!-- Records the caller, plays the recording back, then sends it to Webreco. -->
   <form id="record_user_audio">
       <record name="user_audio" maxtime="60s" dtmfterm="true" beep="true" type="audio/wav">
            <!-- At the tone, please record your personal greeting. When you're done, press pound. -->
            Go ahead

         <!-- if the user doesn't say anything
            within finalsilence, catch the noinput -->
         <catch event="noinput">
            Sorry. I didn't hear you. Now returning to the main menu.
            <exit />
         </catch>

         <filled>
             <!-- demo record shadow variables -->
             <log>Dict: recording size =  <value expr="user_audio$.size"/> bytes.</log>
             <log>Dict: recording duration = <value expr="user_audio$.duration"/> milliseconds.</log>
             <log>Dict: dtmf key = <value expr="(user_audio$.termchar? user_audio$.termchar : 'none')"/></log>
             <log>Dict: maxtime = <value expr="user_audio$.maxtime"/>.</log>
             <log>Dict: ALL = <value expr="user_audio"/>.</log>
         </filled>
       </record>

    <!-- PlayBack Audio to User-->
    <block name="playback">
        Rewinding back to you.
        <break size="small"/>
        <audio expr="user_audio"/>
        <!-- Copy the recording into the document-level variable used by the namelist below. -->
        <assign name="audio" expr="user_audio"/>
    </block>

    <!-- Use Webreco Api to get Dictation Text-->
    <block name="UseWebrecoApi">

        <!--
          Builds the authorization value: an HMAC-SHA256 digest of
          "<UTC date>\n/<clientId>/WebReco" keyed with sharedKey, Base64-encoded
          and combined as SharedKeyLite:<date>:<clientId>:<digest>.
        -->
        <script> <![CDATA[
             var date = new Date();
             var tm = date.toUTCString();
             var data = tm+'\n/'+clientId+'/WebReco';
             var digest = CryptoJS.HmacSHA256(data,sharedKey);
             var auth = CryptoJS.enc.Base64.stringify(digest);
             var authorization='SharedKeyLite:'+tm+':'+clientId+':'+auth;
        ]]> </script>

        <!-- POST authorization, grammar and audio as multipart/form-data; the response is bound to dictText. -->
        <data name="dictText" srcexpr="webrecoUrl" namelist="authorization grammar1 audio" method="POST"
            enctype="multipart/form-data" fetchtimeout="20s"/>

        <!-- Parse the EMMA response with getEmmaText (js/DictUtil.js) and expose its fields to the <if> below. -->
        <script> <![CDATA[
             var emmaResult = getEmmaText(dictText);
             var emmaResultstate= emmaResult.state;
             var emmaResulterrText = emmaResult.errText ;
             var emmaResultText= emmaResult.text ;

        ]]> </script>

        <!--Printing Result -->

        <if cond = "emmaResultstate==EmmaResultState.SUCCESS" >
          result is success
           <log>Dictation Text: <value expr="emmaResultText"/></log>
        <elseif cond= "emmaResultstate==EmmaResultState.NO_INPUT" />
          result is No Input
        <elseif cond = "emmaResultstate==EmmaResultState.UNINTERPRETED" />
          result could not be interpreted
        <else/>
          Error Encountered
           <log>Dictation Error :<value expr="emmaResulterrText"/></log>
        </if>
    </block>
   </form>
</vxml>


The source code for DictUtil.js follows:

/**
 * Copyright (c) 2015 247-INC.  All Rights Reserved.
 * 247-INC Confidential.  Do Not Distribute.
 * @author     Ajoy Agrawal
 * @version    $id: $
 */

/**
 * Parse EMMA from webreco to get Dictated text
 */
/**
 * States an EmmaResult can be in after an EMMA response has been parsed.
 *
 * NOTE(review): only SUCCESS (1) and NO_INPUT (3) carry values visible in the
 * original listing. UNINTERPRETED, ERROR and UNKNOWN are referenced by the
 * parser and by the VoiceXML page, but their numeric values were not shown;
 * the values below are assumed. Confirm against the deployed DictUtil.js.
 * Callers compare against these members symbolically, so the values only
 * need to be distinct.
 */
var EmmaResultState = {
    SUCCESS: 1,
    UNINTERPRETED: 2,
    NO_INPUT: 3,
    ERROR: 4,
    UNKNOWN: 5
};

/**
 * Outcome of parsing a Webreco EMMA response.
 *
 * @constructor
 * @param {*} lstate        Initial state (callers pass EmmaResultState members).
 * @param {string} ltext    Dictated text.
 * @param {string} lerrText Error description.
 */
function EmmaResult(lstate, ltext, lerrText) {
    this.state = lstate;
    this.text = ltext;
    this.errText = lerrText;
}

EmmaResult.prototype = {};
EmmaResult.prototype.constructor = EmmaResult;

/**
 * Overwrites all three fields at once.
 *
 * @param {*} lstate        New state.
 * @param {string} ltext    New dictated text.
 * @param {string} lerrText New error description.
 */
EmmaResult.prototype.set = function (lstate, ltext, lerrText) {
    this.state = lstate;
    this.text = ltext;
    this.errText = lerrText;
};

/**
 * Marks the result as successful: stores the text, clears the error text and
 * sets the state to EmmaResultState.SUCCESS.
 *
 * @param {string} ltext Dictated text.
 */
EmmaResult.prototype.setText = function (ltext) {
    this.state = EmmaResultState.SUCCESS;
    this.text = ltext;
    this.errText = "";
};

/**
 * Records a failure: stores the error text, clears the dictated text and
 * updates the state.
 *
 * The earlier version ignored lstate (it only re-assigned errText), so a
 * result kept whatever state it had before the error was recorded.
 *
 * @param {string} lerrText Error description.
 * @param {*} [lstate]      State to record. When omitted (null/undefined) the
 *                          state falls back to EmmaResultState.ERROR so an
 *                          error can never leave a stale SUCCESS state behind.
 */
EmmaResult.prototype.setErr = function (lerrText, lstate) {
    this.errText = lerrText;
    this.text = "";
    // `!= null` matches both null and undefined, like the original `!= undefined`.
    if (lstate != null) {
        this.state = lstate;
    } else {
        this.state = EmmaResultState.ERROR;
    }
};

/**
 * Function to parse EMMA response to get reco text.
 *
 * Relies on XMLObject (loaded by the VoiceXML page from
 * js/lib/xml/XMLObject.js) and on a global vxmllog() function that is not
 * defined in this file.
 *
 * NOTE(review): the listing this function was recovered from had lost its
 * braces and the bodies of several branches. The statements marked
 * "reconstructed" below are inferred from how the VoiceXML page consumes the
 * result (it checks for SUCCESS / NO_INPUT / UNINTERPRETED and reads
 * text / errText); confirm against the deployed DictUtil.js.
 *
 * @param {*} emmaResponseText EMMA response, passed straight to XMLObject.
 * @returns {EmmaResult} state is SUCCESS with the recognized tokens in text;
 *     UNINTERPRETED or NO_INPUT when the top-level interpretation is flagged
 *     so; ERROR with errText describing the problem otherwise. A top-level
 *     interpretation carrying neither flag is returned with state UNKNOWN.
 */
function getEmmaText(emmaResponseText) {
    var emmaResult = new EmmaResult(EmmaResultState.UNKNOWN, "", "");
    var xMLObject = new XMLObject(emmaResponseText);
    var emma_xml = xMLObject.data ? xMLObject.data['emma:emma'] : undefined;

    // Guard against a response without an emma:emma root; indexing it blindly
    // would throw a TypeError instead of reporting an error result.
    if (!emma_xml || !emma_xml[0]) {
        emmaResult.setErr("emma:emma root element missing", EmmaResultState.ERROR);
        return emmaResult;
    }
    var emmaRoot = emma_xml[0];

    // Loose comparisons (!= / ==) are kept on purpose: the type XMLObject uses
    // for attribute values is not visible here.

    //validate xmlns:tm
    var TM_EMMA_TELLME_NAMESPACE = "http://www.tellme.com/ns/2009/01/emma";
    if (emmaRoot['@xmlns:tm'] != TM_EMMA_TELLME_NAMESPACE) {
        emmaResult.setErr("xmlns:tm attribute of emma:emma is invalid, has to be: " + TM_EMMA_TELLME_NAMESPACE, EmmaResultState.ERROR);
        return emmaResult;
    }

    //validate tm:version == 1.0
    if (emmaRoot['@tm:version'] != "1.0") {
        emmaResult.setErr("Unsupported tm:version: " + emmaRoot['@tm:version'] + ", has to be: 1.0", EmmaResultState.ERROR);
        return emmaResult;
    }

    //emma:interpretation : To handle emma:uninterpreted / emma:no-input
    var emma_interpretation = emmaRoot['emma:interpretation'];
    if (emma_interpretation) {
        if (emma_interpretation[0]["@emma:uninterpreted"] == "true") {
            // reconstructed: report the state the VoiceXML page checks for
            emmaResult.set(EmmaResultState.UNINTERPRETED, "", "");
        }
        if (emma_interpretation[0]["@emma:no-input"] == "true") {
            // reconstructed: report the state the VoiceXML page checks for
            emmaResult.set(EmmaResultState.NO_INPUT, "", "");
        }

        // Print the Error Text
        var emma_info = emmaRoot['emma:info'];
        if (emma_info && emma_info[0]["ms:error"] && emma_info[0]["ms:error"][0]["$text"]) {
            // reconstructed: keep the text for the caller and log it
            emmaResult.errText = emma_info[0]["ms:error"][0]["$text"];
            vxmllog("Dictation Error:: " + emmaResult.errText);
        }
        return emmaResult;
    }

    // Handle "emma:one-of"
    // NOTE: in tm:version="1.0", 'match' case fall under this block,
    // i.e. all interpretations are wrapped by <emma:one-of>
    var emma_one_of = emmaRoot['emma:one-of'];
    if (emma_one_of) {
        var emma_one_of_interpretation = emma_one_of[0]['emma:interpretation'];
        if (emma_one_of_interpretation) {
            // Only the first interpretation in the list is used.
            var best = emma_one_of_interpretation[0];
            vxmllog("Dictation Data:: confidence=" + best["@emma:confidence"]
                    + " ;duration=" + best["@emma:duration"]
                    + " ;text=" + best["@emma:tokens"]);
            // reconstructed: without this the result never reaches SUCCESS
            emmaResult.setText(best["@emma:tokens"]);
            return emmaResult;
        }
        emmaResult.setErr("emma:interpretation missing in emma:one-of", EmmaResultState.ERROR);
        return emmaResult;
    }

    emmaResult.setErr("emma:interpretation missing", EmmaResultState.ERROR);
    return emmaResult;
}

[24]7 Inc.| Terms of Service| Privacy Policy| General Disclaimers