Dictation Application

This application records the caller's speech using the VoiceXML <record> tag, converts it to text through a call to the external Webreco API, and logs the dictated text.

Try It!
  1. Set your Application URL to http://studio.247-inc.net/library2/code/ex-122/index.vxml
  2. Call the number shown on the VXML Tools page
  3. Sign in, and play!

How it Works

VoiceXML 2.1 Code

The VoiceXML code for this example follows:

<?xml version="1.0"?>
<vxml version="2.1" xmlns="http://www.w3.org/2001/vxml">

<!-- App Purpose : Dictation service to enable speech to text conversion -->

<script src="js/hmac-sha256.js" />
<script src="js/enc-base64-min.js" />
<script src="js/lib/xml/XMLObject.js" />
<script src="js/DictUtil.js" />
  
<!-- Webreco configuration: set the service URL, the authentication client id / shared key, and the grammar to be used. -->
<!-- NOTE(review): the shared key is embedded in this document; confirm that is acceptable for your deployment. -->
<var name="clientId" expr="'tm-dev'"/>
<var name="sharedKey" expr="'***************'"/>
<var name="webrecoUrl" expr="'https://webreco.in.tellme.com/reco'"/>
<var name="grammar1" expr="'http://gs1.tm-dev.reco.tellme.com/websearch'" />


<!-- Document-level variable that receives the recording in the "playback" block and is posted to Webreco through the <data> namelist. -->
<var name="audio" />
   <!-- Entry form: greets the caller, then hands control to the recording form. -->
   <form id="main">
      <block name="block1">
        <!-- The greeting is synthesized text; no recorded audio file is referenced. -->
        <prompt>Welcome to dictation sample</prompt>
        <goto next="#record_user_audio" />
      </block>
   </form>

   <!-- Records the caller's speech, plays it back, posts it to Webreco and logs the recognition result. -->
   <form id="record_user_audio">
       <record name="user_audio" maxtime="60s" dtmfterm="true" beep="true" type="audio/wav">
         <prompt>
            <!-- Alternative, longer wording: At the tone, please record your personal greeting. When you're done, press pound. -->
            Go ahead 
         </prompt>

         <!-- If the caller says nothing, a noinput event is thrown: apologize and end the session.
              NOTE(review): the prompt mentions a main menu, but the handler exits; confirm the intended wording. -->
         <noinput>
         Sorry. I didn't hear you. Now returning to the main menu.
         <exit />
         </noinput>

         <filled>
             <!-- Log the record shadow variables (size, duration, terminating DTMF key, maxtime flag) and the recording variable itself. -->
             <log>Dict: recording size =  <value expr="user_audio$.size"/> bytes.</log>
             <log>Dict: recording duration = <value expr="user_audio$.duration"/> milliseconds.</log>
             <log>Dict: dtmf key = <value expr="(user_audio$.termchar? user_audio$.termchar : 'none')"/></log>
             <log>Dict: maxtime = <value expr="user_audio$.maxtime"/>.</log>
             <log>Dict: ALL = <value expr="user_audio"/>.</log>

         </filled>
      </record>

	<!-- Play the recording back to the caller, then copy it into the document-level "audio" variable for submission. -->
	<block name="playback">
		Rewinding back to you.
		<break size="small"/>
		<audio expr="user_audio"/>
		<assign name="audio" expr="user_audio"/>
	</block>

	<!-- Use the Webreco API to get the dictation text. --> 
	<block name="UseWebrecoApi">   

		<!-- Build the SharedKeyLite authorization value: HMAC-SHA256 over "UTC date, newline, /clientId/WebReco",
		     keyed with sharedKey and Base64-encoded. -->
		<script> <![CDATA[
			 var date = new Date();
			 var tm = date.toUTCString();
			 var data = tm+'\n/'+clientId+'/WebReco';
			 var digest = CryptoJS.HmacSHA256(data,sharedKey);
			 var auth = CryptoJS.enc.Base64.stringify(digest);
			 var authorization='SharedKeyLite:'+tm+':'+clientId+':'+auth;
		]]> </script>

		<!-- POST authorization, grammar1 and audio to webrecoUrl as multipart/form-data (20s fetch timeout);
		     the response is exposed as dictText. -->
		<data name="dictText" srcexpr="webrecoUrl" namelist="authorization grammar1 audio" method="POST" 
			enctype="multipart/form-data" fetchtimeout="20s"/>                    

		<!-- Parse the response with getEmmaText and copy its state, error text and text into variables for the checks below. -->
		<script> <![CDATA[
			 var emmaResult = getEmmaText(dictText);
			 var emmaResultstate= emmaResult.state;  
			 var emmaResulterrText = emmaResult.errText ;  
			 var emmaResultText= emmaResult.text ;  

		]]> </script>

	<!-- Announce the outcome to the caller; log the dictated text on success and the error text in the fallback branch. -->

	<if cond = "emmaResultstate==EmmaResultState.SUCCESS" >
	  result is success 
	   <log>Dictation Text: <value expr="emmaResultText"/></log>
	<elseif cond= "emmaResultstate==EmmaResultState.NO_INPUT" />
	  result is No Input 
	<elseif cond = "emmaResultstate==EmmaResultState.UNINTERPRETED" />
	  result could not be interpreted  
	<else/>
	  Error Encountered
	   <log>Dictation Error :<value expr="emmaResulterrText"/></log>
	</if>

	</block>
   </form>
</vxml>


The source code for DictUtil.js follows:

/*
 * Copyright (c) 2015 247-INC.  All Rights Reserved.
 * 247-INC Confidential.  Do Not Distribute.
 *
 * @author     Ajoy Agrawal
 * @version    $id: $
 */


/*
 * Parse EMMA from webreco to get Dictated text
 */
/*
 * Possible outcomes of parsing an EMMA response.
 * The numeric values are compared against EmmaResult.state.
 */
var EmmaResultState = {
    UNKNOWN       : 0,  // initial state, nothing decided yet
    SUCCESS       : 1,  // dictated text was extracted
    UNINTERPRETED : 2,  // response flagged emma:uninterpreted
    NO_INPUT      : 3,  // response flagged emma:no-input
    ERROR         : 4   // response was invalid or reported an error
};

/*
 * Result of parsing an EMMA response.
 *
 * lstate   - one of the EmmaResultState values
 * ltext    - recognized (dictated) text; empty when nothing was recognized
 * lerrText - error description; empty when there is no error
 */
function EmmaResult(lstate, ltext, lerrText)
{
    this.state = lstate;
    this.text = ltext;
    this.errText = lerrText;
}

EmmaResult.prototype = new Object();
EmmaResult.prototype.constructor = EmmaResult;


/*
 * Overwrite state, text and error text in one call.
 */
EmmaResult.prototype.set = function (lstate, ltext, lerrText)
{
    this.state = lstate;
    this.text = ltext;
    this.errText = lerrText;
};

/*
 * Record a successful recognition: stores the text, clears the error text
 * and marks the result as EmmaResultState.SUCCESS.
 */
EmmaResult.prototype.setText = function (ltext)
{
    this.state = EmmaResultState.SUCCESS;
    this.text = ltext;
    this.errText = "";
};

/*
 * Record a failure: stores the error text and clears the recognized text.
 *
 * lerrText - error description to store
 * lstate   - optional; when supplied it becomes the new state, when omitted
 *            the current state is kept (lets a caller attach an error
 *            message to a state that was set earlier)
 */
EmmaResult.prototype.setErr = function (lerrText, lstate)
{
    this.errText = lerrText;
    this.text = "";
    // Previously this branch re-assigned errText and never stored lstate,
    // so the state passed by callers was silently dropped.
    if (lstate != undefined)
    {
        this.state = lstate;
    }
};

/*
 * Parse an EMMA response from Webreco and extract the dictated text.
 *
 * emmaResponseText - the response, handed as-is to the XMLObject constructor
 *
 * Returns an EmmaResult:
 *   SUCCESS       - text holds the emma:tokens of the first interpretation
 *                   inside emma:one-of
 *   UNINTERPRETED - top-level emma:interpretation has emma:uninterpreted="true"
 *   NO_INPUT      - top-level emma:interpretation has emma:no-input="true"
 *   ERROR         - the response is missing required parts, has an unexpected
 *                   namespace/version, or carries an ms:error message
 * For the non-success states errText holds the ms:error text when the
 * response provides one, otherwise a description of the problem (or "").
 *
 * Results are written with EmmaResult.set()/setText() so that state, text and
 * errText are always updated together.
 */
function getEmmaText(emmaResponseText)
{
    var TM_EMMA_TELLME_NAMESPACE = "http://www.tellme.com/ns/2009/01/emma";

    var emmaResult = new EmmaResult(EmmaResultState.UNKNOWN,"","");

    var xMLObject = new XMLObject(emmaResponseText);
    var emma_xml = xMLObject.data ? xMLObject.data['emma:emma'] : undefined;

    // Guard against a response without an emma:emma root instead of
    // failing with a TypeError on emma_xml[0].
    if (!emma_xml || !emma_xml[0])
    {
        emmaResult.set(EmmaResultState.ERROR, "", "emma:emma element missing in response");
        return emmaResult;
    }
    var emma_root = emma_xml[0];

    //validate xmlns:tm
    // (loose comparison kept: the attribute value type is whatever XMLObject produces)
    if (emma_root['@xmlns:tm'] != TM_EMMA_TELLME_NAMESPACE )
    {
        emmaResult.set(EmmaResultState.ERROR, "",
                "xmlns:tm attribute of emma:emma is invalid, has to be: "+TM_EMMA_TELLME_NAMESPACE);
        return emmaResult;
    }

    //validate tm:version == 1.0
    if (emma_root['@tm:version'] != "1.0")
    {
        emmaResult.set(EmmaResultState.ERROR, "",
                "Unsupported tm:version: "+emma_root['@tm:version'] +", has to be: 1.0");
        return emmaResult;
    }

    //emma:interpretation : To handle emma:uninterpreted / emma:no-input
    var emma_interpretation = emma_root['emma:interpretation'] ;
    if (emma_interpretation && emma_interpretation[0])
    {
        var lstate = EmmaResultState.UNKNOWN;
        if (emma_interpretation[0]["@emma:uninterpreted"] == "true" )
        {
            lstate = EmmaResultState.UNINTERPRETED;
        }
        if (emma_interpretation[0]["@emma:no-input"] == "true" )
        {
            lstate = EmmaResultState.NO_INPUT;
        }

        // Error text reported by the service, if any
        var emma_info = emma_root['emma:info'];
        var ms_error = emma_info && emma_info[0] && emma_info[0]["ms:error"];
        var reportedErr = (ms_error && ms_error[0] && ms_error[0]["$text"]) ? ms_error[0]["$text"] : "";

        // Return right away: falling through would replace these outcomes
        // with the generic "emma:interpretation missing" error below.
        if (lstate != EmmaResultState.UNKNOWN)
        {
            emmaResult.set(lstate, "", reportedErr);
            return emmaResult;
        }
        if (reportedErr)
        {
            emmaResult.set(EmmaResultState.ERROR, "", reportedErr);
            return emmaResult;
        }
    }

    // Handle "emma:one-of"
    // NOTE: in tm:version="1.0", 'match' case fall under this block,
    // i.e. all interpretations are wrapped by <emma:one-of>
    var emma_one_of = emma_root['emma:one-of'] ;
    if (emma_one_of )
    {
        var emma_one_of_interpretation = emma_one_of[0] ? emma_one_of[0]['emma:interpretation'] : undefined;
        if (emma_one_of_interpretation && emma_one_of_interpretation[0])
        {
            var best = emma_one_of_interpretation[0];
            vxmllog("Dictation Data:: confidence=" +best["@emma:confidence"]
                    + " ;duration=" +best["@emma:duration"]
                    + " ;text=" +best["@emma:tokens"]) ;
            emmaResult.setText(best["@emma:tokens"]);
            return emmaResult;
        }
        emmaResult.set(EmmaResultState.ERROR, "", "emma:interpretation missing in emma:one-of");
        return emmaResult;
    }

    emmaResult.set(EmmaResultState.ERROR, "", "emma:interpretation missing");
    return emmaResult;
}