Using Hong Kong Chinese Speech Recognition

The Tellme Voice Application Network supports speech recognition for the Hong Kong Chinese (zh-HK) language. This article demonstrates how to access this functionality.

This example demonstrates how to recognize 'hello' in Hong Kong Chinese using an inline grammar. Comments are provided in the example VoiceXML.

<?xml version="1.0" encoding="UTF-8"?>
<vxml version="2.1"
  xmlns="http://www.w3.org/2001/vxml">

  <!--
  Description:

  Simple recognition of one Hong Kong Chinese greeting (你好嗎) with an
  inline grammar. The grammar tags a match with the value "hello".
  -->

    <form id="test">
        <field name="f1">
            <!-- Setting acoustic model to zh-hk.dnn -->
            <property name="tellme.acousticmodel" value="zh-hk.dnn"/>

            <!-- Ask the user to say the greeting 你好嗎 -->
            <prompt>Say <voice name="tracy"> 你好嗎 ? </voice></prompt>

            <!-- Inline grammar that recognizes the greeting 你好嗎 -->
            <grammar mode="voice" version="1.0" xml:lang="zh-hk"  root="main" tag-format="semantics/1.0">

                <rule id="main" scope="public">
                    <one-of>
                        <item weight="1.0">
                            你好嗎
                            <tag>out = "hello";</tag>
                        </item>
                    </one-of>
                </rule>
            </grammar>

            <filled>
                <!-- Log the raw recognized text for debugging -->
                <log>f1.utterance: <value expr="f1$.utterance"/></log>

                <!-- Reply only when the recognized utterance is the expected greeting -->
                <if cond="f1$.utterance=='你好嗎'">
                    <!-- The reply, the 300ms pause and the welcome are queued as one
                         prompt: break is speech markup, not executable content, and
                         audio requires a src or expr attribute. -->
                    <prompt>
                        <voice name="danny"> 謝謝你, 你也好嗎 ? </voice>
                        <break time="300ms"/>
                        <voice name="danny"> 歡迎光臨 </voice>
                    </prompt>
                </if>

            </filled>
        </field>

    </form>

</vxml>

Sentence recognition is one of the most commonly used types of recognition.

This example demonstrates how to recognize several different sentences in Hong Kong Chinese using inline grammars. Comments are provided in the example VoiceXML.

<?xml version="1.0" encoding="UTF-8"?>
<vxml xmlns="http://www.w3.org/2001/vxml" xmlns:conf="http://www.w3.org/2002/vxml-conformance"
xmlns:qa="http://www.tellme.com/qa" version="2.1">

  <!--
  Description:

  Sentence recognition in Hong Kong Chinese. A menu offers four sentences,
  each with its own inline grammar; the matching choice jumps to a form that
  logs the recognition result and repeats the sentence back to the caller.
  -->

  <!-- Catch-all handler (no event attribute): log and speak the event name, then exit -->
  <catch>
    <log>fail</log>
    <prompt>fail</prompt>
    <log>failure expression: <value expr="'caught unexpected event: ' + _event" /></log>
    <prompt>
      <value expr="'caught unexpected event: ' + _event" />
    </prompt>
    <exit />
  </catch>

  <menu id="get_command" dtmf="true">
    <!-- Setting acoustic model to zh-hk.dnn -->
    <property name="tellme.acousticmodel" value="zh-hk.dnn" />
    <property name="recordutterance" value="true" />

    <!-- Ask the user to say the check-out sentence -->
    <prompt count="1">Say <voice name="tracy">我需要幾點退房 </voice></prompt>

    <!-- The romanized text in front of each grammar is that choice's label;
         every label transliterates the sentence its grammar recognizes. -->
    <choice next="#check_out" accept="approximate">wo xu yao ji dian tui fang
      <grammar mode="voice" version="1.0" xml:lang="zh-HK" root="main" tag-format="semantics/1.0">
        <rule id="main" scope="public">
          <one-of>
            <item weight="1.0">
              我需要幾點退房
              <tag>out = "what_time_do_I_need_to_check_out_of_the_room";</tag>
            </item>
          </one-of>
        </rule>
      </grammar>
    </choice>

    <choice next="#payment" accept="approximate">wo xiang fu kuan
      <grammar mode="voice" version="1.0" xml:lang="zh-HK" root="main" tag-format="semantics/1.0">
        <rule id="main" scope="public">
          <one-of>
            <item weight="1.0">
              我想付款
              <tag>out = "I_want_to_make_a_payment";</tag>
            </item>
          </one-of>
        </rule>
      </grammar>
    </choice>

    <choice next="#breakfast" accept="approximate">ni men ji dian ti gong zao can
      <grammar mode="voice" version="1.0" xml:lang="zh-HK" root="main" tag-format="semantics/1.0">
        <rule id="main" scope="public">
          <one-of>
            <item weight="1.0">
              你們幾點提供早餐
              <tag>out = "when_do_you_serve_breakfast";</tag>
            </item>
          </one-of>
        </rule>
      </grammar>
    </choice>

    <choice next="#good_bye" accept="approximate">zai jian
      <grammar mode="voice" version="1.0" xml:lang="zh-HK" root="main" tag-format="semantics/1.0">
        <rule id="main" scope="public">
          <one-of>
            <item weight="1.0">
              再見
              <tag>out = "good_bye";</tag>
            </item>
          </one-of>
        </rule>
      </grammar>
    </choice>
  </menu>

  <!-- Reached when the check-out sentence is recognized -->
  <form id="check_out">
    <block>
      <log>application.lastresult$.toSource(): <value expr="application.lastresult$.toSource()" /></log>
      <prompt> You said <break time="200ms"/>
        <voice name="danny">
          我需要幾點退房
          <break time="200ms"/>
        </voice>
      </prompt>
      <log>pass</log>
      <prompt>pass</prompt>
      <!-- End the session explicitly, as the breakfast and good_bye forms do -->
      <exit />
    </block>
  </form>

  <!-- Reached when the payment sentence is recognized -->
  <form id="payment">
    <block>
      <log>application.lastresult$.toSource(): <value expr="application.lastresult$.toSource()" /></log>
      <prompt> You said <break time="200ms"/>
        <voice name="danny">
          我想付款
          <break time="200ms"/>
        </voice>
      </prompt>
      <!-- End the session explicitly, as the breakfast and good_bye forms do -->
      <exit />
    </block>
  </form>

  <!-- Reached when the breakfast sentence is recognized -->
  <form id="breakfast">
    <block>
      <log>application.lastresult$.toSource(): <value expr="application.lastresult$.toSource()" /></log>
      <prompt> You said <break time="200ms"/>
        <voice name="danny">
          你們幾點提供早餐
          <break time="200ms"/>
        </voice>
      </prompt>
      <exit />
    </block>
  </form>

  <!-- Reached when the good-bye sentence is recognized -->
  <form id="good_bye">
    <block>
      <log>application.lastresult$.toSource(): <value expr="application.lastresult$.toSource()" /></log>
      <prompt> You said <break time="200ms"/>
        <voice name="danny">
          再見
          <break time="200ms"/>
        </voice>
      </prompt>
      <exit />
    </block>
  </form>
</vxml>

DTMF recognition is still a commonly used way to enter a credit card number or the last four digits of a Social Security number.

This example demonstrates how to recognize the last four digits of an identification number using an inline DTMF grammar. Comments are provided in the example VoiceXML.

<?xml version="1.0" encoding="UTF-8"?>
<vxml version="2.1"
  xmlns="http://www.w3.org/2001/vxml">

  <!-- Description: Get the last 4 digits of an identification number through DTMF -->

    <form id="test">
        <field name="f1">
            <!-- Setting acoustic model to zh-hk.dnn -->
            <property name="tellme.acousticmodel" value="zh-hk.dnn"/>

            <!-- Ask the user to key in the last 4 digits of the identification number -->
            <prompt><voice name="danny"> 請按鍵  輸入身份證的最後4位數字 </voice></prompt>

            <!-- Inline grammar that can recognize DTMF input -->
            <grammar version="1.0" type="application/srgs+xml" xml:lang="zh-hk" mode="dtmf" root="test">
                <rule id="test" scope="public">
                    <!-- repeat="4": exactly four key presses, each one digit 0 to 9 -->
                    <item repeat="4">
                        <one-of>
                            <item>0</item>
                            <item>1</item>
                            <item>2</item>
                            <item>3</item>
                            <item>4</item>
                            <item>5</item>
                            <item>6</item>
                            <item>7</item>
                            <item>8</item>
                            <item>9</item>
                        </one-of>
                    </item>
                </rule>
            </grammar>

            <filled>
                <!-- Log the raw recognized input for debugging -->
                <log>f1.utterance: <value expr="f1$.utterance"/></log>

                <!-- Read the input back, followed by a 500ms pause. Both sit in one
                     prompt: break is speech markup, not executable content, and
                     audio requires a src or expr attribute. -->
                <prompt>
                    <voice name="tracy">您説了 : <value expr="f1$.utterance"/></voice>
                    <break time="500ms"/>
                </prompt>

                <!-- Log "pass" when the input is the sample value 1234 -->
                <if cond="f1$.utterance=='1234' ">
                    <log>pass</log>
                </if>
            </filled>
        </field>

    </form>

</vxml>

One of the most common use cases is recognition of currency amounts and numbers, most often for credit card payments.

This example demonstrates how to recognize a number and a currency amount. Comments are provided in the example VoiceXML.

<?xml version="1.0" encoding="UTF-8"?>
<vxml version="2.1"
  xmlns="http://www.w3.org/2001/vxml">

  <!--
  Description:

  Recognition of a spoken number with an inline grammar. The grammar accepts
  three phrases, tagged "20", "1_billion" and "2_billions".
  -->

    <form id="test">
        <field name="f1">
            <!-- Setting acoustic model to zh-hk.dnn -->
            <property name="tellme.acousticmodel" value="zh-hk.dnn"/>

            <!-- Ask the user to say the number 2000000000 -->
            <!-- NOTE(review): say-as carries an empty type attribute; confirm the
                 intended value against the platform documentation. -->
            <prompt>Say <voice name="tracy"> <say-as type="">2000000000 </say-as></voice> </prompt>

            <!-- Inline grammar that can recognize two billion (2B) and two related numbers -->
            <grammar mode="voice" version="1.0" xml:lang="zh-hk"  root="main" tag-format="semantics/1.0">

                <rule id="main" scope="public">
                    <one-of>
                        <item weight="1.0">
                            貳拾
                            <tag>out = "20";</tag>
                        </item>
                        <item weight="1.0">
                            拾億
                            <tag>out = "1_billion";</tag>
                        </item>
                        <item weight="1.0">
                            貳拾億
                            <tag>out = "2_billions";</tag>
                        </item>
                    </one-of>
                </rule>
            </grammar>

            <filled>
                <!-- Log the raw recognized text for debugging -->
                <log>f1.utterance: <value expr="f1$.utterance"/></log>

                <!-- Read the utterance back, followed by a 500ms pause. Both sit in
                     one prompt: break is speech markup, not executable content, and
                     audio requires a src or expr attribute. -->
                <prompt>
                    <voice name="danny">您説了 : <value expr="f1$.utterance"/></voice>
                    <break time="500ms"/>
                </prompt>

                <!-- Check that the recognition result matches the requested input (2 billion) -->
                <if cond="f1$.utterance=='貳拾億'">
                    <prompt>
                        <break time="100ms"/>
                        <voice name="danny"> 謝謝, 貳拾億 </voice>
                    </prompt>
                </if>

            </filled>
        </field>

    </form>

</vxml>

This example demonstrates how to recognize a boolean answer, i.e. yes or no. Comments are provided in the example VoiceXML.

<?xml version="1.0" encoding="UTF-8"?>
<vxml version="2.1"
  xmlns="http://www.w3.org/2001/vxml">

<!-- Description
    Recognize a boolean answer. The grammar accepts yes/no phrases as well as
    with/without phrases, tagged "yes", "no", "negative", "with" and "without".
-->

  <form id="test">
    <field name="f1">
      <!-- Setting acoustic model to zh-hk.dnn -->
      <property name="tellme.acousticmodel" value="zh-hk.dnn"/>
      <!-- Recognition confidence threshold for this field -->
      <property name="confidencelevel" value="0.20" />

      <!-- Ask user to say yes or no -->
      <prompt><voice name="danny">是或不是 ? </voice></prompt>

      <!-- Inline grammar to recognize yes and no -->
      <grammar mode="voice" version="1.0" xml:lang="zh-hk"  root="main" tag-format="semantics/1.0">
        <rule id="main" scope="public">
          <one-of>
            <item>
              是
              <tag>out = "yes";</tag>
            </item>
            <item>
              没有
              <tag>out = "without";</tag>
            </item>
            <item>
              冇
              <tag>out = "without";</tag>
            </item>
            <item>
              有
              <tag>out = "with";</tag>
            </item>
            <item>
              不是
              <tag>out = "no";</tag>
            </item>
            <item>
              否
              <tag>out = "negative";</tag>
            </item>
          </one-of>
        </rule>
      </grammar>

      <filled>
        <!-- Read the recognized utterance back. A prompt is used because audio
             requires a src or expr attribute; the space after "said" keeps the
             utterance from running into the preceding word. -->
        <prompt><voice name="tracy">You said <value expr="f1$.utterance"/></voice></prompt>
      </filled>
    </field>

  </form>

</vxml>

[24]7 Inc.| Terms of Service| Privacy Policy| General Disclaimers