It's our wits that make us men.

HTK解码代码分析1

Posted on By CQR

每个HMM的每个状态status都有一个TokenSet。这从结构体 _NetInst中可以看出。它有一个链表数据项TokenSet *state,就是保存每个状态status的TokenSet。每个TokenSet.like只保存对应状态的当前时刻观察值的最佳概率(所有状态到当前状态转移概率的最大值乘以观察值的输出概率),这个可以理解为令牌传递给当前时刻每个状态的概率。当这个概率小于pri->genThresh,对应状态的令牌将会被裁剪掉(被置零)。TokenSet.tok.path和TokenSet.tok.align则保存令牌传递过程的信息。path和align是一个双向链表,通过这两个链表可以追溯识别结果的序列,包括单词,HMM和状态级别。

阅读算法首先要理解数据结构:

/*节点实例*/
struct _NetInst
{
struct _NetInst *link; /* Doubly linked list of instances, forward 指向后继节点实例*/
struct _NetInst *knil; /* Doubly linked list of instances, backward 指向前继节点实例*/

NetNode *node;       /* Position of instance within network 实例所对应的节点*/

int flags;           /* Flags, active ... */
TokenSet *state;     /* TokenSet[0..N-2] in state [1..N-1] for hmm 一个HMM里面的每一个State对应一个TokenSet。保存每个状态的最佳概率和path&align信息*/
TokenSet *exit;      /* TokenSet in exit state 退出状态的令牌*/

LogFloat wdlk;       /* Max likelihood of t=0 path to word end node */
LogFloat max;        /* Likelihood for pruning of instance 当前时间帧的概率最大值*/

Boolean pxd;         /* External propagation done this frame */
Boolean ooo;         /* Instance potentially out of order */

 #ifdef SANITY
  int ipos;
 #endif
 };

/* A tokenset is effectively a state instance 状态的令牌结构体*/
typedef struct tokenset
{
short n;                  /* Number of rtok valid (0==1-best, 1>==N-best) n=0只有一条最佳路径。N>=1则有N条最佳路径*/
RelToken *set;            /* Likelihood sorted array[0..nToks] of rtoks 选择N-BEST后用到,用于保存N条最佳路径信息*/
Token tok;                /* Most likely Token in state 最佳路径信息*/
}
TokenSet;

/* Tokens are reasonably standard except for extra Align field 当前最佳路径的信息*/
typedef struct token
 {
LogDouble like;	/* Likelihood of token 当前时刻概率最大值*/
LogFloat lm;         /* LM likelihood of token 语言模型转移概率*/
Path *path;		/* Route (word level) through network 单词级别路径信息*/
Align *align;        /* Route (state/model level) through network 状态级别路径信息*/
}
Token;

/*记录单词级别的路径信息*/
struct path
{
Path *prev;		/* Previous word record 用于保存单词级别的识别结果*/
LogDouble like;      /* Likelihood at boundary 进入新单词时的概率*/
LogFloat lm;         /* LM likelihood of current word 单词之间的转移概率*/

NxtPath *chain;      /* Next of NBest Paths */
   
/* Pron pron;		Word level traceback info */
NetNode *node;       /* Word level traceback info 这条路径对应哪个节点*/
int frame;           /* Time (frame) of boundary (end of word) 当前是第几时间帧*/
Align *align;        /* State/model traceback for this word 对应的状态级别路径信息*/

Boolean used;        /* Reference to struct by current inst 本路径是否被使用过*/
int usage;           /* Times struct ref'd (by next path) 本路径被使用的次数*/

Path *link;          /* Next path in list 指向后继节点实例*/
Path *knil;          /* Prev path in list 指向前继节点实例*/
};

/* Extra alignments information for state/model level traceback 记录状态级别的路径信息*/
struct align
{
 short state;         /* State level traceback info 对应的状态*/
 NetNode *node;       /* Node for which alignment information present 对应的节点*/
 Align *prev;         /* Previous align record 用于保存状态级别的识别结果*/

 LogDouble like;      /* Likelihood upon entering state/model end 进入新状态时的概率*/
 int frame;           /* Frame number upon entering state/model end 当前是第几时间帧*/
   
 Boolean used;        /* Reference to struct by current inst 本路径是否被使用过*/
 int usage;           /* Times struct ref'd (by align or path) 本路径被使用的次数*/

 Align *link;         /* Next align in list 指向后继节点实例*/
 Align *knil;         /* Prev align in list 指向前继节点实例*/
 };