| 
				 接着进行线性层运算,即将input数组右乘矩阵m_nLinearTransMatrixBox(该矩阵的说明详见参考文献[1]),代码如下: 
input[2] = (byte)((input[2] + input[0]) & 0xFF); 
input[0] = (byte)((input[0] + input[2]) & 0xFF); 
    ...... 
   temp = input[30]; 
      input[30] = input[6]; 
         input[6] = temp; 
其实这段繁杂的代码所做的,就是将一维数组input[]与二维数组m_nLinearTransMatrixBox[,]相乘,那么为什么不用以下更精炼的代码呢? 
/* Compact form of the linear layer: treats the even-indexed elements of
   input (input[0], input[2], ..., input[30]) as a 16-element row vector and
   multiplies it by the 16x16 table m_nLinearTransMatrixBox.
   The loop counters j and k are not declared in this snippet; they are
   assumed to be declared by the surrounding code. */
byte[] temp = new byte[input.Length];
for (j = 0; j < 16; j++)
{
    for (k = 0; k < 16; k++)
    {
        /* Each product is truncated to a byte and accumulated into a byte slot. */
        temp[2 * j] += (byte)(input[2 * k] * m_nLinearTransMatrixBox[k, j]);
    }
}
/* Copy the results back only after every output element has been computed,
   so the products above always read the original input values. */
for (j = 0; j < 16; j++)
{
    input[2 * j] = (byte)temp[2 * j];
}
其中: 
/* 16x16 coefficient table used by the linear layer; the compact loop indexes
   it as m_nLinearTransMatrixBox[k, j], where k selects the input element and
   j selects the output element. */
public static byte[,] m_nLinearTransMatrixBox = new byte[16, 16]
{
  {2,2,1,1,16,8,2,1,4,2,4,2,1,1,4,4},
  {1,1,1,1,8,4,2,1,2,1,4,2,1,1,2,2},
  {1,1,4,4,2,1,4,2,4,2,16,8,2,2,1,1},
  {1,1,2,2,2,1,2,1,4,2,8,4,1,1,1,1},
  {4,4,2,1,4,2,4,2,16,8,1,1,1,1,2,2},
  {2,2,2,1,2,1,4,2,8,4,1,1,1,1,1,1},
  {1,1,4,2,4,2,16,8,2,1,2,2,4,4,1,1},
  {1,1,2,1,4,2,8,4,2,1,1,1,2,2,1,1},
  {2,1,16,8,1,1,2,2,1,1,4,4,4,2,4,2},
  {2,1,8,4,1,1,1,1,1,1,2,2,4,2,2,1},
  {4,2,4,2,4,4,1,1,2,2,1,1,16,8,2,1},
  {2,1,4,2,2,2,1,1,1,1,1,1,8,4,2,1},
  {4,2,2,2,1,1,4,4,1,1,4,2,2,1,16,8},
  {4,2,1,1,1,1,2,2,1,1,2,1,2,1,8,4},
  {16,8,1,1,2,2,1,1,4,4,2,1,4,2,4,2},
  {8,4,1,1,1,1,1,1,2,2,2,1,2,1,4,2}
};
原因很明显:这里的加法是模256的加法运算,必须在运算的每一步都将部分和与0xFF做按位与(&)操作,以保证其值始终介于0-255之间,因而不能直接采用通常意义上的二维数组乘法来进行处理。 
接着进行一个输出变换,代码为: 
/* Output transformation: combines each even-indexed element of input
   (input[0], input[2], ..., input[30]) with one entry of n_LocKeyExpandBox,
   taken from offsets 16 * m_nChipherLen + 0 .. 16 * m_nChipherLen + 15.
   Offsets 0, 3, 4, 7, 8, 11, 12 and 15 are combined with XOR; the remaining
   offsets are combined with addition masked by 0xFF.
   NOTE(review): the element type of n_LocKeyExpandBox is not visible here. */
input[0] = (byte)((input[0] ^ n_LocKeyExpandBox[16 * m_nChipherLen]));/* original note: the mask must not be applied here (written "xFF" in the original; presumably "& 0xFF" - TODO confirm why) */ 
                input[2] = (byte)((input[2] + n_LocKeyExpandBox[16 * m_nChipherLen + 1]) & 0xFF); 
                input[4] = (byte)((input[4] + n_LocKeyExpandBox[16 * m_nChipherLen + 2]) & 0xFF); 
                input[6] = (byte)((input[6] ^ n_LocKeyExpandBox[16 * m_nChipherLen + 3]) & 0xFF); 
                input[8] = (byte)((input[8] ^ n_LocKeyExpandBox[16 * m_nChipherLen + 4]) & 0xFF); 
                input[10] = (byte)((input[10] + n_LocKeyExpandBox[16 * m_nChipherLen + 5]) & 0xFF); 
                input[12] = (byte)((input[12] + n_LocKeyExpandBox[16 * m_nChipherLen + 6]) & 0xFF); 
                input[14] = (byte)((input[14] ^ n_LocKeyExpandBox[16 * m_nChipherLen + 7]) & 0xFF); 
                input[16] = (byte)((input[16] ^ n_LocKeyExpandBox[16 * m_nChipherLen + 8]) & 0xFF); 
                input[18] = (byte)((input[18] + n_LocKeyExpandBox[16 * m_nChipherLen + 9]) & 0xFF); 
                input[20] = (byte)((input[20] + n_LocKeyExpandBox[16 * m_nChipherLen + 10]) & 0xFF); 
                input[22] = (byte)((input[22] ^ n_LocKeyExpandBox[16 * m_nChipherLen + 11]) & 0xFF); 
                input[24] = (byte)((input[24] ^ n_LocKeyExpandBox[16 * m_nChipherLen + 12]) & 0xFF); 
                input[26] = (byte)((input[26] + n_LocKeyExpandBox[16 * m_nChipherLen + 13]) & 0xFF); 
                input[28] = (byte)((input[28] + n_LocKeyExpandBox[16 * m_nChipherLen + 14]) & 0xFF); 
                input[30] = (byte)((input[30] ^ n_LocKeyExpandBox[16 * m_nChipherLen + 15]) & 0xFF); 			
				 |