/*
 * Automatically Tuned Linear Algebra Software v3.11.41
 * Copyright (C) 2013 R. Clint Whaley
 */
#include "atlas_asm.h"
/*
 * Aligned 128-bit move used throughout the kernel.  movaps transfers the
 * same 16 bytes as movapd.
 * NOTE(review): presumably picked for its shorter encoding -- confirm.
 */
#define MOVAPD  movaps

/*
 * Integer register roles (AT&T syntax).
 * %rbp, %rbx and %r12 are the ones the prologue saves to the stack frame.
 */
/* -- arguments that stay in their incoming registers -- */
#define nmu     %rdi            /* arg 1: nmu                              */
#define nnu     %rsi            /* arg 2: nnu                              */
#define pA      %rcx            /* arg 4: A pointer                        */
#define pC      %r9             /* arg 6: C pointer                        */
/* -- values moved or derived during setup -- */
#define pB      %rax            /* B pointer, copied out of %r8 (arg 5)    */
#define pB0     %r12            /* copy of pB taken after the +128 bias    */
#define nnu0    %r10            /* copy of nnu taken at entry              */
/* -- prefetch pointers, loaded from the stack-passed arguments -- */
#define pfA     %rbp            /* loaded from the pAn stack argument      */
#define pfB     %rdx            /* loaded from pBn; overwrites arg 3 (K)   */
/* -- constants held in registers -- */
#define incPF   %rbx            /* set to 24*1*8                           */
#define incAm   %r11            /* set to KB*24*8                          */
#define r256    %r8             /* set to 256 once pB has left %r8         */

/*
 * SSE register roles.
 *   rA0 / rB0   : operand registers; rA0 receives 16-byte loads from A and
 *                 rB0 receives a movddup-broadcast element of B
 *   rC00..rC11  : twelve packed-double accumulators, seeded by the peeled
 *                 first K iteration and updated with addpd afterwards
 */
#define rA0     %xmm0           /* A operand                               */
#define rB0     %xmm1           /* broadcast B operand                     */

#define rC00    %xmm2           /* accumulator  0                          */
#define rC01    %xmm3           /* accumulator  1                          */
#define rC02    %xmm4           /* accumulator  2                          */
#define rC03    %xmm5           /* accumulator  3                          */
#define rC04    %xmm6           /* accumulator  4                          */
#define rC05    %xmm7           /* accumulator  5                          */
#define rC06    %xmm8           /* accumulator  6                          */
#define rC07    %xmm9           /* accumulator  7                          */
#define rC08    %xmm10          /* accumulator  8                          */
#define rC09    %xmm11          /* accumulator  9                          */
#define rC10    %xmm12          /* accumulator 10                          */
#define rC11    %xmm13          /* accumulator 11                          */
/*
 * prefA(m_) / prefB(m_): prefetch instruction applied to memory operand m_
 * when touching the next A / B block.  An empty definition makes the
 * corresponding prefX(...) line in the kernel expand to nothing.
 *
 * Selection by the compile-time K dimension KB:
 *   KB < 45        small problems.  With ATL_3DNow only one block is
 *                  prefetched, into L1 (prefetcht0): B when ATL_MOVEB is
 *                  defined, A when ATL_MOVEA is defined.  Without ATL_3DNow
 *                  both blocks are prefetched with prefetcht1.
 *   45 <= KB <= 78 both blocks are prefetched with prefetcht2.
 *   KB > 78        very large problems prefetch only one block with
 *                  prefetcht2: B when ATL_MOVEB is defined, otherwise A.
 *
 * NOTE(review): ATL_3DNow is presumably the AMD marker the original
 * "small problems on AMD" remark referred to -- confirm.
 */
#if KB < 45
   #ifdef ATL_3DNow
      #ifdef ATL_MOVEB
         #define prefA(m_)
         #define prefB(m_) prefetcht0 m_
      #elif defined(ATL_MOVEA)
         #define prefA(m_) prefetcht0 m_
         #define prefB(m_)
      #else
/*
 *       Neither ATL_MOVEA nor ATL_MOVEB given: previously prefA/prefB were
 *       left undefined here, so the prefA(...)/prefB(...) lines reached the
 *       assembler unexpanded.  Fall back to prefetching both blocks to L1.
 *       NOTE(review): fallback policy is a judgment call -- confirm.
 */
         #define prefA(m_) prefetcht0 m_
         #define prefB(m_) prefetcht0 m_
      #endif
   #else
      #define prefA(m_) prefetcht1 m_
      #define prefB(m_) prefetcht1 m_
   #endif
#else
   #if KB > 78  /* very large problems should only pref 1 block */
      #ifdef ATL_MOVEB
         #define prefB(m_) prefetcht2 m_
         #define prefA(m_)
      #else
         #define prefA(m_) prefetcht2 m_
         #define prefB(m_)
      #endif
   #else
      #define prefA(m_) prefetcht2 m_
      #define prefB(m_) prefetcht2 m_
   #endif
#endif
/*
 * prefC(m_): prefetch applied to lines of C.  Builds with ATL_3DNow use
 * prefetchw (prefetch with intent to write); all others use prefetcht0.
 */
#ifndef ATL_3DNow
   #define prefC(m_) prefetcht0 m_
#else
   #define prefC(m_) prefetchw m_
#endif
/*
 * Either spelling of the negative-beta flag (BETAN or BETAn) is folded
 * into the single internal flag BETAN1.
 */
#ifdef BETAN
   #define BETAN1
#elif defined(BETAn)
   #define BETAN1
#endif
/*
 * BETCOP: packed-double instruction selected by BETAN1 -- subpd when it is
 * defined, addpd otherwise.
 * NOTE(review): presumably the op that combines existing C with the computed
 * product; its use is outside the visible part of the file -- confirm.
 */
#ifndef BETAN1
   #define BETCOP addpd
#else
   #define BETCOP subpd
#endif
/*
 * FSIZE: number of bytes subtracted from %rsp on entry (six 8-byte slots).
 * The prologue stores %rbp, %rbx and %r12 at offsets 0, 8 and 16 of this
 * frame, and stack-passed arguments are then addressed as FSIZE+n(%rsp).
 * The body is pasted textually into operands such as $FSIZE and
 * FSIZE+16(%rsp), so it is deliberately left as a bare expression.
 */
#define FSIZE 6*8
/*
                    rdi      rsi    rdx        rcx         r8        r9
void ATL_USERMM(SZT nmu, SZT nnu, SZT K, CTYPE *pA, CTYPE *pB, TYPE *pC,
                  8(%rsp)    16(%rsp)     24(%rsp)
                CTYPE *pAn, CTYPE *pBn, CTYPE *pCn);
 */
.text
.global ATL_asmdecor(ATL_USERMM)
ALIGN16
ATL_asmdecor(ATL_USERMM):
/*
 * Save callee-saved iregs
 */
     prefetcht0 (pA)
   sub $FSIZE, %rsp
     prefetcht0 (%r8)
   movq    %rbp, 0(%rsp)
     prefetcht0 64(pA)
   movq    %rbx, 8(%rsp)
     prefetcht0 128(pA)
   movq    %r12, 16(%rsp)
     prefetcht0 192(pA)
/*
 * Load parameters
 */
   movq %r8, pB
     prefetcht0 256(pA)
   mov nnu, nnu0
     prefetcht0 320(pA)
   movq FSIZE+16(%rsp), pfB     /* pfB = pBn */
     prefetcht0 384(pA)
   movq FSIZE+8(%rsp), pfA      /* pfA = pAn */
     prefetcht0 448(pA)
   mov $24*1*8, incPF   /* incPF = MU*NU*sizeof */
/*
 * Bias ptrs by 128 so that offsets -128..127 fit in 1-byte displacements
 */
   sub $-128, pA
     prefetcht0 512(pA)
   sub $-128, pB
     prefetcht0 576(pA)
   sub $-128, pC
     prefetcht0 640(pA)
   sub $-128, pfA
   sub $-128, pfB
   mov $KB*24*8, incAm           /* incAm = KB*MU*size */
   movq pB, pB0
   mov $256, r256

   ALIGN16
   .local MNLOOP
   MNLOOP:
/*
 *       Peel first iteration of K-loop to handle init of C to 0
 */
#if 0
         xorps rC00, rC00
         MOVAPD rC00, rC01
         xorps rC02, rC02
         MOVAPD rC00, rC03
         xorps rC04, rC04
         MOVAPD rC00, rC05
         xorps rC06, rC06
         MOVAPD rC00, rC07
         xorps rC08, rC08
         MOVAPD rC00, rC09
         xorps rC10, rC10
         MOVAPD rC00, rC11
#endif
/*
 *       Unroll 1st iteration: it initializes rCxx (so no zeroing) & prefetches
 */
         movddup -128(pB), rC00
         MOVAPD rC00, rC01
         mulpd -128(pA), rC00
         prefC(-128(pC))
         MOVAPD rC01, rC02
         mulpd -112(pA), rC01
         prefC(-64(pC))
         MOVAPD rC02, rC03
         mulpd -96(pA), rC02
         prefC((pC))
         MOVAPD rC03, rC04
         mulpd -80(pA), rC03
         #ifdef ATL_MOVEB
            prefB(-128(pfB))
         #else
            prefC(64(pC))
         #endif
         MOVAPD rC04, rC05
         mulpd -64(pA), rC04
         #ifdef ATL_MOVEB
            prefB(-64(pfB))
         #else
            prefC(-128(pC,r256))
         #endif
         MOVAPD rC05, rC06
         mulpd -48(pA), rC05
         #ifdef ATL_MOVEB
            prefB((pfB))
         #else
            prefC(-64(pC,r256))
         #endif
         MOVAPD rC06, rC07
         mulpd -32(pA), rC06
         prefA(-128(pfA))
         MOVAPD rC07, rC08
         mulpd -16(pA), rC07
         prefA(-64(pfA))
         MOVAPD rC08, rC09
         mulpd (pA), rC08
         prefA((pfA))
         MOVAPD rC09, rC10
         mulpd 16(pA), rC09
         MOVAPD rC10, rC11
         mulpd 32(pA), rC10
         mulpd 48(pA), rC11
/*
 *       ==========================
 *       Completely unrolled K-loop
 *       ==========================
 */
         #if KB > 1
            movddup -120(pB), rB0
            MOVAPD 64(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 80(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 96(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 112(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 128(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 144(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 160(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 176(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 192(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 208(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 224(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 240(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 2
            movddup -112(pB), rB0
            MOVAPD 256(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 272(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 288(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 304(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 320(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 336(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 352(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 368(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 384(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 400(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 416(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 432(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 3
            movddup -104(pB), rB0
            MOVAPD 448(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 464(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 480(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 496(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 512(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 528(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 544(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 560(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 576(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 592(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 608(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 624(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 4
            movddup -96(pB), rB0
            MOVAPD 640(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 656(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 672(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 688(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 704(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 720(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 736(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 752(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 768(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 784(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 800(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 816(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 5
            movddup -88(pB), rB0
            MOVAPD 832(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 848(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 864(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 880(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 896(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 912(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 928(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 944(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 960(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 976(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 992(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 1008(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 6
            movddup -80(pB), rB0
            MOVAPD 1024(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 1040(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 1056(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 1072(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 1088(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 1104(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 1120(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 1136(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 1152(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 1168(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 1184(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 1200(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 7
            movddup -72(pB), rB0
            MOVAPD 1216(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 1232(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 1248(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 1264(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 1280(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 1296(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 1312(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 1328(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 1344(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 1360(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 1376(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 1392(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 8
            movddup -64(pB), rB0
            MOVAPD 1408(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 1424(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 1440(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 1456(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 1472(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 1488(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 1504(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 1520(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 1536(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 1552(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 1568(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 1584(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 9
            movddup -56(pB), rB0
            MOVAPD 1600(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 1616(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 1632(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 1648(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 1664(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 1680(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 1696(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 1712(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 1728(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 1744(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 1760(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 1776(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 10
            movddup -48(pB), rB0
            MOVAPD 1792(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 1808(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 1824(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 1840(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 1856(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 1872(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 1888(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 1904(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 1920(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 1936(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 1952(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 1968(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 11
            movddup -40(pB), rB0
            MOVAPD 1984(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 2000(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 2016(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 2032(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 2048(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 2064(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 2080(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 2096(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 2112(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 2128(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 2144(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 2160(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 12
            movddup -32(pB), rB0
            MOVAPD 2176(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 2192(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 2208(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 2224(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 2240(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 2256(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 2272(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 2288(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 2304(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 2320(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 2336(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 2352(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 13
            movddup -24(pB), rB0
            MOVAPD 2368(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 2384(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 2400(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 2416(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 2432(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 2448(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 2464(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 2480(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 2496(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 2512(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 2528(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 2544(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 14
            movddup -16(pB), rB0
            MOVAPD 2560(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 2576(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 2592(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 2608(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 2624(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 2640(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 2656(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 2672(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 2688(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 2704(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 2720(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 2736(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 15
            movddup -8(pB), rB0
            MOVAPD 2752(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 2768(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 2784(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 2800(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 2816(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 2832(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 2848(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 2864(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 2880(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 2896(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 2912(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 2928(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 16
            movddup 0(pB), rB0
            MOVAPD 2944(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 2960(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 2976(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 2992(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 3008(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 3024(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 3040(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 3056(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 3072(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 3088(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 3104(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 3120(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 17
            movddup 8(pB), rB0
            MOVAPD 3136(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 3152(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 3168(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 3184(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 3200(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 3216(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 3232(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 3248(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 3264(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 3280(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 3296(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 3312(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 18
            movddup 16(pB), rB0
            MOVAPD 3328(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 3344(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 3360(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 3376(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 3392(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 3408(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 3424(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 3440(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 3456(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 3472(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 3488(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 3504(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 19
            movddup 24(pB), rB0
            MOVAPD 3520(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 3536(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 3552(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 3568(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 3584(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 3600(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 3616(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 3632(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 3648(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 3664(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 3680(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 3696(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 20
            movddup 32(pB), rB0
            MOVAPD 3712(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 3728(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 3744(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 3760(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 3776(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 3792(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 3808(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 3824(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 3840(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 3856(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 3872(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 3888(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 21
            movddup 40(pB), rB0
            MOVAPD 3904(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 3920(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 3936(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 3952(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 3968(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 3984(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 4000(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 4016(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 4032(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 4048(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 4064(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 4080(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 22
            movddup 48(pB), rB0
            MOVAPD 4096(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 4112(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 4128(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 4144(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 4160(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 4176(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 4192(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 4208(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 4224(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 4240(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 4256(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 4272(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 23
            movddup 56(pB), rB0
            MOVAPD 4288(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 4304(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 4320(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 4336(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 4352(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 4368(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 4384(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 4400(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 4416(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 4432(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 4448(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 4464(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 24
            movddup 64(pB), rB0
            MOVAPD 4480(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 4496(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 4512(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 4528(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 4544(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 4560(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 4576(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 4592(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 4608(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 4624(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 4640(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 4656(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 25
            /*
             * Emitted only when KB > 25.  Broadcasts the double at 72(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 4672..4848(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=25 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 72(pB), rB0
            MOVAPD 4672(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 4688(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 4704(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 4720(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 4736(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 4752(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 4768(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 4784(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 4800(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 4816(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 4832(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 4848(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 26
            /*
             * Emitted only when KB > 26.  Broadcasts the double at 80(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 4864..5040(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=26 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 80(pB), rB0
            MOVAPD 4864(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 4880(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 4896(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 4912(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 4928(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 4944(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 4960(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 4976(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 4992(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 5008(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 5024(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 5040(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 27
            /*
             * Emitted only when KB > 27.  Broadcasts the double at 88(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 5056..5232(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=27 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 88(pB), rB0
            MOVAPD 5056(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 5072(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 5088(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 5104(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 5120(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 5136(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 5152(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 5168(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 5184(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 5200(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 5216(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 5232(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 28
            /*
             * Emitted only when KB > 28.  Broadcasts the double at 96(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 5248..5424(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=28 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 96(pB), rB0
            MOVAPD 5248(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 5264(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 5280(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 5296(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 5312(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 5328(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 5344(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 5360(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 5376(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 5392(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 5408(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 5424(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 29
            /*
             * Emitted only when KB > 29.  Broadcasts the double at 104(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 5440..5616(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=29 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 104(pB), rB0
            MOVAPD 5440(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 5456(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 5472(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 5488(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 5504(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 5520(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 5536(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 5552(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 5568(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 5584(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 5600(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 5616(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 30
            /*
             * Emitted only when KB > 30.  Broadcasts the double at 112(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 5632..5808(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=30 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 112(pB), rB0
            MOVAPD 5632(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 5648(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 5664(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 5680(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 5696(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 5712(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 5728(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 5744(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 5760(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 5776(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 5792(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 5808(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 31
            /*
             * Emitted only when KB > 31.  Broadcasts the double at 120(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 5824..6000(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=31 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 120(pB), rB0
            MOVAPD 5824(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 5840(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 5856(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 5872(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 5888(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 5904(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 5920(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 5936(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 5952(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 5968(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 5984(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 6000(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 32
            /*
             * Emitted only when KB > 32.  Broadcasts the double at 128(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 6016..6192(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=32 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 128(pB), rB0
            MOVAPD 6016(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 6032(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 6048(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 6064(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 6080(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 6096(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 6112(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 6128(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 6144(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 6160(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 6176(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 6192(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 33
            /*
             * Emitted only when KB > 33.  Broadcasts the double at 136(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 6208..6384(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=33 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 136(pB), rB0
            MOVAPD 6208(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 6224(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 6240(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 6256(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 6272(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 6288(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 6304(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 6320(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 6336(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 6352(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 6368(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 6384(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 34
            /*
             * Emitted only when KB > 34.  Broadcasts the double at 144(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 6400..6576(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=34 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 144(pB), rB0
            MOVAPD 6400(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 6416(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 6432(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 6448(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 6464(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 6480(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 6496(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 6512(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 6528(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 6544(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 6560(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 6576(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 35
            /*
             * Emitted only when KB > 35.  Broadcasts the double at 152(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 6592..6768(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=35 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 152(pB), rB0
            MOVAPD 6592(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 6608(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 6624(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 6640(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 6656(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 6672(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 6688(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 6704(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 6720(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 6736(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 6752(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 6768(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 36
            /*
             * Emitted only when KB > 36.  Broadcasts the double at 160(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 6784..6960(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=36 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 160(pB), rB0
            MOVAPD 6784(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 6800(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 6816(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 6832(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 6848(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 6864(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 6880(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 6896(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 6912(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 6928(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 6944(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 6960(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 37
            /*
             * Emitted only when KB > 37.  Broadcasts the double at 168(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 6976..7152(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=37 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 168(pB), rB0
            MOVAPD 6976(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 6992(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 7008(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 7024(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 7040(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 7056(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 7072(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 7088(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 7104(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 7120(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 7136(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 7152(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 38
            /*
             * Emitted only when KB > 38.  Broadcasts the double at 176(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 7168..7344(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=38 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 176(pB), rB0
            MOVAPD 7168(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 7184(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 7200(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 7216(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 7232(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 7248(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 7264(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 7280(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 7296(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 7312(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 7328(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 7344(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 39
            /*
             * Emitted only when KB > 39.  Broadcasts the double at 184(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 7360..7536(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=39 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 184(pB), rB0
            MOVAPD 7360(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 7376(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 7392(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 7408(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 7424(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 7440(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 7456(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 7472(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 7488(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 7504(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 7520(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 7536(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 40
            /*
             * Emitted only when KB > 40.  Broadcasts the double at 192(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 7552..7728(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=40 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 192(pB), rB0
            MOVAPD 7552(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 7568(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 7584(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 7600(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 7616(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 7632(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 7648(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 7664(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 7680(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 7696(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 7712(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 7728(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 41
            /*
             * Emitted only when KB > 41.  Broadcasts the double at 200(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 7744..7920(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=41 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 200(pB), rB0
            MOVAPD 7744(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 7760(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 7776(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 7792(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 7808(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 7824(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 7840(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 7856(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 7872(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 7888(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 7904(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 7920(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 42
            /*
             * Emitted only when KB > 42.  Broadcasts the double at 208(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 7936..8112(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=42 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 208(pB), rB0
            MOVAPD 7936(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 7952(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 7968(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 7984(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 8000(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 8016(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 8032(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 8048(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 8064(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 8080(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 8096(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 8112(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 43
            /*
             * Emitted only when KB > 43.  Broadcasts the double at 216(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 8128..8304(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=43 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 216(pB), rB0
            MOVAPD 8128(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 8144(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 8160(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 8176(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 8192(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 8208(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 8224(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 8240(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 8256(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 8272(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 8288(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 8304(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 44
            /*
             * Emitted only when KB > 44.  Broadcasts the double at 224(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 8320..8496(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=44 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 224(pB), rB0
            MOVAPD 8320(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 8336(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 8352(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 8368(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 8384(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 8400(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 8416(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 8432(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 8448(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 8464(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 8480(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 8496(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 45
            /*
             * Emitted only when KB > 45.  Broadcasts the double at 232(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 8512..8688(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=45 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 232(pB), rB0
            MOVAPD 8512(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 8528(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 8544(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 8560(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 8576(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 8592(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 8608(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 8624(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 8640(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 8656(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 8672(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 8688(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 46
            /*
             * Emitted only when KB > 46.  Broadcasts the double at 240(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 8704..8880(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=46 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 240(pB), rB0
            MOVAPD 8704(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 8720(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 8736(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 8752(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 8768(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 8784(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 8800(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 8816(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 8832(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 8848(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 8864(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 8880(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 47
            /*
             * Emitted only when KB > 47.  Broadcasts the double at 248(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 8896..9072(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=47 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 248(pB), rB0
            MOVAPD 8896(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 8912(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 8928(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 8944(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 8960(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 8976(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 8992(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 9008(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 9024(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 9040(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 9056(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 9072(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 48
            /*
             * Emitted only when KB > 48.  Broadcasts the double at 256(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 9088..9264(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=48 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 256(pB), rB0
            MOVAPD 9088(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 9104(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 9120(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 9136(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 9152(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 9168(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 9184(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 9200(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 9216(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 9232(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 9248(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 9264(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 49
            /*
             * Emitted only when KB > 49.  Broadcasts the double at 264(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 9280..9456(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=49 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 264(pB), rB0
            MOVAPD 9280(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 9296(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 9312(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 9328(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 9344(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 9360(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 9376(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 9392(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 9408(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 9424(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 9440(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 9456(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 50
            /*
             * Emitted only when KB > 50.  Broadcasts the double at 272(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 9472..9648(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=50 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 272(pB), rB0
            MOVAPD 9472(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 9488(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 9504(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 9520(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 9536(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 9552(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 9568(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 9584(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 9600(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 9616(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 9632(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 9648(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 51
            /*
             * Emitted only when KB > 51.  Broadcasts the double at 280(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 9664..9840(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=51 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 280(pB), rB0
            MOVAPD 9664(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 9680(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 9696(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 9712(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 9728(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 9744(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 9760(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 9776(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 9792(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 9808(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 9824(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 9840(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 52
            /*
             * Emitted only when KB > 52.  Broadcasts the double at 288(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 9856..10032(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=52 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 288(pB), rB0
            MOVAPD 9856(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 9872(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 9888(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 9904(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 9920(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 9936(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 9952(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 9968(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 9984(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 10000(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 10016(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 10032(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 53
            /*
             * Emitted only when KB > 53.  Broadcasts the double at 296(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 10048..10224(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=53 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 296(pB), rB0
            MOVAPD 10048(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 10064(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 10080(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 10096(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 10112(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 10128(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 10144(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 10160(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 10176(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 10192(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 10208(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 10224(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 54
            /*
             * Emitted only when KB > 54.  Broadcasts the double at 304(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 10240..10416(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=54 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 304(pB), rB0
            MOVAPD 10240(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 10256(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 10272(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 10288(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 10304(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 10320(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 10336(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 10352(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 10368(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 10384(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 10400(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 10416(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 55
            /*
             * Emitted only when KB > 55.  Broadcasts the double at 312(pB) into
             * both halves of rB0, multiplies it by the twelve 16-byte vectors
             * at 10432..10608(pA), and adds each product into its own
             * accumulator, rC00..rC11.
             * NOTE(review): presumably the k=55 term of the unrolled K loop
             * -- confirm against the loop setup outside this step.
             */
            movddup 312(pB), rB0
            MOVAPD 10432(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 10448(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 10464(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 10480(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 10496(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 10512(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 10528(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 10544(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 10560(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 10576(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 10592(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Twelfth vector: multiply straight from memory into rB0, whose
               broadcast value is not read again in this step. */
            mulpd 10608(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 56
            /*
             * K-step assembled only when KB > 56.  movddup broadcasts the
             * double at 320(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 10624..10800(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 320(pB), rB0
            MOVAPD 10624(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 10640(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 10656(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 10672(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 10688(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 10704(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 10720(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 10736(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 10752(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 10768(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 10784(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 10800(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 57
            /*
             * K-step assembled only when KB > 57.  movddup broadcasts the
             * double at 328(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 10816..10992(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 328(pB), rB0
            MOVAPD 10816(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 10832(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 10848(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 10864(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 10880(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 10896(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 10912(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 10928(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 10944(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 10960(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 10976(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 10992(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 58
            /*
             * K-step assembled only when KB > 58.  movddup broadcasts the
             * double at 336(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 11008..11184(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 336(pB), rB0
            MOVAPD 11008(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 11024(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 11040(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 11056(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 11072(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 11088(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 11104(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 11120(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 11136(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 11152(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 11168(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 11184(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 59
            /*
             * K-step assembled only when KB > 59.  movddup broadcasts the
             * double at 344(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 11200..11376(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 344(pB), rB0
            MOVAPD 11200(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 11216(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 11232(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 11248(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 11264(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 11280(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 11296(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 11312(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 11328(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 11344(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 11360(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 11376(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 60
            /*
             * K-step assembled only when KB > 60.  movddup broadcasts the
             * double at 352(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 11392..11568(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 352(pB), rB0
            MOVAPD 11392(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 11408(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 11424(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 11440(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 11456(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 11472(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 11488(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 11504(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 11520(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 11536(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 11552(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 11568(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 61
            /*
             * K-step assembled only when KB > 61.  movddup broadcasts the
             * double at 360(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 11584..11760(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 360(pB), rB0
            MOVAPD 11584(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 11600(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 11616(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 11632(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 11648(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 11664(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 11680(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 11696(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 11712(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 11728(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 11744(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 11760(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 62
            /*
             * K-step assembled only when KB > 62.  movddup broadcasts the
             * double at 368(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 11776..11952(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 368(pB), rB0
            MOVAPD 11776(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 11792(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 11808(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 11824(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 11840(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 11856(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 11872(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 11888(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 11904(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 11920(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 11936(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 11952(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 63
            /*
             * K-step assembled only when KB > 63.  movddup broadcasts the
             * double at 376(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 11968..12144(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 376(pB), rB0
            MOVAPD 11968(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 11984(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 12000(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 12016(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 12032(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 12048(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 12064(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 12080(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 12096(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 12112(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 12128(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 12144(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 64
            /*
             * K-step assembled only when KB > 64.  movddup broadcasts the
             * double at 384(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 12160..12336(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 384(pB), rB0
            MOVAPD 12160(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 12176(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 12192(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 12208(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 12224(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 12240(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 12256(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 12272(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 12288(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 12304(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 12320(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 12336(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 65
            /*
             * K-step assembled only when KB > 65.  movddup broadcasts the
             * double at 392(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 12352..12528(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 392(pB), rB0
            MOVAPD 12352(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 12368(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 12384(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 12400(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 12416(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 12432(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 12448(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 12464(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 12480(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 12496(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 12512(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 12528(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 66
            /*
             * K-step assembled only when KB > 66.  movddup broadcasts the
             * double at 400(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 12544..12720(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 400(pB), rB0
            MOVAPD 12544(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 12560(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 12576(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 12592(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 12608(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 12624(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 12640(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 12656(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 12672(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 12688(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 12704(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 12720(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 67
            /*
             * K-step assembled only when KB > 67.  movddup broadcasts the
             * double at 408(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 12736..12912(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 408(pB), rB0
            MOVAPD 12736(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 12752(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 12768(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 12784(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 12800(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 12816(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 12832(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 12848(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 12864(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 12880(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 12896(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 12912(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 68
            /*
             * K-step assembled only when KB > 68.  movddup broadcasts the
             * double at 416(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 12928..13104(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 416(pB), rB0
            MOVAPD 12928(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 12944(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 12960(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 12976(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 12992(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 13008(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 13024(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 13040(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 13056(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 13072(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 13088(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 13104(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 69
            /*
             * K-step assembled only when KB > 69.  movddup broadcasts the
             * double at 424(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 13120..13296(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 424(pB), rB0
            MOVAPD 13120(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 13136(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 13152(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 13168(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 13184(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 13200(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 13216(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 13232(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 13248(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 13264(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 13280(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 13296(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 70
            /*
             * K-step assembled only when KB > 70.  movddup broadcasts the
             * double at 432(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 13312..13488(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 432(pB), rB0
            MOVAPD 13312(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 13328(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 13344(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 13360(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 13376(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 13392(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 13408(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 13424(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 13440(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 13456(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 13472(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 13488(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 71
            /*
             * K-step assembled only when KB > 71.  movddup broadcasts the
             * double at 440(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 13504..13680(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 440(pB), rB0
            MOVAPD 13504(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 13520(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 13536(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 13552(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 13568(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 13584(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 13600(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 13616(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 13632(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 13648(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 13664(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 13680(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 72
            /*
             * K-step assembled only when KB > 72.  movddup broadcasts the
             * double at 448(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 13696..13872(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 448(pB), rB0
            MOVAPD 13696(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 13712(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 13728(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 13744(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 13760(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 13776(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 13792(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 13808(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 13824(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 13840(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 13856(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 13872(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 73
            /*
             * K-step assembled only when KB > 73.  movddup broadcasts the
             * double at 456(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 13888..14064(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 456(pB), rB0
            MOVAPD 13888(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 13904(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 13920(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 13936(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 13952(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 13968(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 13984(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 14000(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 14016(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 14032(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 14048(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 14064(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 74
            /*
             * K-step assembled only when KB > 74.  movddup broadcasts the
             * double at 464(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 14080..14256(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 464(pB), rB0
            MOVAPD 14080(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 14096(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 14112(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 14128(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 14144(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 14160(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 14176(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 14192(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 14208(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 14224(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 14240(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 14256(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 75
            /*
             * K-step assembled only when KB > 75.  movddup broadcasts the
             * double at 472(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 14272..14448(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 472(pB), rB0
            MOVAPD 14272(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 14288(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 14304(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 14320(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 14336(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 14352(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 14368(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 14384(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 14400(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 14416(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 14432(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 14448(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 76
            /*
             * K-step assembled only when KB > 76.  movddup broadcasts the
             * double at 480(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 14464..14640(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 480(pB), rB0
            MOVAPD 14464(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 14480(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 14496(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 14512(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 14528(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 14544(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 14560(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 14576(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 14592(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 14608(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 14624(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 14640(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 77
            /*
             * K-step assembled only when KB > 77.  movddup broadcasts the
             * double at 488(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 14656..14832(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 488(pB), rB0
            MOVAPD 14656(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 14672(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 14688(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 14704(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 14720(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 14736(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 14752(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 14768(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 14784(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 14800(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 14816(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 14832(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 78
            /*
             * K-step assembled only when KB > 78.  movddup broadcasts the
             * double at 496(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 14848..15024(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 496(pB), rB0
            MOVAPD 14848(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 14864(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 14880(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 14896(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 14912(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 14928(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 14944(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 14960(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 14976(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 14992(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 15008(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 15024(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 79
            /*
             * K-step assembled only when KB > 79.  movddup broadcasts the
             * double at 504(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 15040..15216(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 504(pB), rB0
            MOVAPD 15040(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 15056(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 15072(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 15088(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 15104(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 15120(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 15136(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 15152(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 15168(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 15184(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 15200(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 15216(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 80
            /*
             * K-step assembled only when KB > 80.  movddup broadcasts the
             * double at 512(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 15232..15408(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 512(pB), rB0
            MOVAPD 15232(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 15248(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 15264(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 15280(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 15296(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 15312(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 15328(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 15344(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 15360(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 15376(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 15392(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 15408(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 81
            /*
             * K-step assembled only when KB > 81.  movddup broadcasts the
             * double at 520(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 15424..15600(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 520(pB), rB0
            MOVAPD 15424(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 15440(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 15456(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 15472(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 15488(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 15504(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 15520(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 15536(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 15552(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 15568(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 15584(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 15600(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 82
            /*
             * K-step assembled only when KB > 82.  movddup broadcasts the
             * double at 528(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 15616..15792(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 528(pB), rB0
            MOVAPD 15616(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 15632(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 15648(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 15664(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 15680(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 15696(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 15712(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 15728(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 15744(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 15760(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 15776(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 15792(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 83
            /*
             * K-step assembled only when KB > 83.  movddup broadcasts the
             * double at 536(pB) into both lanes of rB0; the 12 consecutive
             * 16-byte vectors at 15808..15984(pA) are each scaled by it and
             * accumulated into rC00..rC11.
             */
            movddup 536(pB), rB0
            MOVAPD 15808(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 15824(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 15840(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 15856(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 15872(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 15888(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 15904(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 15920(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 15936(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 15952(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 15968(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last vector: multiply from memory directly into rB0, whose
               broadcast value is not used again in this step */
            mulpd 15984(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 84
            /*
             * Unrolled K step, assembled only when KB > 84.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 544(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 16000..16176(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 544 = 8*84-128 and 16000 = 192*84-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 544(pB), rB0
            MOVAPD 16000(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 16016(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 16032(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 16048(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 16064(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 16080(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 16096(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 16112(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 16128(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 16144(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 16160(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 16176(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 85
            /*
             * Unrolled K step, assembled only when KB > 85.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 552(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 16192..16368(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 552 = 8*85-128 and 16192 = 192*85-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 552(pB), rB0
            MOVAPD 16192(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 16208(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 16224(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 16240(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 16256(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 16272(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 16288(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 16304(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 16320(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 16336(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 16352(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 16368(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 86
            /*
             * Unrolled K step, assembled only when KB > 86.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 560(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 16384..16560(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 560 = 8*86-128 and 16384 = 192*86-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 560(pB), rB0
            MOVAPD 16384(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 16400(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 16416(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 16432(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 16448(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 16464(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 16480(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 16496(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 16512(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 16528(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 16544(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 16560(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 87
            /*
             * Unrolled K step, assembled only when KB > 87.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 568(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 16576..16752(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 568 = 8*87-128 and 16576 = 192*87-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 568(pB), rB0
            MOVAPD 16576(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 16592(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 16608(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 16624(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 16640(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 16656(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 16672(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 16688(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 16704(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 16720(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 16736(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 16752(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 88
            /*
             * Unrolled K step, assembled only when KB > 88.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 576(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 16768..16944(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 576 = 8*88-128 and 16768 = 192*88-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 576(pB), rB0
            MOVAPD 16768(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 16784(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 16800(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 16816(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 16832(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 16848(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 16864(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 16880(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 16896(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 16912(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 16928(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 16944(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 89
            /*
             * Unrolled K step, assembled only when KB > 89.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 584(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 16960..17136(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 584 = 8*89-128 and 16960 = 192*89-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 584(pB), rB0
            MOVAPD 16960(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 16976(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 16992(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 17008(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 17024(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 17040(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 17056(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 17072(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 17088(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 17104(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 17120(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 17136(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 90
            /*
             * Unrolled K step, assembled only when KB > 90.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 592(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 17152..17328(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 592 = 8*90-128 and 17152 = 192*90-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 592(pB), rB0
            MOVAPD 17152(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 17168(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 17184(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 17200(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 17216(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 17232(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 17248(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 17264(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 17280(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 17296(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 17312(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 17328(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 91
            /*
             * Unrolled K step, assembled only when KB > 91.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 600(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 17344..17520(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 600 = 8*91-128 and 17344 = 192*91-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 600(pB), rB0
            MOVAPD 17344(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 17360(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 17376(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 17392(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 17408(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 17424(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 17440(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 17456(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 17472(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 17488(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 17504(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 17520(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 92
            /*
             * Unrolled K step, assembled only when KB > 92.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 608(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 17536..17712(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 608 = 8*92-128 and 17536 = 192*92-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 608(pB), rB0
            MOVAPD 17536(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 17552(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 17568(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 17584(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 17600(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 17616(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 17632(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 17648(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 17664(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 17680(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 17696(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 17712(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 93
            /*
             * Unrolled K step, assembled only when KB > 93.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 616(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 17728..17904(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 616 = 8*93-128 and 17728 = 192*93-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 616(pB), rB0
            MOVAPD 17728(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 17744(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 17760(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 17776(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 17792(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 17808(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 17824(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 17840(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 17856(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 17872(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 17888(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 17904(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 94
            /*
             * Unrolled K step, assembled only when KB > 94.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 624(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 17920..18096(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 624 = 8*94-128 and 17920 = 192*94-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 624(pB), rB0
            MOVAPD 17920(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 17936(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 17952(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 17968(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 17984(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 18000(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 18016(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 18032(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 18048(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 18064(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 18080(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 18096(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 95
            /*
             * Unrolled K step, assembled only when KB > 95.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 632(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 18112..18288(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 632 = 8*95-128 and 18112 = 192*95-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 632(pB), rB0
            MOVAPD 18112(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 18128(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 18144(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 18160(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 18176(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 18192(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 18208(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 18224(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 18240(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 18256(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 18272(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 18288(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 96
            /*
             * Unrolled K step, assembled only when KB > 96.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 640(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 18304..18480(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 640 = 8*96-128 and 18304 = 192*96-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 640(pB), rB0
            MOVAPD 18304(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 18320(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 18336(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 18352(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 18368(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 18384(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 18400(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 18416(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 18432(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 18448(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 18464(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 18480(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 97
            /*
             * Unrolled K step, assembled only when KB > 97.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 648(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 18496..18672(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 648 = 8*97-128 and 18496 = 192*97-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 648(pB), rB0
            MOVAPD 18496(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 18512(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 18528(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 18544(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 18560(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 18576(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 18592(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 18608(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 18624(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 18640(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 18656(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 18672(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 98
            /*
             * Unrolled K step, assembled only when KB > 98.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 656(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 18688..18864(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 656 = 8*98-128 and 18688 = 192*98-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 656(pB), rB0
            MOVAPD 18688(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 18704(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 18720(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 18736(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 18752(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 18768(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 18784(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 18800(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 18816(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 18832(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 18848(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 18864(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 99
            /*
             * Unrolled K step, assembled only when KB > 99.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 664(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 18880..19056(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 664 = 8*99-128 and 18880 = 192*99-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 664(pB), rB0
            MOVAPD 18880(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 18896(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 18912(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 18928(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 18944(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 18960(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 18976(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 18992(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 19008(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 19024(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 19040(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 19056(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 100
            /*
             * Unrolled K step, assembled only when KB > 100.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 672(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 19072..19248(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 672 = 8*100-128 and 19072 = 192*100-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 672(pB), rB0
            MOVAPD 19072(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 19088(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 19104(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 19120(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 19136(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 19152(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 19168(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 19184(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 19200(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 19216(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 19232(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 19248(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 101
            /*
             * Unrolled K step, assembled only when KB > 101.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 680(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 19264..19440(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 680 = 8*101-128 and 19264 = 192*101-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 680(pB), rB0
            MOVAPD 19264(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 19280(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 19296(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 19312(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 19328(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 19344(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 19360(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 19376(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 19392(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 19408(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 19424(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 19440(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 102
            /*
             * Unrolled K step, assembled only when KB > 102.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 688(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 19456..19632(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 688 = 8*102-128 and 19456 = 192*102-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 688(pB), rB0
            MOVAPD 19456(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 19472(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 19488(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 19504(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 19520(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 19536(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 19552(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 19568(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 19584(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 19600(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 19616(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 19632(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 103
            /*
             * Unrolled K step, assembled only when KB > 103.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 696(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 19648..19824(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 696 = 8*103-128 and 19648 = 192*103-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 696(pB), rB0
            MOVAPD 19648(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 19664(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 19680(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 19696(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 19712(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 19728(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 19744(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 19760(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 19776(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 19792(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 19808(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 19824(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 104
            /*
             * Unrolled K step, assembled only when KB > 104.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 704(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 19840..20016(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 704 = 8*104-128 and 19840 = 192*104-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 704(pB), rB0
            MOVAPD 19840(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 19856(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 19872(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 19888(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 19904(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 19920(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 19936(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 19952(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 19968(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 19984(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 20000(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 20016(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 105
            /*
             * Unrolled K step, assembled only when KB > 105.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 712(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 20032..20208(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 712 = 8*105-128 and 20032 = 192*105-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 712(pB), rB0
            MOVAPD 20032(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 20048(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 20064(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 20080(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 20096(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 20112(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 20128(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 20144(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 20160(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 20176(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 20192(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 20208(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 106
            /*
             * Unrolled K step, assembled only when KB > 106.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 720(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 20224..20400(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 720 = 8*106-128 and 20224 = 192*106-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 720(pB), rB0
            MOVAPD 20224(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 20240(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 20256(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 20272(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 20288(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 20304(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 20320(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 20336(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 20352(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 20368(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 20384(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 20400(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 107
            /*
             * Unrolled K step, assembled only when KB > 107.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 728(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 20416..20592(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 728 = 8*107-128 and 20416 = 192*107-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 728(pB), rB0
            MOVAPD 20416(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 20432(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 20448(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 20464(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 20480(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 20496(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 20512(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 20528(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 20544(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 20560(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 20576(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 20592(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 108
            /*
             * Unrolled K step, assembled only when KB > 108.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 736(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 20608..20784(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 736 = 8*108-128 and 20608 = 192*108-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 736(pB), rB0
            MOVAPD 20608(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 20624(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 20640(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 20656(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 20672(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 20688(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 20704(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 20720(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 20736(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 20752(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 20768(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 20784(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 109
            /*
             * Unrolled K step, assembled only when KB > 109.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 744(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 20800..20976(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 744 = 8*109-128 and 20800 = 192*109-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 744(pB), rB0
            MOVAPD 20800(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 20816(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 20832(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 20848(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 20864(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 20880(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 20896(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 20912(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 20928(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 20944(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 20960(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 20976(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 110
            /*
             * Unrolled K step, assembled only when KB > 110.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 752(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 20992..21168(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 752 = 8*110-128 and 20992 = 192*110-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 752(pB), rB0
            MOVAPD 20992(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 21008(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 21024(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 21040(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 21056(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 21072(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 21088(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 21104(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 21120(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 21136(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 21152(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 21168(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 111
            /*
             * Unrolled K step, assembled only when KB > 111.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 760(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 21184..21360(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 760 = 8*111-128 and 21184 = 192*111-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 760(pB), rB0
            MOVAPD 21184(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 21200(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 21216(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 21232(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 21248(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 21264(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 21280(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 21296(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 21312(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 21328(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 21344(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 21360(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 112
            /*
             * Unrolled K step, assembled only when KB > 112.  Computes
             * rC00..rC11 += A[i] * b: b is the double at 768(pB) copied into both
             * lanes of rB0; A[i] are the twelve 16-byte vectors at 21376..21552(pA),
             * loaded with movaps (MOVAPD), so they must be 16-byte aligned.
             * NOTE(review): 768 = 8*112-128 and 21376 = 192*112-128, so pA/pB presumably
             * point 128 bytes past their data start; that setup is not visible here.
             */
            movddup 768(pB), rB0
            MOVAPD 21376(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 21392(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 21408(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 21424(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 21440(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 21456(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 21472(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 21488(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 21504(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 21520(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 21536(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* Last vector is multiplied from memory into rB0; rB0 is not read again in this step. */
            mulpd 21552(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 113
            /*
             * Step assembled only when KB > 113: rB0 = double at 776(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 21568..21744.
             * 776 = 8*113-128 and 21568 = 192*113-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 776(pB), rB0
            MOVAPD 21568(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 21584(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 21600(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 21616(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 21632(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 21648(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 21664(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 21680(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 21696(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 21712(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 21728(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 21744(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 114
            /*
             * Step assembled only when KB > 114: rB0 = double at 784(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 21760..21936.
             * 784 = 8*114-128 and 21760 = 192*114-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 784(pB), rB0
            MOVAPD 21760(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 21776(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 21792(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 21808(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 21824(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 21840(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 21856(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 21872(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 21888(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 21904(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 21920(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 21936(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 115
            /*
             * Step assembled only when KB > 115: rB0 = double at 792(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 21952..22128.
             * 792 = 8*115-128 and 21952 = 192*115-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 792(pB), rB0
            MOVAPD 21952(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 21968(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 21984(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 22000(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 22016(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 22032(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 22048(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 22064(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 22080(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 22096(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 22112(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 22128(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 116
            /*
             * Step assembled only when KB > 116: rB0 = double at 800(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 22144..22320.
             * 800 = 8*116-128 and 22144 = 192*116-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 800(pB), rB0
            MOVAPD 22144(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 22160(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 22176(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 22192(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 22208(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 22224(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 22240(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 22256(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 22272(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 22288(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 22304(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 22320(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 117
            /*
             * Step assembled only when KB > 117: rB0 = double at 808(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 22336..22512.
             * 808 = 8*117-128 and 22336 = 192*117-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 808(pB), rB0
            MOVAPD 22336(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 22352(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 22368(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 22384(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 22400(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 22416(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 22432(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 22448(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 22464(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 22480(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 22496(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 22512(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 118
            /*
             * Step assembled only when KB > 118: rB0 = double at 816(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 22528..22704.
             * 816 = 8*118-128 and 22528 = 192*118-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 816(pB), rB0
            MOVAPD 22528(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 22544(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 22560(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 22576(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 22592(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 22608(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 22624(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 22640(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 22656(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 22672(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 22688(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 22704(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 119
            /*
             * Step assembled only when KB > 119: rB0 = double at 824(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 22720..22896.
             * 824 = 8*119-128 and 22720 = 192*119-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 824(pB), rB0
            MOVAPD 22720(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 22736(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 22752(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 22768(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 22784(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 22800(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 22816(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 22832(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 22848(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 22864(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 22880(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 22896(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 120
            /*
             * Step assembled only when KB > 120: rB0 = double at 832(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 22912..23088.
             * 832 = 8*120-128 and 22912 = 192*120-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 832(pB), rB0
            MOVAPD 22912(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 22928(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 22944(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 22960(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 22976(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 22992(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 23008(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 23024(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 23040(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 23056(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 23072(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 23088(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 121
            /*
             * Step assembled only when KB > 121: rB0 = double at 840(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 23104..23280.
             * 840 = 8*121-128 and 23104 = 192*121-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 840(pB), rB0
            MOVAPD 23104(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 23120(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 23136(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 23152(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 23168(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 23184(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 23200(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 23216(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 23232(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 23248(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 23264(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 23280(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 122
            /*
             * Step assembled only when KB > 122: rB0 = double at 848(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 23296..23472.
             * 848 = 8*122-128 and 23296 = 192*122-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 848(pB), rB0
            MOVAPD 23296(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 23312(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 23328(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 23344(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 23360(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 23376(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 23392(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 23408(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 23424(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 23440(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 23456(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 23472(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 123
            /*
             * Step assembled only when KB > 123: rB0 = double at 856(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 23488..23664.
             * 856 = 8*123-128 and 23488 = 192*123-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 856(pB), rB0
            MOVAPD 23488(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 23504(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 23520(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 23536(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 23552(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 23568(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 23584(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 23600(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 23616(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 23632(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 23648(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 23664(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 124
            /*
             * Step assembled only when KB > 124: rB0 = double at 864(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 23680..23856.
             * 864 = 8*124-128 and 23680 = 192*124-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 864(pB), rB0
            MOVAPD 23680(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 23696(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 23712(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 23728(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 23744(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 23760(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 23776(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 23792(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 23808(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 23824(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 23840(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 23856(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 125
            /*
             * Step assembled only when KB > 125: rB0 = double at 872(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 23872..24048.
             * 872 = 8*125-128 and 23872 = 192*125-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 872(pB), rB0
            MOVAPD 23872(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 23888(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 23904(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 23920(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 23936(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 23952(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 23968(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 23984(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 24000(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 24016(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 24032(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 24048(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 126
            /*
             * Step assembled only when KB > 126: rB0 = double at 880(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 24064..24240.
             * 880 = 8*126-128 and 24064 = 192*126-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 880(pB), rB0
            MOVAPD 24064(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 24080(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 24096(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 24112(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 24128(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 24144(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 24160(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 24176(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 24192(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 24208(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 24224(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 24240(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 127
            /*
             * Step assembled only when KB > 127: rB0 = double at 888(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 24256..24432.
             * 888 = 8*127-128 and 24256 = 192*127-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 888(pB), rB0
            MOVAPD 24256(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 24272(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 24288(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 24304(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 24320(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 24336(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 24352(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 24368(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 24384(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 24400(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 24416(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 24432(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 128
            /*
             * Step assembled only when KB > 128: rB0 = double at 896(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 24448..24624.
             * 896 = 8*128-128 and 24448 = 192*128-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 896(pB), rB0
            MOVAPD 24448(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 24464(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 24480(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 24496(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 24512(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 24528(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 24544(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 24560(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 24576(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 24592(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 24608(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 24624(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 129
            /*
             * Step assembled only when KB > 129: rB0 = double at 904(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 24640..24816.
             * 904 = 8*129-128 and 24640 = 192*129-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 904(pB), rB0
            MOVAPD 24640(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 24656(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 24672(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 24688(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 24704(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 24720(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 24736(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 24752(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 24768(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 24784(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 24800(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 24816(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 130
            /*
             * Step assembled only when KB > 130: rB0 = double at 912(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 24832..25008.
             * 912 = 8*130-128 and 24832 = 192*130-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 912(pB), rB0
            MOVAPD 24832(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 24848(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 24864(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 24880(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 24896(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 24912(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 24928(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 24944(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 24960(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 24976(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 24992(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 25008(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 131
            /*
             * Step assembled only when KB > 131: rB0 = double at 920(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 25024..25200.
             * 920 = 8*131-128 and 25024 = 192*131-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 920(pB), rB0
            MOVAPD 25024(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 25040(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 25056(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 25072(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 25088(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 25104(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 25120(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 25136(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 25152(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 25168(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 25184(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 25200(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 132
            /*
             * Step assembled only when KB > 132: rB0 = double at 928(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 25216..25392.
             * 928 = 8*132-128 and 25216 = 192*132-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 928(pB), rB0
            MOVAPD 25216(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 25232(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 25248(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 25264(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 25280(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 25296(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 25312(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 25328(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 25344(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 25360(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 25376(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 25392(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 133
            /*
             * Step assembled only when KB > 133: rB0 = double at 936(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 25408..25584.
             * 936 = 8*133-128 and 25408 = 192*133-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 936(pB), rB0
            MOVAPD 25408(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 25424(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 25440(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 25456(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 25472(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 25488(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 25504(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 25520(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 25536(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 25552(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 25568(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 25584(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 134
            /*
             * Step assembled only when KB > 134: rB0 = double at 944(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 25600..25776.
             * 944 = 8*134-128 and 25600 = 192*134-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 944(pB), rB0
            MOVAPD 25600(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 25616(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 25632(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 25648(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 25664(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 25680(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 25696(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 25712(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 25728(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 25744(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 25760(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 25776(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 135
            /*
             * Step assembled only when KB > 135: rB0 = double at 952(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 25792..25968.
             * 952 = 8*135-128 and 25792 = 192*135-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 952(pB), rB0
            MOVAPD 25792(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 25808(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 25824(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 25840(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 25856(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 25872(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 25888(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 25904(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 25920(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 25936(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 25952(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 25968(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 136
            /*
             * Step assembled only when KB > 136: rB0 = double at 960(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 25984..26160.
             * 960 = 8*136-128 and 25984 = 192*136-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 960(pB), rB0
            MOVAPD 25984(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 26000(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 26016(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 26032(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 26048(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 26064(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 26080(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 26096(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 26112(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 26128(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 26144(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 26160(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 137
            /*
             * Step assembled only when KB > 137: rB0 = double at 968(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 26176..26352.
             * 968 = 8*137-128 and 26176 = 192*137-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 968(pB), rB0
            MOVAPD 26176(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 26192(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 26208(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 26224(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 26240(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 26256(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 26272(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 26288(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 26304(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 26320(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 26336(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 26352(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 138
            /*
             * Step assembled only when KB > 138: rB0 = double at 976(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 26368..26544.
             * 976 = 8*138-128 and 26368 = 192*138-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 976(pB), rB0
            MOVAPD 26368(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 26384(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 26400(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 26416(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 26432(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 26448(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 26464(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 26480(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 26496(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 26512(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 26528(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 26544(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 139
            /*
             * Step assembled only when KB > 139: rB0 = double at 984(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 26560..26736.
             * 984 = 8*139-128 and 26560 = 192*139-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 984(pB), rB0
            MOVAPD 26560(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 26576(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 26592(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 26608(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 26624(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 26640(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 26656(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 26672(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 26688(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 26704(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 26720(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 26736(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 140
            /*
             * Step assembled only when KB > 140: rB0 = double at 992(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 26752..26928.
             * 992 = 8*140-128 and 26752 = 192*140-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 992(pB), rB0
            MOVAPD 26752(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 26768(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 26784(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 26800(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 26816(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 26832(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 26848(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 26864(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 26880(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 26896(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 26912(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 26928(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 141
            /*
             * Step assembled only when KB > 141: rB0 = double at 1000(pB)
             * duplicated into both lanes, then rC00..rC11 each accumulate
             * (16-byte vector of pA) * rB0 for displacements 26944..27120.
             * 1000 = 8*141-128 and 26944 = 192*141-128, so pA/pB look biased
             * by 128 bytes outside this chunk -- TODO confirm.
             */
            movddup 1000(pB), rB0
            MOVAPD 26944(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 26960(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 26976(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 26992(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 27008(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 27024(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 27040(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 27056(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 27072(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 27088(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 27104(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product is formed directly in rB0, overwriting the broadcast */
            mulpd 27120(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 142
            /*
             * Unrolled K step, assembled only when KB > 142.  The double at
             * 1008(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 27136(pA)..27312(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 142*8-128 (pB) and 142*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1008(pB), rB0
            MOVAPD 27136(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 27152(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 27168(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 27184(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 27200(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 27216(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 27232(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 27248(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 27264(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 27280(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 27296(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 27312(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 143
            /*
             * Unrolled K step, assembled only when KB > 143.  The double at
             * 1016(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 27328(pA)..27504(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 143*8-128 (pB) and 143*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1016(pB), rB0
            MOVAPD 27328(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 27344(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 27360(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 27376(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 27392(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 27408(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 27424(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 27440(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 27456(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 27472(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 27488(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 27504(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 144
            /*
             * Unrolled K step, assembled only when KB > 144.  The double at
             * 1024(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 27520(pA)..27696(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 144*8-128 (pB) and 144*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1024(pB), rB0
            MOVAPD 27520(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 27536(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 27552(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 27568(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 27584(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 27600(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 27616(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 27632(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 27648(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 27664(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 27680(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 27696(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 145
            /*
             * Unrolled K step, assembled only when KB > 145.  The double at
             * 1032(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 27712(pA)..27888(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 145*8-128 (pB) and 145*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1032(pB), rB0
            MOVAPD 27712(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 27728(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 27744(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 27760(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 27776(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 27792(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 27808(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 27824(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 27840(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 27856(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 27872(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 27888(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 146
            /*
             * Unrolled K step, assembled only when KB > 146.  The double at
             * 1040(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 27904(pA)..28080(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 146*8-128 (pB) and 146*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1040(pB), rB0
            MOVAPD 27904(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 27920(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 27936(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 27952(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 27968(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 27984(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 28000(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 28016(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 28032(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 28048(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 28064(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 28080(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 147
            /*
             * Unrolled K step, assembled only when KB > 147.  The double at
             * 1048(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 28096(pA)..28272(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 147*8-128 (pB) and 147*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1048(pB), rB0
            MOVAPD 28096(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 28112(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 28128(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 28144(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 28160(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 28176(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 28192(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 28208(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 28224(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 28240(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 28256(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 28272(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 148
            /*
             * Unrolled K step, assembled only when KB > 148.  The double at
             * 1056(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 28288(pA)..28464(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 148*8-128 (pB) and 148*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1056(pB), rB0
            MOVAPD 28288(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 28304(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 28320(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 28336(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 28352(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 28368(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 28384(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 28400(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 28416(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 28432(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 28448(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 28464(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 149
            /*
             * Unrolled K step, assembled only when KB > 149.  The double at
             * 1064(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 28480(pA)..28656(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 149*8-128 (pB) and 149*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1064(pB), rB0
            MOVAPD 28480(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 28496(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 28512(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 28528(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 28544(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 28560(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 28576(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 28592(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 28608(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 28624(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 28640(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 28656(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 150
            /*
             * Unrolled K step, assembled only when KB > 150.  The double at
             * 1072(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 28672(pA)..28848(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 150*8-128 (pB) and 150*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1072(pB), rB0
            MOVAPD 28672(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 28688(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 28704(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 28720(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 28736(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 28752(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 28768(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 28784(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 28800(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 28816(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 28832(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 28848(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 151
            /*
             * Unrolled K step, assembled only when KB > 151.  The double at
             * 1080(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 28864(pA)..29040(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 151*8-128 (pB) and 151*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1080(pB), rB0
            MOVAPD 28864(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 28880(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 28896(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 28912(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 28928(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 28944(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 28960(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 28976(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 28992(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 29008(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 29024(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 29040(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 152
            /*
             * Unrolled K step, assembled only when KB > 152.  The double at
             * 1088(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 29056(pA)..29232(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 152*8-128 (pB) and 152*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1088(pB), rB0
            MOVAPD 29056(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 29072(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 29088(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 29104(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 29120(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 29136(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 29152(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 29168(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 29184(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 29200(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 29216(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 29232(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 153
            /*
             * Unrolled K step, assembled only when KB > 153.  The double at
             * 1096(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 29248(pA)..29424(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 153*8-128 (pB) and 153*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1096(pB), rB0
            MOVAPD 29248(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 29264(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 29280(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 29296(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 29312(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 29328(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 29344(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 29360(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 29376(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 29392(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 29408(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 29424(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 154
            /*
             * Unrolled K step, assembled only when KB > 154.  The double at
             * 1104(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 29440(pA)..29616(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 154*8-128 (pB) and 154*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1104(pB), rB0
            MOVAPD 29440(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 29456(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 29472(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 29488(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 29504(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 29520(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 29536(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 29552(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 29568(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 29584(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 29600(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 29616(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 155
            /*
             * Unrolled K step, assembled only when KB > 155.  The double at
             * 1112(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 29632(pA)..29808(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 155*8-128 (pB) and 155*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1112(pB), rB0
            MOVAPD 29632(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 29648(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 29664(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 29680(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 29696(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 29712(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 29728(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 29744(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 29760(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 29776(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 29792(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 29808(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 156
            /*
             * Unrolled K step, assembled only when KB > 156.  The double at
             * 1120(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 29824(pA)..30000(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 156*8-128 (pB) and 156*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1120(pB), rB0
            MOVAPD 29824(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 29840(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 29856(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 29872(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 29888(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 29904(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 29920(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 29936(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 29952(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 29968(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 29984(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 30000(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 157
            /*
             * Unrolled K step, assembled only when KB > 157.  The double at
             * 1128(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 30016(pA)..30192(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 157*8-128 (pB) and 157*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1128(pB), rB0
            MOVAPD 30016(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 30032(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 30048(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 30064(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 30080(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 30096(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 30112(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 30128(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 30144(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 30160(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 30176(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 30192(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 158
            /*
             * Unrolled K step, assembled only when KB > 158.  The double at
             * 1136(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 30208(pA)..30384(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 158*8-128 (pB) and 158*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1136(pB), rB0
            MOVAPD 30208(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 30224(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 30240(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 30256(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 30272(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 30288(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 30304(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 30320(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 30336(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 30352(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 30368(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 30384(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 159
            /*
             * Unrolled K step, assembled only when KB > 159.  The double at
             * 1144(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 30400(pA)..30576(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 159*8-128 (pB) and 159*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1144(pB), rB0
            MOVAPD 30400(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 30416(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 30432(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 30448(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 30464(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 30480(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 30496(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 30512(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 30528(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 30544(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 30560(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 30576(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 160
            /*
             * Unrolled K step, assembled only when KB > 160.  The double at
             * 1152(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 30592(pA)..30768(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 160*8-128 (pB) and 160*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1152(pB), rB0
            MOVAPD 30592(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 30608(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 30624(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 30640(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 30656(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 30672(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 30688(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 30704(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 30720(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 30736(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 30752(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 30768(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 161
            /*
             * Unrolled K step, assembled only when KB > 161.  The double at
             * 1160(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 30784(pA)..30960(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 161*8-128 (pB) and 161*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1160(pB), rB0
            MOVAPD 30784(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 30800(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 30816(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 30832(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 30848(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 30864(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 30880(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 30896(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 30912(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 30928(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 30944(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 30960(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 162
            /*
             * Unrolled K step, assembled only when KB > 162.  The double at
             * 1168(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 30976(pA)..31152(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 162*8-128 (pB) and 162*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1168(pB), rB0
            MOVAPD 30976(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 30992(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 31008(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 31024(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 31040(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 31056(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 31072(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 31088(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 31104(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 31120(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 31136(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 31152(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 163
            /*
             * Unrolled K step, assembled only when KB > 163.  The double at
             * 1176(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 31168(pA)..31344(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 163*8-128 (pB) and 163*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1176(pB), rB0
            MOVAPD 31168(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 31184(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 31200(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 31216(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 31232(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 31248(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 31264(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 31280(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 31296(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 31312(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 31328(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 31344(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 164
            /*
             * Unrolled K step, assembled only when KB > 164.  The double at
             * 1184(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 31360(pA)..31536(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 164*8-128 (pB) and 164*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1184(pB), rB0
            MOVAPD 31360(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 31376(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 31392(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 31408(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 31424(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 31440(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 31456(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 31472(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 31488(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 31504(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 31520(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 31536(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 165
            /*
             * Unrolled K step, assembled only when KB > 165.  The double at
             * 1192(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 31552(pA)..31728(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 165*8-128 (pB) and 165*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1192(pB), rB0
            MOVAPD 31552(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 31568(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 31584(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 31600(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 31616(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 31632(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 31648(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 31664(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 31680(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 31696(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 31712(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 31728(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 166
            /*
             * Unrolled K step, assembled only when KB > 166.  The double at
             * 1200(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 31744(pA)..31920(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 166*8-128 (pB) and 166*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1200(pB), rB0
            MOVAPD 31744(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 31760(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 31776(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 31792(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 31808(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 31824(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 31840(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 31856(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 31872(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 31888(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 31904(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 31920(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 167
            /*
             * Unrolled K step, assembled only when KB > 167.  The double at
             * 1208(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 31936(pA)..32112(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 167*8-128 (pB) and 167*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1208(pB), rB0
            MOVAPD 31936(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 31952(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 31968(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 31984(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 32000(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 32016(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 32032(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 32048(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 32064(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 32080(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 32096(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 32112(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 168
            /*
             * Unrolled K step, assembled only when KB > 168.  The double at
             * 1216(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 32128(pA)..32304(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 168*8-128 (pB) and 168*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1216(pB), rB0
            MOVAPD 32128(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 32144(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 32160(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 32176(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 32192(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 32208(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 32224(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 32240(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 32256(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 32272(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 32288(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 32304(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 169
            /*
             * Unrolled K step, assembled only when KB > 169.  The double at
             * 1224(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 32320(pA)..32496(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 169*8-128 (pB) and 169*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1224(pB), rB0
            MOVAPD 32320(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 32336(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 32352(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 32368(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 32384(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 32400(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 32416(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 32432(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 32448(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 32464(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 32480(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 32496(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 170
            /*
             * Unrolled K step, assembled only when KB > 170.  The double at
             * 1232(pB) is duplicated into both lanes of rB0 and multiplied by the
             * twelve 16-byte vectors at 32512(pA)..32688(pA); product i is added
             * to accumulator rC<i>.  MOVAPD is movaps (see top of file), so the
             * pA operands must be 16-byte aligned.
             * Displacements are 170*8-128 (pB) and 170*192-128+16*i (pA).
             * NOTE(review): the -128 suggests pA/pB are pre-biased; confirm.
             */
            movddup 1232(pB), rB0
            MOVAPD 32512(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 32528(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 32544(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 32560(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 32576(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 32592(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 32608(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 32624(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 32640(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 32656(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 32672(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: rB0 is the destination, the broadcast is not reused */
            mulpd 32688(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 171
            /*
             * Step compiled only when KB > 171.  movddup copies the double at
             * 1240(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 32704(pA)..32880(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1240(pB), rB0
            MOVAPD 32704(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 32720(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 32736(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 32752(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 32768(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 32784(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 32800(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 32816(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 32832(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 32848(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 32864(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 32880(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 172
            /*
             * Step compiled only when KB > 172.  movddup copies the double at
             * 1248(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 32896(pA)..33072(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1248(pB), rB0
            MOVAPD 32896(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 32912(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 32928(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 32944(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 32960(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 32976(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 32992(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 33008(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 33024(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 33040(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 33056(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 33072(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 173
            /*
             * Step compiled only when KB > 173.  movddup copies the double at
             * 1256(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 33088(pA)..33264(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1256(pB), rB0
            MOVAPD 33088(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 33104(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 33120(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 33136(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 33152(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 33168(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 33184(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 33200(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 33216(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 33232(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 33248(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 33264(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 174
            /*
             * Step compiled only when KB > 174.  movddup copies the double at
             * 1264(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 33280(pA)..33456(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1264(pB), rB0
            MOVAPD 33280(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 33296(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 33312(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 33328(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 33344(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 33360(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 33376(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 33392(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 33408(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 33424(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 33440(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 33456(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 175
            /*
             * Step compiled only when KB > 175.  movddup copies the double at
             * 1272(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 33472(pA)..33648(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1272(pB), rB0
            MOVAPD 33472(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 33488(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 33504(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 33520(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 33536(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 33552(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 33568(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 33584(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 33600(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 33616(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 33632(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 33648(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 176
            /*
             * Step compiled only when KB > 176.  movddup copies the double at
             * 1280(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 33664(pA)..33840(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1280(pB), rB0
            MOVAPD 33664(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 33680(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 33696(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 33712(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 33728(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 33744(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 33760(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 33776(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 33792(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 33808(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 33824(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 33840(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 177
            /*
             * Step compiled only when KB > 177.  movddup copies the double at
             * 1288(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 33856(pA)..34032(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1288(pB), rB0
            MOVAPD 33856(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 33872(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 33888(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 33904(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 33920(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 33936(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 33952(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 33968(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 33984(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 34000(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 34016(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 34032(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 178
            /*
             * Step compiled only when KB > 178.  movddup copies the double at
             * 1296(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 34048(pA)..34224(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1296(pB), rB0
            MOVAPD 34048(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 34064(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 34080(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 34096(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 34112(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 34128(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 34144(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 34160(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 34176(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 34192(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 34208(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 34224(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 179
            /*
             * Step compiled only when KB > 179.  movddup copies the double at
             * 1304(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 34240(pA)..34416(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1304(pB), rB0
            MOVAPD 34240(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 34256(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 34272(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 34288(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 34304(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 34320(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 34336(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 34352(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 34368(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 34384(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 34400(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 34416(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 180
            /*
             * Step compiled only when KB > 180.  movddup copies the double at
             * 1312(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 34432(pA)..34608(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1312(pB), rB0
            MOVAPD 34432(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 34448(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 34464(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 34480(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 34496(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 34512(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 34528(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 34544(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 34560(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 34576(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 34592(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 34608(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 181
            /*
             * Step compiled only when KB > 181.  movddup copies the double at
             * 1320(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 34624(pA)..34800(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1320(pB), rB0
            MOVAPD 34624(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 34640(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 34656(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 34672(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 34688(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 34704(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 34720(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 34736(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 34752(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 34768(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 34784(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 34800(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 182
            /*
             * Step compiled only when KB > 182.  movddup copies the double at
             * 1328(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 34816(pA)..34992(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1328(pB), rB0
            MOVAPD 34816(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 34832(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 34848(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 34864(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 34880(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 34896(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 34912(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 34928(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 34944(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 34960(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 34976(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 34992(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 183
            /*
             * Step compiled only when KB > 183.  movddup copies the double at
             * 1336(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 35008(pA)..35184(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1336(pB), rB0
            MOVAPD 35008(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 35024(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 35040(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 35056(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 35072(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 35088(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 35104(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 35120(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 35136(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 35152(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 35168(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 35184(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 184
            /*
             * Step compiled only when KB > 184.  movddup copies the double at
             * 1344(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 35200(pA)..35376(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1344(pB), rB0
            MOVAPD 35200(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 35216(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 35232(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 35248(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 35264(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 35280(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 35296(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 35312(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 35328(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 35344(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 35360(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 35376(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 185
            /*
             * Step compiled only when KB > 185.  movddup copies the double at
             * 1352(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 35392(pA)..35568(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1352(pB), rB0
            MOVAPD 35392(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 35408(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 35424(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 35440(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 35456(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 35472(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 35488(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 35504(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 35520(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 35536(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 35552(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 35568(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 186
            /*
             * Step compiled only when KB > 186.  movddup copies the double at
             * 1360(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 35584(pA)..35760(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1360(pB), rB0
            MOVAPD 35584(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 35600(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 35616(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 35632(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 35648(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 35664(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 35680(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 35696(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 35712(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 35728(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 35744(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 35760(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 187
            /*
             * Step compiled only when KB > 187.  movddup copies the double at
             * 1368(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 35776(pA)..35952(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1368(pB), rB0
            MOVAPD 35776(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 35792(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 35808(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 35824(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 35840(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 35856(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 35872(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 35888(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 35904(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 35920(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 35936(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 35952(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 188
            /*
             * Step compiled only when KB > 188.  movddup copies the double at
             * 1376(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 35968(pA)..36144(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1376(pB), rB0
            MOVAPD 35968(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 35984(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 36000(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 36016(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 36032(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 36048(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 36064(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 36080(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 36096(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 36112(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 36128(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 36144(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 189
            /*
             * Step compiled only when KB > 189.  movddup copies the double at
             * 1384(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 36160(pA)..36336(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1384(pB), rB0
            MOVAPD 36160(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 36176(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 36192(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 36208(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 36224(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 36240(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 36256(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 36272(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 36288(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 36304(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 36320(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 36336(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 190
            /*
             * Step compiled only when KB > 190.  movddup copies the double at
             * 1392(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 36352(pA)..36528(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1392(pB), rB0
            MOVAPD 36352(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 36368(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 36384(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 36400(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 36416(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 36432(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 36448(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 36464(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 36480(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 36496(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 36512(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 36528(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 191
            /*
             * Step compiled only when KB > 191.  movddup copies the double at
             * 1400(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 36544(pA)..36720(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1400(pB), rB0
            MOVAPD 36544(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 36560(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 36576(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 36592(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 36608(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 36624(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 36640(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 36656(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 36672(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 36688(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 36704(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 36720(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 192
            /*
             * Step compiled only when KB > 192.  movddup copies the double at
             * 1408(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 36736(pA)..36912(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1408(pB), rB0
            MOVAPD 36736(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 36752(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 36768(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 36784(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 36800(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 36816(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 36832(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 36848(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 36864(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 36880(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 36896(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 36912(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 193
            /*
             * Step compiled only when KB > 193.  movddup copies the double at
             * 1416(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 36928(pA)..37104(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1416(pB), rB0
            MOVAPD 36928(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 36944(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 36960(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 36976(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 36992(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 37008(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 37024(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 37040(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 37056(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 37072(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 37088(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 37104(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 194
            /*
             * Step compiled only when KB > 194.  movddup copies the double at
             * 1424(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 37120(pA)..37296(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1424(pB), rB0
            MOVAPD 37120(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 37136(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 37152(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 37168(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 37184(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 37200(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 37216(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 37232(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 37248(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 37264(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 37280(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 37296(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 195
            /*
             * Step compiled only when KB > 195.  movddup copies the double at
             * 1432(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 37312(pA)..37488(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1432(pB), rB0
            MOVAPD 37312(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 37328(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 37344(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 37360(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 37376(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 37392(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 37408(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 37424(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 37440(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 37456(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 37472(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 37488(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 196
            /*
             * Step compiled only when KB > 196.  movddup copies the double at
             * 1440(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 37504(pA)..37680(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1440(pB), rB0
            MOVAPD 37504(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 37520(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 37536(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 37552(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 37568(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 37584(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 37600(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 37616(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 37632(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 37648(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 37664(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 37680(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 197
            /*
             * Step compiled only when KB > 197.  movddup copies the double at
             * 1448(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 37696(pA)..37872(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1448(pB), rB0
            MOVAPD 37696(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 37712(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 37728(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 37744(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 37760(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 37776(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 37792(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 37808(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 37824(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 37840(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 37856(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 37872(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 198
            /*
             * Step compiled only when KB > 198.  movddup copies the double at
             * 1456(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 37888(pA)..38064(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1456(pB), rB0
            MOVAPD 37888(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 37904(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 37920(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 37936(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 37952(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 37968(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 37984(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 38000(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 38016(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 38032(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 38048(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 38064(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 199
            /*
             * Step compiled only when KB > 199.  movddup copies the double at
             * 1464(pB) into both halves of rB0; each of the twelve 16-byte
             * vectors at 38080(pA)..38256(pA) is then multiplied by rB0 and
             * added to its own accumulator, rC00..rC11 in order.  MOVAPD is
             * movaps (see #define at top of file): an alignment-checked load.
             */
            movddup 1464(pB), rB0
            MOVAPD 38080(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 38096(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 38112(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 38128(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 38144(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 38160(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 38176(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 38192(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 38208(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 38224(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 38240(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            /* last product: multiply from memory straight into rB0, whose
               broadcast value is not used again in this step */
            mulpd 38256(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 200
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 200.
             * The double at 1472(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 38272..38448(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1472 = 200*8-128 and 38272 = 200*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1472(pB), rB0
            MOVAPD 38272(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 38288(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 38304(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 38320(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 38336(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 38352(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 38368(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 38384(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 38400(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 38416(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 38432(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 38448(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 201
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 201.
             * The double at 1480(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 38464..38640(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1480 = 201*8-128 and 38464 = 201*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1480(pB), rB0
            MOVAPD 38464(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 38480(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 38496(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 38512(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 38528(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 38544(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 38560(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 38576(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 38592(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 38608(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 38624(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 38640(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 202
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 202.
             * The double at 1488(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 38656..38832(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1488 = 202*8-128 and 38656 = 202*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1488(pB), rB0
            MOVAPD 38656(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 38672(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 38688(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 38704(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 38720(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 38736(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 38752(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 38768(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 38784(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 38800(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 38816(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 38832(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 203
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 203.
             * The double at 1496(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 38848..39024(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1496 = 203*8-128 and 38848 = 203*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1496(pB), rB0
            MOVAPD 38848(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 38864(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 38880(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 38896(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 38912(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 38928(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 38944(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 38960(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 38976(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 38992(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 39008(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 39024(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 204
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 204.
             * The double at 1504(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 39040..39216(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1504 = 204*8-128 and 39040 = 204*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1504(pB), rB0
            MOVAPD 39040(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 39056(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 39072(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 39088(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 39104(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 39120(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 39136(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 39152(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 39168(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 39184(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 39200(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 39216(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 205
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 205.
             * The double at 1512(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 39232..39408(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1512 = 205*8-128 and 39232 = 205*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1512(pB), rB0
            MOVAPD 39232(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 39248(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 39264(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 39280(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 39296(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 39312(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 39328(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 39344(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 39360(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 39376(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 39392(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 39408(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 206
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 206.
             * The double at 1520(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 39424..39600(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1520 = 206*8-128 and 39424 = 206*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1520(pB), rB0
            MOVAPD 39424(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 39440(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 39456(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 39472(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 39488(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 39504(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 39520(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 39536(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 39552(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 39568(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 39584(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 39600(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 207
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 207.
             * The double at 1528(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 39616..39792(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1528 = 207*8-128 and 39616 = 207*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1528(pB), rB0
            MOVAPD 39616(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 39632(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 39648(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 39664(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 39680(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 39696(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 39712(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 39728(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 39744(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 39760(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 39776(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 39792(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 208
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 208.
             * The double at 1536(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 39808..39984(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1536 = 208*8-128 and 39808 = 208*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1536(pB), rB0
            MOVAPD 39808(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 39824(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 39840(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 39856(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 39872(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 39888(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 39904(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 39920(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 39936(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 39952(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 39968(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 39984(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 209
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 209.
             * The double at 1544(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 40000..40176(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1544 = 209*8-128 and 40000 = 209*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1544(pB), rB0
            MOVAPD 40000(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 40016(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 40032(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 40048(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 40064(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 40080(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 40096(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 40112(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 40128(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 40144(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 40160(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 40176(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 210
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 210.
             * The double at 1552(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 40192..40368(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1552 = 210*8-128 and 40192 = 210*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1552(pB), rB0
            MOVAPD 40192(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 40208(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 40224(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 40240(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 40256(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 40272(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 40288(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 40304(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 40320(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 40336(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 40352(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 40368(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 211
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 211.
             * The double at 1560(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 40384..40560(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1560 = 211*8-128 and 40384 = 211*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1560(pB), rB0
            MOVAPD 40384(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 40400(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 40416(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 40432(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 40448(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 40464(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 40480(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 40496(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 40512(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 40528(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 40544(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 40560(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 212
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 212.
             * The double at 1568(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 40576..40752(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1568 = 212*8-128 and 40576 = 212*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1568(pB), rB0
            MOVAPD 40576(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 40592(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 40608(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 40624(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 40640(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 40656(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 40672(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 40688(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 40704(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 40720(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 40736(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 40752(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 213
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 213.
             * The double at 1576(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 40768..40944(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1576 = 213*8-128 and 40768 = 213*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1576(pB), rB0
            MOVAPD 40768(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 40784(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 40800(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 40816(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 40832(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 40848(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 40864(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 40880(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 40896(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 40912(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 40928(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 40944(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 214
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 214.
             * The double at 1584(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 40960..41136(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1584 = 214*8-128 and 40960 = 214*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1584(pB), rB0
            MOVAPD 40960(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 40976(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 40992(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 41008(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 41024(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 41040(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 41056(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 41072(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 41088(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 41104(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 41120(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 41136(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 215
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 215.
             * The double at 1592(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 41152..41328(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1592 = 215*8-128 and 41152 = 215*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1592(pB), rB0
            MOVAPD 41152(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 41168(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 41184(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 41200(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 41216(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 41232(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 41248(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 41264(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 41280(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 41296(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 41312(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 41328(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 216
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 216.
             * The double at 1600(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 41344..41520(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1600 = 216*8-128 and 41344 = 216*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1600(pB), rB0
            MOVAPD 41344(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 41360(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 41376(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 41392(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 41408(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 41424(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 41440(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 41456(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 41472(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 41488(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 41504(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 41520(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 217
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 217.
             * The double at 1608(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 41536..41712(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1608 = 217*8-128 and 41536 = 217*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1608(pB), rB0
            MOVAPD 41536(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 41552(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 41568(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 41584(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 41600(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 41616(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 41632(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 41648(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 41664(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 41680(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 41696(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 41712(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 218
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 218.
             * The double at 1616(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 41728..41904(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1616 = 218*8-128 and 41728 = 218*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1616(pB), rB0
            MOVAPD 41728(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 41744(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 41760(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 41776(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 41792(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 41808(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 41824(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 41840(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 41856(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 41872(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 41888(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 41904(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 219
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 219.
             * The double at 1624(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 41920..42096(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1624 = 219*8-128 and 41920 = 219*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1624(pB), rB0
            MOVAPD 41920(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 41936(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 41952(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 41968(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 41984(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 42000(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 42016(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 42032(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 42048(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 42064(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 42080(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 42096(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 220
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 220.
             * The double at 1632(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 42112..42288(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1632 = 220*8-128 and 42112 = 220*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1632(pB), rB0
            MOVAPD 42112(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 42128(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 42144(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 42160(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 42176(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 42192(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 42208(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 42224(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 42240(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 42256(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 42272(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 42288(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 221
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 221.
             * The double at 1640(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 42304..42480(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1640 = 221*8-128 and 42304 = 221*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1640(pB), rB0
            MOVAPD 42304(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 42320(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 42336(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 42352(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 42368(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 42384(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 42400(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 42416(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 42432(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 42448(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 42464(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 42480(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 222
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 222.
             * The double at 1648(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 42496..42672(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1648 = 222*8-128 and 42496 = 222*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1648(pB), rB0
            MOVAPD 42496(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 42512(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 42528(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 42544(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 42560(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 42576(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 42592(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 42608(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 42624(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 42640(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 42656(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 42672(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 223
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 223.
             * The double at 1656(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 42688..42864(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1656 = 223*8-128 and 42688 = 223*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1656(pB), rB0
            MOVAPD 42688(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 42704(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 42720(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 42736(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 42752(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 42768(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 42784(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 42800(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 42816(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 42832(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 42848(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 42864(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 224
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 224.
             * The double at 1664(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 42880..43056(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1664 = 224*8-128 and 42880 = 224*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1664(pB), rB0
            MOVAPD 42880(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 42896(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 42912(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 42928(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 42944(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 42960(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 42976(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 42992(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 43008(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 43024(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 43040(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 43056(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 225
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 225.
             * The double at 1672(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 43072..43248(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1672 = 225*8-128 and 43072 = 225*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1672(pB), rB0
            MOVAPD 43072(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 43088(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 43104(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 43120(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 43136(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 43152(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 43168(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 43184(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 43200(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 43216(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 43232(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 43248(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 226
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 226.
             * The double at 1680(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 43264..43440(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1680 = 226*8-128 and 43264 = 226*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1680(pB), rB0
            MOVAPD 43264(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 43280(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 43296(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 43312(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 43328(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 43344(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 43360(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 43376(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 43392(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 43408(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 43424(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 43440(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 227
            /*
             * Unrolled multiply-accumulate step, assembled only when KB > 227.
             * The double at 1688(pB) is duplicated into both lanes of rB0 and
             * multiplied by the twelve 16-byte vectors at 43456..43632(pA);
             * the products are added to rC00..rC11.  The last product is
             * formed in rB0 itself, overwriting the duplicated value.
             * NOTE(review): 1688 = 227*8-128 and 43456 = 227*192-128, so pA/pB
             * look biased by 128 bytes -- confirm against the setup code.
             */
            movddup 1688(pB), rB0
            MOVAPD 43456(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 43472(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 43488(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 43504(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 43520(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 43536(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 43552(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 43568(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 43584(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 43600(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 43616(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 43632(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 228
            movddup 1696(pB), rB0
            MOVAPD 43648(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 43664(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 43680(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 43696(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 43712(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 43728(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 43744(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 43760(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 43776(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 43792(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 43808(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 43824(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 229
            movddup 1704(pB), rB0
            MOVAPD 43840(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 43856(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 43872(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 43888(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 43904(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 43920(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 43936(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 43952(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 43968(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 43984(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 44000(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 44016(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 230
            movddup 1712(pB), rB0
            MOVAPD 44032(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 44048(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 44064(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 44080(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 44096(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 44112(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 44128(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 44144(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 44160(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 44176(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 44192(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 44208(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 231
            movddup 1720(pB), rB0
            MOVAPD 44224(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 44240(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 44256(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 44272(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 44288(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 44304(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 44320(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 44336(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 44352(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 44368(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 44384(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 44400(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 232
            movddup 1728(pB), rB0
            MOVAPD 44416(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 44432(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 44448(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 44464(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 44480(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 44496(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 44512(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 44528(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 44544(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 44560(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 44576(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 44592(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 233
            movddup 1736(pB), rB0
            MOVAPD 44608(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 44624(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 44640(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 44656(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 44672(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 44688(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 44704(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 44720(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 44736(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 44752(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 44768(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 44784(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 234
            movddup 1744(pB), rB0
            MOVAPD 44800(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 44816(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 44832(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 44848(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 44864(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 44880(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 44896(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 44912(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 44928(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 44944(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 44960(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 44976(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 235
            movddup 1752(pB), rB0
            MOVAPD 44992(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 45008(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 45024(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 45040(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 45056(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 45072(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 45088(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 45104(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 45120(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 45136(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 45152(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 45168(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 236
            movddup 1760(pB), rB0
            MOVAPD 45184(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 45200(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 45216(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 45232(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 45248(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 45264(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 45280(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 45296(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 45312(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 45328(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 45344(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 45360(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 237
            movddup 1768(pB), rB0
            MOVAPD 45376(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 45392(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 45408(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 45424(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 45440(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 45456(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 45472(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 45488(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 45504(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 45520(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 45536(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 45552(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 238
            movddup 1776(pB), rB0
            MOVAPD 45568(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 45584(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 45600(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 45616(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 45632(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 45648(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 45664(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 45680(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 45696(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 45712(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 45728(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 45744(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 239
            movddup 1784(pB), rB0
            MOVAPD 45760(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 45776(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 45792(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 45808(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 45824(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 45840(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 45856(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 45872(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 45888(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 45904(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 45920(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 45936(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 240
            movddup 1792(pB), rB0
            MOVAPD 45952(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 45968(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 45984(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 46000(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 46016(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 46032(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 46048(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 46064(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 46080(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 46096(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 46112(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 46128(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 241
            movddup 1800(pB), rB0
            MOVAPD 46144(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 46160(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 46176(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 46192(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 46208(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 46224(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 46240(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 46256(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 46272(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 46288(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 46304(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 46320(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 242
            movddup 1808(pB), rB0
            MOVAPD 46336(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 46352(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 46368(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 46384(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 46400(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 46416(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 46432(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 46448(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 46464(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 46480(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 46496(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 46512(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 243
            movddup 1816(pB), rB0
            MOVAPD 46528(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 46544(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 46560(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 46576(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 46592(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 46608(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 46624(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 46640(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 46656(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 46672(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 46688(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 46704(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 244
            movddup 1824(pB), rB0
            MOVAPD 46720(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 46736(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 46752(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 46768(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 46784(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 46800(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 46816(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 46832(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 46848(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 46864(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 46880(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 46896(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 245
            movddup 1832(pB), rB0
            MOVAPD 46912(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 46928(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 46944(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 46960(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 46976(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 46992(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 47008(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 47024(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 47040(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 47056(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 47072(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 47088(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 246
            movddup 1840(pB), rB0
            MOVAPD 47104(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 47120(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 47136(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 47152(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 47168(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 47184(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 47200(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 47216(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 47232(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 47248(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 47264(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 47280(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 247
            movddup 1848(pB), rB0
            MOVAPD 47296(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 47312(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 47328(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 47344(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 47360(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 47376(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 47392(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 47408(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 47424(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 47440(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 47456(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 47472(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 248
            movddup 1856(pB), rB0
            MOVAPD 47488(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 47504(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 47520(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 47536(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 47552(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 47568(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 47584(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 47600(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 47616(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 47632(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 47648(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 47664(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 249
            movddup 1864(pB), rB0
            MOVAPD 47680(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 47696(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 47712(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 47728(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 47744(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 47760(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 47776(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 47792(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 47808(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 47824(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 47840(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 47856(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 250
            movddup 1872(pB), rB0
            MOVAPD 47872(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 47888(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 47904(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 47920(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 47936(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 47952(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 47968(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 47984(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 48000(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 48016(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 48032(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 48048(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 251
            movddup 1880(pB), rB0
            MOVAPD 48064(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 48080(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 48096(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 48112(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 48128(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 48144(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 48160(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 48176(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 48192(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 48208(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 48224(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 48240(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 252
            movddup 1888(pB), rB0
            MOVAPD 48256(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 48272(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 48288(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 48304(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 48320(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 48336(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 48352(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 48368(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 48384(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 48400(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 48416(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 48432(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 253
            movddup 1896(pB), rB0
            MOVAPD 48448(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 48464(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 48480(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 48496(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 48512(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 48528(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 48544(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 48560(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 48576(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 48592(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 48608(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 48624(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 254
            movddup 1904(pB), rB0
            MOVAPD 48640(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 48656(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 48672(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 48688(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 48704(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 48720(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 48736(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 48752(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 48768(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 48784(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 48800(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 48816(pA), rB0
            addpd rB0, rC11
         #endif
         #if KB > 255
            movddup 1912(pB), rB0
            MOVAPD 48832(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC00
            MOVAPD 48848(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC01
            MOVAPD 48864(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC02
            MOVAPD 48880(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC03
            MOVAPD 48896(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC04
            MOVAPD 48912(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC05
            MOVAPD 48928(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC06
            MOVAPD 48944(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC07
            MOVAPD 48960(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC08
            MOVAPD 48976(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC09
            MOVAPD 48992(pA), rA0
            mulpd rB0, rA0
            addpd rA0, rC10
            mulpd 49008(pA), rB0
            addpd rB0, rC11
         #endif
         #if defined(BETA1) || defined(BETAN1)
            BETCOP -128(pC), rC00
            MOVAPD rC00, -128(pC)
            BETCOP -112(pC), rC01
            MOVAPD rC01, -112(pC)
            BETCOP -96(pC), rC02
            MOVAPD rC02, -96(pC)
            BETCOP -80(pC), rC03
            MOVAPD rC03, -80(pC)
            BETCOP -64(pC), rC04
            MOVAPD rC04, -64(pC)
            BETCOP -48(pC), rC05
            MOVAPD rC05, -48(pC)
            BETCOP -32(pC), rC06
            MOVAPD rC06, -32(pC)
            BETCOP -16(pC), rC07
            MOVAPD rC07, -16(pC)
            BETCOP (pC), rC08
            MOVAPD rC08, (pC)
            BETCOP 16(pC), rC09
            MOVAPD rC09, 16(pC)
            BETCOP 32(pC), rC10
            MOVAPD rC10, 32(pC)
            BETCOP 48(pC), rC11
            MOVAPD rC11, 48(pC)
         #else
            MOVAPD rC00, -128(pC)
            MOVAPD rC01, -112(pC)
            MOVAPD rC02, -96(pC)
            MOVAPD rC03, -80(pC)
            MOVAPD rC04, -64(pC)
            MOVAPD rC05, -48(pC)
            MOVAPD rC06, -32(pC)
            MOVAPD rC07, -16(pC)
            MOVAPD rC08, (pC)
            MOVAPD rC09, 16(pC)
            MOVAPD rC10, 32(pC)
            MOVAPD rC11, 48(pC)
         #endif
         add incPF, pfA
         add incPF, pfB
         add $192, pC
         add $KB*1*8, pB   /* pB += KB*NU*sizeof */
      sub $1, nnu
      jnz MNLOOP
      mov nnu0, nnu
      mov pB0, pB
      add incAm, pA
   sub $1, nmu
   jnz MNLOOP

/* DONE: */
   movq    (%rsp), %rbp
   movq    8(%rsp), %rbx
   movq    16(%rsp), %r12
   add $FSIZE, %rsp
   ret
#if 0
/*
 * findSize -- disabled debugging helper (compiled out by the #if 0 guard).
 * Out:   %rax = SS1 - SS0, the distance in bytes between the two labels.
 * As written SS0 and SS1 sit back to back with nothing between them, so
 * the difference assembles to zero.
 * NOTE(review): presumably the two labels are meant to be moved around a
 * code region whose size is to be measured -- confirm before enabling.
 */
        .global findSize
findSize:
        movq    $SS1-SS0, %rax          /* return label distance */
        ret
SS0:
SS1:
#endif
