; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; Supported combines

; Splat of a sign-extended i8 scalar multiplied by a sign-extended <8 x i8>
; vector. SelectionDAG is expected to select dup on the narrow type followed by
; smull; GlobalISel is expected to extend both operands and use a plain mul.
define <8 x i16> @dupsext_v8i8_v8i16(i8 %src, <8 x i8> %b) {
; CHECK-SD-LABEL: dupsext_v8i8_v8i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    dup v1.8b, w0
; CHECK-SD-NEXT:    smull v0.8h, v1.8b, v0.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: dupsext_v8i8_v8i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    lsl w8, w0, #8
; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT:    sbfx w8, w8, #8, #8
; CHECK-GI-NEXT:    dup v1.8h, w8
; CHECK-GI-NEXT:    mul v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT:    ret
entry:
  %in = sext i8 %src to i16
  %ext.b = sext <8 x i8> %b to <8 x i16>
  ; insertelement into lane 0 + all-zero shuffle mask broadcasts %in to every lane.
  %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
  %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
  %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
  ret <8 x i16> %out
}

; Splat of a zero-extended i8 scalar multiplied by a zero-extended <8 x i8>
; vector. Both selectors are expected to end in umull; SelectionDAG splats
; directly on the narrow type, GlobalISel splats wide and narrows with xtn.
define <8 x i16> @dupzext_v8i8_v8i16(i8 %src, <8 x i8> %b) {
; CHECK-SD-LABEL: dupzext_v8i8_v8i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    dup v1.8b, w0
; CHECK-SD-NEXT:    umull v0.8h, v1.8b, v0.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: dupzext_v8i8_v8i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    and w8, w0, #0xff
; CHECK-GI-NEXT:    dup v1.8h, w8
; CHECK-GI-NEXT:    xtn v1.8b, v1.8h
; CHECK-GI-NEXT:    umull v0.8h, v1.8b, v0.8b
; CHECK-GI-NEXT:    ret
entry:
  %in = zext i8 %src to i16
  %ext.b = zext <8 x i8> %b to <8 x i16>
  ; insertelement into lane 0 + all-zero shuffle mask broadcasts %in to every lane.
  %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
  %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
  %out = mul nuw <8 x i16> %broadcast.splat, %ext.b
  ret <8 x i16> %out
}

; Splat of a sign-extended i16 scalar multiplied by a sign-extended <4 x i16>
; vector. Both selectors are expected to end in smull; SelectionDAG splats
; directly on the narrow type, GlobalISel splats wide and narrows with xtn.
define <4 x i32> @dupsext_v4i16_v4i32(i16 %src, <4 x i16> %b) {
; CHECK-SD-LABEL: dupsext_v4i16_v4i32:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    dup v1.4h, w0
; CHECK-SD-NEXT:    smull v0.4s, v1.4h, v0.4h
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: dupsext_v4i16_v4i32:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sxth w8, w0
; CHECK-GI-NEXT:    dup v1.4s, w8
; CHECK-GI-NEXT:    xtn v1.4h, v1.4s
; CHECK-GI-NEXT:    smull v0.4s, v1.4h, v0.4h
; CHECK-GI-NEXT:    ret
entry:
  %in = sext i16 %src to i32
  %ext.b = sext <4 x i16> %b to <4 x i32>
  ; insertelement into lane 0 + all-zero shuffle mask broadcasts %in to every lane.
  %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %in, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul nsw <4 x i32> %broadcast.splat, %ext.b
  ret <4 x i32> %out
}

; Splat of a zero-extended i16 scalar multiplied by a zero-extended <4 x i16>
; vector. Both selectors are expected to end in umull; SelectionDAG splats
; directly on the narrow type, GlobalISel splats wide and narrows with xtn.
define <4 x i32> @dupzext_v4i16_v4i32(i16 %src, <4 x i16> %b) {
; CHECK-SD-LABEL: dupzext_v4i16_v4i32:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    dup v1.4h, w0
; CHECK-SD-NEXT:    umull v0.4s, v1.4h, v0.4h
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: dupzext_v4i16_v4i32:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    and w8, w0, #0xffff
; CHECK-GI-NEXT:    dup v1.4s, w8
; CHECK-GI-NEXT:    xtn v1.4h, v1.4s
; CHECK-GI-NEXT:    umull v0.4s, v1.4h, v0.4h
; CHECK-GI-NEXT:    ret
entry:
  %in = zext i16 %src to i32
  %ext.b = zext <4 x i16> %b to <4 x i32>
  ; insertelement into lane 0 + all-zero shuffle mask broadcasts %in to every lane.
  %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %in, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul nuw <4 x i32> %broadcast.splat, %ext.b
  ret <4 x i32> %out
}

; Splat of a sign-extended i32 scalar multiplied by a sign-extended <2 x i32>
; vector. Both selectors are expected to end in smull; SelectionDAG splats
; directly on the narrow type, GlobalISel splats wide and narrows with xtn.
define <2 x i64> @dupsext_v2i32_v2i64(i32 %src, <2 x i32> %b) {
; CHECK-SD-LABEL: dupsext_v2i32_v2i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    dup v1.2s, w0
; CHECK-SD-NEXT:    smull v0.2d, v1.2s, v0.2s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: dupsext_v2i32_v2i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-GI-NEXT:    sxtw x8, w0
; CHECK-GI-NEXT:    dup v1.2d, x8
; CHECK-GI-NEXT:    xtn v1.2s, v1.2d
; CHECK-GI-NEXT:    smull v0.2d, v1.2s, v0.2s
; CHECK-GI-NEXT:    ret
entry:
  %in = sext i32 %src to i64
  %ext.b = sext <2 x i32> %b to <2 x i64>
  ; insertelement into lane 0 + all-zero shuffle mask broadcasts %in to every lane.
  %broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
  %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul nsw <2 x i64> %broadcast.splat, %ext.b
  ret <2 x i64> %out
}

; Splat of a zero-extended i32 scalar multiplied by a zero-extended <2 x i32>
; vector. Both selectors are expected to end in umull; SelectionDAG splats
; directly on the narrow type, GlobalISel splats wide and narrows with xtn.
define <2 x i64> @dupzext_v2i32_v2i64(i32 %src, <2 x i32> %b) {
; CHECK-SD-LABEL: dupzext_v2i32_v2i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    dup v1.2s, w0
; CHECK-SD-NEXT:    umull v0.2d, v1.2s, v0.2s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: dupzext_v2i32_v2i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov w8, w0
; CHECK-GI-NEXT:    dup v1.2d, x8
; CHECK-GI-NEXT:    xtn v1.2s, v1.2d
; CHECK-GI-NEXT:    umull v0.2d, v1.2s, v0.2s
; CHECK-GI-NEXT:    ret
entry:
  %in = zext i32 %src to i64
  %ext.b = zext <2 x i32> %b to <2 x i64>
  ; insertelement into lane 0 + all-zero shuffle mask broadcasts %in to every lane.
  %broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
  %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul nuw <2 x i64> %broadcast.splat, %ext.b
  ret <2 x i64> %out
}

; Splat of a zero-extended i32 scalar multiplied by a zero-extended <2 x i32>
; vector, with the <2 x i64> product truncated back to <2 x i32>. Both
; multiplicands are zext, so the widening multiply expected from both selectors
; is the unsigned umull, followed by xtn for the truncate.
define <2 x i32> @dupzext_v2i32_v2i64_trunc(i32 %src, <2 x i32> %b) {
; CHECK-SD-LABEL: dupzext_v2i32_v2i64_trunc:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    dup v1.2s, w0
; CHECK-SD-NEXT:    umull v0.2d, v1.2s, v0.2s
; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: dupzext_v2i32_v2i64_trunc:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov w8, w0
; CHECK-GI-NEXT:    dup v1.2d, x8
; CHECK-GI-NEXT:    xtn v1.2s, v1.2d
; CHECK-GI-NEXT:    umull v0.2d, v1.2s, v0.2s
; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
; CHECK-GI-NEXT:    ret
entry:
  %in = zext i32 %src to i64
  %ext.b = zext <2 x i32> %b to <2 x i64>
  ; insertelement into lane 0 + all-zero shuffle mask broadcasts %in to every lane.
  %broadcast.splatinsert = insertelement <2 x i64> poison, i64 %in, i64 0
  %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
  %prod = mul nuw <2 x i64> %broadcast.splat, %ext.b
  %out = trunc <2 x i64> %prod to <2 x i32>
  ret <2 x i32> %out
}

; Unsupported combines

; Splat of a sign-extended i8 scalar multiplied by a sign-extended <2 x i8>
; vector, producing <2 x i16>. Neither selector is expected to form a widening
; multiply here: both sign-extend the vector in place with shl/sshr on .2s
; lanes and finish with a plain mul on .2s.
define <2 x i16> @dupsext_v2i8_v2i16(i8 %src, <2 x i8> %b) {
; CHECK-SD-LABEL: dupsext_v2i8_v2i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sxtb w8, w0
; CHECK-SD-NEXT:    shl v0.2s, v0.2s, #24
; CHECK-SD-NEXT:    dup v1.2s, w8
; CHECK-SD-NEXT:    sshr v0.2s, v0.2s, #24
; CHECK-SD-NEXT:    mul v0.2s, v1.2s, v0.2s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: dupsext_v2i8_v2i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    lsl w8, w0, #8
; CHECK-GI-NEXT:    shl v0.2s, v0.2s, #24
; CHECK-GI-NEXT:    sbfx w8, w8, #8, #8
; CHECK-GI-NEXT:    sshr v0.2s, v0.2s, #24
; CHECK-GI-NEXT:    dup v1.4h, w8
; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
; CHECK-GI-NEXT:    mul v0.2s, v1.2s, v0.2s
; CHECK-GI-NEXT:    ret
entry:
  %in = sext i8 %src to i16
  %ext.b = sext <2 x i8> %b to <2 x i16>
  ; insertelement into lane 0 + all-zero shuffle mask broadcasts %in to every lane.
  %broadcast.splatinsert = insertelement <2 x i16> undef, i16 %in, i16 0
  %broadcast.splat = shufflevector <2 x i16> %broadcast.splatinsert, <2 x i16> undef, <2 x i32> zeroinitializer
  %out = mul nsw <2 x i16> %broadcast.splat, %ext.b
  ret <2 x i16> %out
}

; Splat of a zero-extended i16 scalar multiplied by a zero-extended <2 x i16>
; vector, producing <2 x i64> (a 4x widening rather than 2x). Both selectors
; are expected to mask the operands down to 16 bits with and, then multiply
; with umull on .2s lanes.
define <2 x i64> @dupzext_v2i16_v2i64(i16 %src, <2 x i16> %b) {
; CHECK-SD-LABEL: dupzext_v2i16_v2i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    movi d1, #0x00ffff0000ffff
; CHECK-SD-NEXT:    and w8, w0, #0xffff
; CHECK-SD-NEXT:    dup v2.2s, w8
; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT:    umull v0.2d, v2.2s, v0.2s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: dupzext_v2i16_v2i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    movi v1.2d, #0x0000000000ffff
; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-GI-NEXT:    and x8, x0, #0xffff
; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT:    dup v1.2d, x8
; CHECK-GI-NEXT:    xtn v1.2s, v1.2d
; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
; CHECK-GI-NEXT:    umull v0.2d, v1.2s, v0.2s
; CHECK-GI-NEXT:    ret
entry:
  %in = zext i16 %src to i64
  %ext.b = zext <2 x i16> %b to <2 x i64>
  ; insertelement into lane 0 + all-zero shuffle mask broadcasts %in to every lane.
  %broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
  %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul nuw <2 x i64> %broadcast.splat, %ext.b
  ret <2 x i64> %out
}

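; Further type combinations in this category, listed by test name. Of the
; names below only dupzext_v2i16_v2i64 is defined above; dupsext_v2i8_v2i16 is
; also defined above but does not appear in this list.
; dupsext_v4i8_v4i16
; dupsext_v2i8_v2i32
; dupsext_v4i8_v4i32
; dupsext_v2i8_v2i64
; dupsext_v2i16_v2i32
; dupsext_v2i16_v2i64
; dupzext_v2i8_v2i16
; dupzext_v4i8_v4i16
; dupzext_v2i8_v2i32
; dupzext_v4i8_v4i32
; dupzext_v2i8_v2i64
; dupzext_v2i16_v2i32
; dupzext_v2i16_v2i64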

; Unsupported states

; The scalar is inserted into lane 1 and the shuffle mask is a rotation rather
; than all-zero, so the IR is not a canonical splat. Only result lane 7 reads
; the inserted value (mask index 1); every other result lane reads a source
; lane that was never written. SelectionDAG is still expected to emit
; dup + smull; GlobalISel is expected to emit a lane insert, ext and a plain mul.
define <8 x i16> @nonsplat_shuffleinsert(i8 %src, <8 x i8> %b) {
; CHECK-SD-LABEL: nonsplat_shuffleinsert:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    dup v1.8b, w0
; CHECK-SD-NEXT:    smull v0.8h, v1.8b, v0.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: nonsplat_shuffleinsert:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    lsl w8, w0, #8
; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT:    sbfx w8, w8, #8, #8
; CHECK-GI-NEXT:    mov v1.h[1], w8
; CHECK-GI-NEXT:    ext v1.16b, v1.16b, v1.16b, #4
; CHECK-GI-NEXT:    mul v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT:    ret
entry:
  %in = sext i8 %src to i16
  %ext.b = sext <8 x i8> %b to <8 x i16>
  %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 1
  %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
  %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
  ret <8 x i16> %out
}

; The multiplicand vector is built from four different sign-extended i16
; scalars (one insertelement per lane) instead of a splat. Both selectors are
; expected to assemble the vector lane by lane on .h elements and then use
; smull; GlobalISel additionally sign-extends each scalar with sxth first.
define <4 x i32> @nonsplat_shuffleinsert2(<4 x i16> %b, i16 %b0, i16 %b1, i16 %b2, i16 %b3) {
; CHECK-SD-LABEL: nonsplat_shuffleinsert2:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    fmov s1, w0
; CHECK-SD-NEXT:    mov v1.h[1], w1
; CHECK-SD-NEXT:    mov v1.h[2], w2
; CHECK-SD-NEXT:    mov v1.h[3], w3
; CHECK-SD-NEXT:    smull v0.4s, v1.4h, v0.4h
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: nonsplat_shuffleinsert2:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sxth w8, w0
; CHECK-GI-NEXT:    sxth w9, w1
; CHECK-GI-NEXT:    fmov s1, w8
; CHECK-GI-NEXT:    sxth w8, w2
; CHECK-GI-NEXT:    mov v1.h[1], w9
; CHECK-GI-NEXT:    mov v1.h[2], w8
; CHECK-GI-NEXT:    sxth w8, w3
; CHECK-GI-NEXT:    mov v1.h[3], w8
; CHECK-GI-NEXT:    smull v0.4s, v1.4h, v0.4h
; CHECK-GI-NEXT:    ret
entry:
  %s0 = sext i16 %b0 to i32
  %s1 = sext i16 %b1 to i32
  %s2 = sext i16 %b2 to i32
  %s3 = sext i16 %b3 to i32
  %ext.b = sext <4 x i16> %b to <4 x i32>
  %v0 = insertelement <4 x i32> undef, i32 %s0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %s1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %s2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %s3, i32 3
  %out = mul nsw <4 x i32> %v3, %ext.b
  ret <4 x i32> %out
}

; Multiply where the splatted scalar is a zero-extended i1 (the result of
; comparing 0 < %a). The other operand is the negated <8 x i16> load from %q,
; widened to 16 lanes by a shuffle whose indices 8-15 select from the undef
; second operand. The product is compared against zero, sign-extended to i8
; and stored to %p. SelectionDAG is expected to emit no multiply at all
; (cmtst/cmeq/bic); GlobalISel is expected to emit a plain mul on .8h.
define void @typei1_orig(i64 %a, ptr %p, ptr %q) {
; CHECK-SD-LABEL: typei1_orig:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    cmp x0, #0
; CHECK-SD-NEXT:    ldr q0, [x2]
; CHECK-SD-NEXT:    cset w8, gt
; CHECK-SD-NEXT:    dup v1.8h, w8
; CHECK-SD-NEXT:    cmtst v0.8h, v0.8h, v0.8h
; CHECK-SD-NEXT:    cmeq v1.8h, v1.8h, #0
; CHECK-SD-NEXT:    bic v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT:    xtn v0.8b, v0.8h
; CHECK-SD-NEXT:    str q0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: typei1_orig:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldr q0, [x2]
; CHECK-GI-NEXT:    cmp x0, #0
; CHECK-GI-NEXT:    cset w8, gt
; CHECK-GI-NEXT:    neg v0.8h, v0.8h
; CHECK-GI-NEXT:    dup v1.8h, w8
; CHECK-GI-NEXT:    mul v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT:    movi v1.2d, #0xffffffffffffffff
; CHECK-GI-NEXT:    cmtst v0.8h, v0.8h, v0.8h
; CHECK-GI-NEXT:    mvn v1.16b, v1.16b
; CHECK-GI-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT:    str q0, [x1]
; CHECK-GI-NEXT:    ret
  ; xor of all-zero with all-true: %tmp is a constant all-true mask.
  %tmp = xor <16 x i1> zeroinitializer, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
  %tmp6 = load <8 x i16>, ptr %q, align 2
  %tmp7 = sub <8 x i16> zeroinitializer, %tmp6
  %tmp8 = shufflevector <8 x i16> %tmp7, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp9 = icmp slt i64 0, %a
  %tmp10 = zext i1 %tmp9 to i16
  ; insertelement into lane 0 + all-zero shuffle mask broadcasts %tmp10 to every lane.
  %tmp11 = insertelement <16 x i16> undef, i16 %tmp10, i64 0
  %tmp12 = shufflevector <16 x i16> %tmp11, <16 x i16> undef, <16 x i32> zeroinitializer
  %tmp13 = mul nuw <16 x i16> %tmp8, %tmp12
  %tmp14 = icmp ne <16 x i16> %tmp13, zeroinitializer
  %tmp15 = and <16 x i1> %tmp14, %tmp
  %tmp16 = sext <16 x i1> %tmp15 to <16 x i8>
  store <16 x i8> %tmp16, ptr %p, align 1
  ret void
}

; Splat of a zero-extended i1 scalar multiplied by a zero-extended <8 x i1>
; vector. SelectionDAG is expected to mask both operands to their low bit and
; use umull on .8b; GlobalISel is expected to widen, mask and use a plain mul.
define <8 x i16> @typei1_v8i1_v8i16(i1 %src, <8 x i1> %b) {
; CHECK-SD-LABEL: typei1_v8i1_v8i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    movi v1.8b, #1
; CHECK-SD-NEXT:    and w8, w0, #0x1
; CHECK-SD-NEXT:    dup v2.8b, w8
; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT:    umull v0.8h, v2.8b, v0.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: typei1_v8i1_v8i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    movi v1.8h, #1
; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT:    and w8, w0, #0x1
; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT:    dup v1.8h, w8
; CHECK-GI-NEXT:    mul v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT:    ret
entry:
  %in = zext i1 %src to i16
  %ext.b = zext <8 x i1> %b to <8 x i16>
  ; insertelement into lane 0 + all-zero shuffle mask broadcasts %in to every lane.
  %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
  %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
  %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
  ret <8 x i16> %out
}

; No scalar insert at all: the shuffle rotates the sign-extended vector itself
; by two lanes, and the result is multiplied by the unrotated vector.
; SelectionDAG is expected to rotate on the narrow type (ext #2 on .8b) and use
; smull; GlobalISel is expected to rotate the widened value, narrow it with
; xtn, and then use smull.
define <8 x i16> @missing_insert(<8 x i8> %b) {
; CHECK-SD-LABEL: missing_insert:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    ext v1.8b, v0.8b, v0.8b, #2
; CHECK-SD-NEXT:    smull v0.8h, v1.8b, v0.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: missing_insert:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sshll v1.8h, v0.8b, #0
; CHECK-GI-NEXT:    ext v1.16b, v1.16b, v1.16b, #4
; CHECK-GI-NEXT:    xtn v1.8b, v1.8h
; CHECK-GI-NEXT:    smull v0.8h, v1.8b, v0.8b
; CHECK-GI-NEXT:    ret
entry:
  %ext.b = sext <8 x i8> %b to <8 x i16>
  %broadcast.splat = shufflevector <8 x i16> %ext.b, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
  %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
  ret <8 x i16> %out
}

; Lane-reversing shuffle of a sign-extended <8 x i8> vector multiplied by
; another sign-extended vector. SelectionDAG is expected to reverse on the
; narrow type (rev64 on .8b) and use smull; GlobalISel is expected to reverse
; the widened value (rev64 + ext), narrow it with xtn, and then use smull.
define <8 x i16> @shufsext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
; CHECK-SD-LABEL: shufsext_v8i8_v8i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    rev64 v0.8b, v0.8b
; CHECK-SD-NEXT:    smull v0.8h, v0.8b, v1.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufsext_v8i8_v8i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT:    rev64 v0.8h, v0.8h
; CHECK-GI-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT:    xtn v0.8b, v0.8h
; CHECK-GI-NEXT:    smull v0.8h, v0.8b, v1.8b
; CHECK-GI-NEXT:    ret
entry:
  %in = sext <8 x i8> %src to <8 x i16>
  %ext.b = sext <8 x i8> %b to <8 x i16>
  %shuf = shufflevector <8 x i16> %in, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  %out = mul nsw <8 x i16> %shuf, %ext.b
  ret <8 x i16> %out
}

; Lane-swapping shuffle of a sign-extended <2 x i32> vector multiplied by
; another sign-extended vector. SelectionDAG is expected to swap on the narrow
; type (rev64 on .2s) and use smull; GlobalISel is expected to swap the widened
; value with ext, narrow it with xtn, and then use smull.
define <2 x i64> @shufsext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
; CHECK-SD-LABEL: shufsext_v2i32_v2i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    rev64 v0.2s, v0.2s
; CHECK-SD-NEXT:    smull v0.2d, v0.2s, v1.2s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufsext_v2i32_v2i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
; CHECK-GI-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
; CHECK-GI-NEXT:    smull v0.2d, v0.2s, v1.2s
; CHECK-GI-NEXT:    ret
entry:
  %in = sext <2 x i32> %src to <2 x i64>
  %ext.b = sext <2 x i32> %b to <2 x i64>
  %shuf = shufflevector <2 x i64> %in, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
  %out = mul nsw <2 x i64> %shuf, %ext.b
  ret <2 x i64> %out
}

; Lane-reversing shuffle of a zero-extended <8 x i8> vector multiplied by
; another zero-extended vector. SelectionDAG is expected to reverse on the
; narrow type (rev64 on .8b) and use umull; GlobalISel is expected to reverse
; the widened value (rev64 + ext), narrow it with xtn, and then use umull.
define <8 x i16> @shufzext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
; CHECK-SD-LABEL: shufzext_v8i8_v8i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    rev64 v0.8b, v0.8b
; CHECK-SD-NEXT:    umull v0.8h, v0.8b, v1.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufzext_v8i8_v8i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT:    rev64 v0.8h, v0.8h
; CHECK-GI-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT:    xtn v0.8b, v0.8h
; CHECK-GI-NEXT:    umull v0.8h, v0.8b, v1.8b
; CHECK-GI-NEXT:    ret
entry:
  %in = zext <8 x i8> %src to <8 x i16>
  %ext.b = zext <8 x i8> %b to <8 x i16>
  %shuf = shufflevector <8 x i16> %in, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  %out = mul nsw <8 x i16> %shuf, %ext.b
  ret <8 x i16> %out
}

; Lane-swapping shuffle of a zero-extended <2 x i32> vector multiplied by
; another zero-extended vector; the unsigned counterpart of
; shufsext_v2i32_v2i64. Both operands are zext, so the widening multiply
; expected from both selectors is umull.
; NOTE(review): the expected output below was written by analogy with
; shufzext_v8i8_v8i16 after switching the IR from sext to zext; re-run
; utils/update_llc_test_checks.py to confirm it.
define <2 x i64> @shufzext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
; CHECK-SD-LABEL: shufzext_v2i32_v2i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    rev64 v0.2s, v0.2s
; CHECK-SD-NEXT:    umull v0.2d, v0.2s, v1.2s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufzext_v2i32_v2i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-GI-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
; CHECK-GI-NEXT:    umull v0.2d, v0.2s, v1.2s
; CHECK-GI-NEXT:    ret
entry:
  %in = zext <2 x i32> %src to <2 x i64>
  %ext.b = zext <2 x i32> %b to <2 x i64>
  %shuf = shufflevector <2 x i64> %in, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
  %out = mul nsw <2 x i64> %shuf, %ext.b
  ret <2 x i64> %out
}

; Two-input shuffle: the mask interleaves the even lanes of two zero-extended
; <8 x i8> vectors, and the result is multiplied by a third zero-extended
; vector. SelectionDAG is expected to shuffle on the narrow type (trn1 on .8b)
; and use umull; GlobalISel is expected to widen both inputs, shuffle with trn1
; on .8h, narrow with xtn, and then use umull.
define <8 x i16> @shufzext_v8i8_v8i16_twoin(<8 x i8> %src1, <8 x i8> %src2, <8 x i8> %b) {
; CHECK-SD-LABEL: shufzext_v8i8_v8i16_twoin:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    trn1 v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT:    umull v0.8h, v0.8b, v2.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufzext_v8i8_v8i16_twoin:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT:    ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT:    trn1 v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT:    xtn v0.8b, v0.8h
; CHECK-GI-NEXT:    umull v0.8h, v0.8b, v2.8b
; CHECK-GI-NEXT:    ret
entry:
  %in1 = zext <8 x i8> %src1 to <8 x i16>
  %in2 = zext <8 x i8> %src2 to <8 x i16>
  %ext.b = zext <8 x i8> %b to <8 x i16>
  %shuf = shufflevector <8 x i16> %in1, <8 x i16> %in2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %out = mul nsw <8 x i16> %shuf, %ext.b
  ret <8 x i16> %out
}

; Two-input shuffle with mixed extensions: the first shuffle input is
; zero-extended and the second is sign-extended. Both selectors share one set
; of expectations here: each input is widened separately, shuffled with trn1 on
; .8h, and multiplied with a plain mul, so no widening multiply is formed.
define <8 x i16> @shufszext_v8i8_v8i16_twoin(<8 x i8> %src1, <8 x i8> %src2, <8 x i8> %b) {
; CHECK-LABEL: shufszext_v8i8_v8i16_twoin:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
; CHECK-NEXT:    trn1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ushll v1.8h, v2.8b, #0
; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
entry:
  %in1 = zext <8 x i8> %src1 to <8 x i16>
  %in2 = sext <8 x i8> %src2 to <8 x i16>
  %ext.b = zext <8 x i8> %b to <8 x i16>
  %shuf = shufflevector <8 x i16> %in1, <8 x i16> %in2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %out = mul nsw <8 x i16> %shuf, %ext.b
  ret <8 x i16> %out
}

