diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index e8041e22b031ce..b94c0c19240620 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -6,7 +6,8 @@ // //===----------------------------------------------------------------------===// // -// This file "describes" induction and recurrence variables. +// This file "describes" induction, recurrence, and conditional scalar +// assignment (CSA) variables. // //===----------------------------------------------------------------------===// @@ -423,6 +424,61 @@ class InductionDescriptor { SmallVector RedundantCasts; }; +/// A Conditional Scalar Assignment (CSA) is an assignment from an initial +/// scalar that may or may not occur. +class CSADescriptor { + /// If the conditional assignment occurs inside a loop, then Phi chooses + /// the value of the assignment from the entry block or the loop body block. + PHINode *Phi = nullptr; + + /// The initial value of the CSA. If the condition guarding the assignment is + /// not met, then the assignment retains this value. + Value *InitScalar = nullptr; + + /// The Instruction that is conditionally assigned to inside the loop. + Instruction *Assignment = nullptr; + + /// Create a CSA Descriptor that models a valid CSA with its members + /// initialized correctly. + CSADescriptor(PHINode *Phi, Instruction *Assignment, Value *InitScalar) + : Phi(Phi), InitScalar(InitScalar), Assignment(Assignment) {} + +public: + /// Create a CSA Descriptor that models an invalid CSA. + CSADescriptor() = default; + + /// If Phi is the root of a CSA, set CSADesc as the CSA rooted by + /// Phi. Otherwise, return false, leaving CSADesc unmodified. 
+ static bool isCSAPhi(PHINode *Phi, Loop *TheLoop, CSADescriptor &CSADesc); + + operator bool() const { return isValid(); } + + /// Returns whether SI is the Assignment in CSA + static bool isCSASelect(CSADescriptor Desc, SelectInst *SI) { + return Desc.getAssignment() == SI; + } + + /// Return whether this CSADescriptor models a valid CSA. + bool isValid() const { return Phi && InitScalar && Assignment; } + + /// Return the PHI that roots this CSA. + PHINode *getPhi() const { return Phi; } + + /// Return the initial value of the CSA. This is the value if the conditional + /// assignment does not occur. + Value *getInitScalar() const { return InitScalar; } + + /// The Instruction that is used after the loop + Instruction *getAssignment() const { return Assignment; } + + /// Return the condition that this CSA is conditional upon. + Value *getCond() const { + if (auto *SI = dyn_cast_or_null(Assignment)) + return SI->getCondition(); + return nullptr; + } +}; + } // end namespace llvm #endif // LLVM_ANALYSIS_IVDESCRIPTORS_H diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index c4d5459d250924..bcbfa6d6979588 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1828,6 +1828,10 @@ class TargetTransformInfo { : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {} }; + /// \returns true if the loop vectorizer should vectorize conditional + /// scalar assignments for the target. + bool enableCSAVectorization() const; + /// \returns How the target needs this vector-predicated operation to be /// transformed. 
VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const; @@ -2266,6 +2270,7 @@ class TargetTransformInfo::Concept { SmallVectorImpl &OpsToSink) const = 0; virtual bool isVectorShiftByScalarCheap(Type *Ty) const = 0; + virtual bool enableCSAVectorization() const = 0; virtual VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0; virtual bool hasArmWideBranch(bool Thumb) const = 0; @@ -3077,6 +3082,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { return Impl.isVectorShiftByScalarCheap(Ty); } + bool enableCSAVectorization() const override { + return Impl.enableCSAVectorization(); + } + VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const override { return Impl.getVPLegalizationStrategy(PI); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 48ebffff8cbfc2..13a3ff99ea6457 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1016,6 +1016,8 @@ class TargetTransformInfoImplBase { bool isVectorShiftByScalarCheap(Type *Ty) const { return false; } + bool enableCSAVectorization() const { return false; } + TargetTransformInfo::VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const { return TargetTransformInfo::VPLegalization( diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index fbe80eddbae07a..38b1f1d731adb5 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -269,6 +269,10 @@ class LoopVectorizationLegality { /// induction descriptor. using InductionList = MapVector; + /// CSAList contains the CSA descriptors for all the CSAs that were found + /// in the loop, rooted by their phis. 
+ using CSAList = MapVector; + /// RecurrenceSet contains the phi nodes that are recurrences other than /// inductions and reductions. using RecurrenceSet = SmallPtrSet; @@ -321,6 +325,12 @@ class LoopVectorizationLegality { /// Returns True if V is a Phi node of an induction variable in this loop. bool isInductionPhi(const Value *V) const; + /// Returns the CSAs found in the loop. + const CSAList &getCSAs() const { return CSAs; } + + /// Returns true if Phi is the root of a CSA in the loop. + bool isCSAPhi(PHINode *Phi) const { return CSAs.count(Phi) != 0; } + /// Returns a pointer to the induction descriptor, if \p Phi is an integer or /// floating point induction. const InductionDescriptor *getIntOrFpInductionDescriptor(PHINode *Phi) const; @@ -550,6 +560,10 @@ class LoopVectorizationLegality { void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID, SmallPtrSetImpl &AllowedExit); + // Updates the vectorization state by adding \p Phi to the CSA list. + void addCSAPhi(PHINode *Phi, const CSADescriptor &CSADesc, + SmallPtrSetImpl &AllowedExit); + /// The loop that we evaluate. Loop *TheLoop; @@ -594,6 +608,9 @@ class LoopVectorizationLegality { /// variables can be pointers. InductionList Inductions; + /// Holds the conditional scalar assignments + CSAList CSAs; + /// Holds all the casts that participate in the update chain of the induction /// variables, and that have been proven to be redundant (possibly under a /// runtime guard). These casts can be ignored when creating the vectorized diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index f74ede4450ce52..06b04564e4064c 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -6,7 +6,8 @@ // //===----------------------------------------------------------------------===// // -// This file "describes" induction and recurrence variables. 
+// This file "describes" induction, recurrence, and conditional scalar +// assignment (CSA) variables. // //===----------------------------------------------------------------------===// @@ -1570,3 +1571,58 @@ bool InductionDescriptor::isInductionPHI( D = InductionDescriptor(StartValue, IK_PtrInduction, Step); return true; } + +/// Return CSADescriptor that describes a CSA that matches one of these +/// patterns: +/// phi loop_inv, (select cmp, value, phi) +/// phi loop_inv, (select cmp, phi, value) +/// phi (select cmp, value, phi), loop_inv +/// phi (select cmp, phi, value), loop_inv +/// If the CSA does not match any of these patterns, return a CSADescriptor +/// that describes an InvalidCSA. +bool CSADescriptor::isCSAPhi(PHINode *Phi, Loop *TheLoop, CSADescriptor &CSA) { + + // Must be a scalar + Type *Type = Phi->getType(); + if (!Type->isIntegerTy() && !Type->isFloatingPointTy() && + !Type->isPointerTy()) + return false; + + // Match phi loop_inv, (select cmp, value, phi) + // or phi loop_inv, (select cmp, phi, value) + // or phi (select cmp, value, phi), loop_inv + // or phi (select cmp, phi, value), loop_inv + if (Phi->getNumIncomingValues() != 2) + return false; + auto SelectInstIt = find_if(Phi->incoming_values(), [&Phi](const Use &U) { + return match(U.get(), m_Select(m_Value(), m_Specific(Phi), m_Value())) || + match(U.get(), m_Select(m_Value(), m_Value(), m_Specific(Phi))); + }); + if (SelectInstIt == Phi->incoming_values().end()) + return false; + auto LoopInvIt = find_if(Phi->incoming_values(), [&](Use &U) { + return U.get() != *SelectInstIt && TheLoop->isLoopInvariant(U.get()); + }); + if (LoopInvIt == Phi->incoming_values().end()) + return false; + + // Phi or Sel must be used only outside the loop, + // excluding if Phi uses Sel or Sel uses Phi + auto IsOnlyUsedOutsideLoop = [&](Value *V, Value *Ignore) { + return all_of(V->users(), [Ignore, TheLoop](User *U) { + if (U == Ignore) + return true; + if (auto *I = dyn_cast(U)) + return 
!TheLoop->contains(I); + return true; + }); + }; + Instruction *Select = cast(SelectInstIt->get()); + Value *LoopInv = LoopInvIt->get(); + if (!IsOnlyUsedOutsideLoop(Phi, Select) || + !IsOnlyUsedOutsideLoop(Select, Phi)) + return false; + + CSA = CSADescriptor(Phi, Select, LoopInv); + return true; +} diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index d4b6c08c5a32b2..e54cf60fb6b7d7 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1351,6 +1351,10 @@ bool TargetTransformInfo::preferEpilogueVectorization() const { return TTIImpl->preferEpilogueVectorization(); } +bool TargetTransformInfo::enableCSAVectorization() const { + return TTIImpl->enableCSAVectorization(); +} + TargetTransformInfo::VPLegalization TargetTransformInfo::getVPLegalizationStrategy(const VPIntrinsic &VPI) const { return TTIImpl->getVPLegalizationStrategy(VPI); diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 49192bd6380223..b061ae3a6262a1 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -2361,6 +2361,11 @@ bool RISCVTTIImpl::isLegalMaskedExpandLoad(Type *DataTy, Align Alignment) { return true; } +bool RISCVTTIImpl::enableCSAVectorization() const { + return ST->hasVInstructions() && + ST->getProcFamily() == RISCVSubtarget::SiFive7; +} + bool RISCVTTIImpl::isLegalMaskedCompressStore(Type *DataTy, Align Alignment) { auto *VTy = dyn_cast(DataTy); if (!VTy || VTy->isScalableTy()) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index bd90bfed6e2c95..dc5850d05b15c9 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -306,6 +306,10 @@ class RISCVTTIImpl : public BasicTTIImplBase { return 
TLI->isVScaleKnownToBeAPowerOfTwo(); } + /// \returns true if the loop vectorizer should vectorize conditional + /// scalar assignments for the target. + bool enableCSAVectorization() const; + /// \returns How the target needs this vector-predicated operation to be /// transformed. TargetTransformInfo::VPLegalization diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 555c8435dd330d..cb8e5800c9baa8 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -83,6 +83,10 @@ static cl::opt EnableHistogramVectorization( "enable-histogram-loop-vectorization", cl::init(false), cl::Hidden, cl::desc("Enables autovectorization of some loops containing histograms")); +static cl::opt + EnableCSA("enable-csa-vectorization", cl::init(false), cl::Hidden, + cl::desc("Control whether CSA loop vectorization is enabled")); + /// Maximum vectorization interleave count. static const unsigned MaxInterleaveFactor = 16; @@ -750,6 +754,15 @@ bool LoopVectorizationLegality::setupOuterLoopInductions() { return llvm::all_of(Header->phis(), IsSupportedPhi); } +void LoopVectorizationLegality::addCSAPhi( + PHINode *Phi, const CSADescriptor &CSADesc, + SmallPtrSetImpl &AllowedExit) { + assert(CSADesc.isValid() && "Expected Valid CSADescriptor"); + LLVM_DEBUG(dbgs() << "LV: found legal CSA opportunity" << *Phi << "\n"); + AllowedExit.insert(Phi); + CSAs.insert({Phi, CSADesc}); +} + /// Checks if a function is scalarizable according to the TLI, in /// the sense that it should be vectorized and then expanded in /// multiple scalar calls. This is represented in the @@ -867,14 +880,24 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { continue; } - // As a last resort, coerce the PHI to a AddRec expression - // and re-try classifying it a an induction PHI. 
+ // Try to coerce the PHI to a AddRec expression and re-try classifying + // it as an induction PHI. if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) && !IsDisallowedStridedPointerInduction(ID)) { addInductionPhi(Phi, ID, AllowedExit); continue; } + // Check if the PHI can be classified as a CSA PHI. + if (EnableCSA || (TTI->enableCSAVectorization() && + EnableCSA.getNumOccurrences() == 0)) { + CSADescriptor CSADesc; + if (CSADescriptor::isCSAPhi(Phi, TheLoop, CSADesc)) { + addCSAPhi(Phi, CSADesc, AllowedExit); + continue; + } + } + reportVectorizationFailure("Found an unidentified PHI", "value that could not be identified as " "reduction is used outside the loop", @@ -1858,11 +1881,15 @@ bool LoopVectorizationLegality::canFoldTailByMasking() const { for (const auto &Reduction : getReductionVars()) ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr()); + SmallPtrSet CSALiveOuts; + for (const auto &CSA : getCSAs()) + CSALiveOuts.insert(CSA.second.getAssignment()); + // TODO: handle non-reduction outside users when tail is folded by masking. for (auto *AE : AllowedExit) { // Check that all users of allowed exit values are inside the loop or - // are the live-out of a reduction. - if (ReductionLiveOuts.count(AE) + // are the live-out of a reduction or a CSA. 
+ if (ReductionLiveOuts.count(AE) || CSALiveOuts.count(AE)) continue; for (User *U : AE->users()) { Instruction *UI = cast(U); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index fbcf181a45a664..df68686b57dabb 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -174,8 +174,8 @@ class VPBuilder { new VPInstruction(Opcode, Operands, WrapFlags, DL, Name)); } - VPValue *createNot(VPValue *Operand, DebugLoc DL = {}, - const Twine &Name = "") { + VPInstruction *createNot(VPValue *Operand, DebugLoc DL = {}, + const Twine &Name = "") { return createInstruction(VPInstruction::Not, {Operand}, DL, Name); } @@ -231,6 +231,37 @@ class VPBuilder { Ptr, Offset, VPRecipeWithIRFlags::GEPFlagsTy(true), DL, Name)); } + VPInstruction *createCSAMaskPhi(VPValue *InitMask, DebugLoc DL, + const Twine &Name) { + return createInstruction(VPInstruction::CSAMaskPhi, {InitMask}, DL, Name); + } + + VPInstruction *createAnyOf(VPValue *Cond, DebugLoc DL, const Twine &Name) { + return createInstruction(VPInstruction::AnyOf, {Cond}, DL, Name); + } + + VPInstruction *createCSAMaskSel(VPValue *Cond, VPValue *MaskPhi, + VPValue *AnyOf, DebugLoc DL, + const Twine &Name) { + return createInstruction(VPInstruction::CSAMaskSel, {Cond, MaskPhi, AnyOf}, + DL, Name); + } + + VPInstruction *createAnyOfEVL(VPValue *Cond, VPValue *EVL, DebugLoc DL, + const Twine &Name) { + return createInstruction(VPInstruction::AnyOfEVL, {Cond, EVL}, DL, Name); + } + + VPInstruction *createCSAVLPhi(DebugLoc DL, const Twine &Name) { + return createInstruction(VPInstruction::CSAVLPhi, {}, DL, Name); + } + + VPInstruction *createCSAVLSel(VPValue *AnyOfEVL, VPValue *VLPhi, VPValue *EVL, + DebugLoc DL, const Twine &Name) { + return createInstruction(VPInstruction::CSAVLSel, {AnyOfEVL, VLPhi, EVL}, + DL, Name); + } + VPDerivedIVRecipe 
*createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 61f7bd84902815..37e34db1f5fad5 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -174,6 +174,8 @@ const char LLVMLoopVectorizeFollowupEpilogue[] = STATISTIC(LoopsVectorized, "Number of loops vectorized"); STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization"); STATISTIC(LoopsEpilogueVectorized, "Number of epilogues vectorized"); +STATISTIC(CSAsVectorized, + "Number of conditional scalar assignments vectorized"); static cl::opt EnableEpilogueVectorization( "enable-epilogue-vectorization", cl::init(true), cl::Hidden, @@ -4612,6 +4614,9 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF, case VPDef::VPEVLBasedIVPHISC: case VPDef::VPPredInstPHISC: case VPDef::VPBranchOnMaskSC: + case VPRecipeBase::VPCSADataUpdateSC: + case VPRecipeBase::VPCSAExtractScalarSC: + case VPRecipeBase::VPCSAHeaderPHISC: continue; case VPDef::VPReductionSC: case VPDef::VPActiveLaneMaskPHISC: @@ -7550,9 +7555,17 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan, /// not have corresponding recipes in \p Plan and are not marked to be ignored /// in \p CostCtx. This means the VPlan contains simplification that the legacy /// cost-model did not account for. -static bool planContainsAdditionalSimplifications(VPlan &Plan, - VPCostContext &CostCtx, - Loop *TheLoop) { +static bool +planContainsAdditionalSimplifications(VPlan &Plan, VPCostContext &CostCtx, + Loop *TheLoop, + LoopVectorizationLegality &Legal) { + // CSA cost is more complicated since there is significant overhead in the + // preheader and middle block. It also contains recipes that are not backed by + // underlying instructions in the original loop. 
This makes it difficult to + // model in the legacy cost model. + if (!Legal.getCSAs().empty()) + return true; + // First collect all instructions for the recipes in Plan. auto GetInstructionForCost = [](const VPRecipeBase *R) -> Instruction * { if (auto *S = dyn_cast(R)) @@ -7659,9 +7672,9 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() { precomputeCosts(BestPlan, BestFactor.Width, CostCtx); assert((BestFactor.Width == LegacyVF.Width || planContainsAdditionalSimplifications(getPlanFor(BestFactor.Width), - CostCtx, OrigLoop) || + CostCtx, OrigLoop, *Legal) || planContainsAdditionalSimplifications(getPlanFor(LegacyVF.Width), - CostCtx, OrigLoop)) && + CostCtx, OrigLoop, *Legal)) && " VPlan cost model and legacy cost model disagreed"); assert((BestFactor.Width.isScalar() || BestFactor.ScalarCost > 0) && "when vectorizing, the scalar cost must be computed."); @@ -8806,9 +8819,6 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, return Recipe; VPHeaderPHIRecipe *PhiRecipe = nullptr; - assert((Legal->isReductionVariable(Phi) || - Legal->isFixedOrderRecurrence(Phi)) && - "can only widen reductions and fixed-order recurrences here"); VPValue *StartV = Operands[0]; if (Legal->isReductionVariable(Phi)) { const RecurrenceDescriptor &RdxDesc = @@ -8818,12 +8828,28 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, PhiRecipe = new VPReductionPHIRecipe(Phi, RdxDesc, *StartV, CM.isInLoopReduction(Phi), CM.useOrderedReductions(RdxDesc)); - } else { + } else if (Legal->isFixedOrderRecurrence(Phi)) { // TODO: Currently fixed-order recurrences are modeled as chains of // first-order recurrences. If there are no users of the intermediate // recurrences in the chain, the fixed order recurrence should be modeled // directly, enabling more efficient codegen. 
PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV); + } else if (Legal->isCSAPhi(Phi)) { + VPValue *InitScalar = Plan.getOrAddLiveIn( + Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader())); + + // Don't build full CSA for VF=ElementCount::getFixed(1) + bool IsScalarVF = LoopVectorizationPlanner::getDecisionAndClampRange( + [&](ElementCount VF) { return VF.isScalar(); }, Range); + + // When the VF=getFixed(1), InitData is just InitScalar. + VPValue *InitData = + IsScalarVF ? InitScalar + : getVPValueOrAddLiveIn(PoisonValue::get(Phi->getType())); + PhiRecipe = new VPCSAHeaderPHIRecipe(Phi, InitData); + } else { + llvm_unreachable( + "can only widen reductions, fixed-order recurrences, and CSAs here"); } PhisToFix.push_back(PhiRecipe); @@ -8857,6 +8883,23 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, make_range(Operands.begin(), Operands.end())); if (auto *SI = dyn_cast(Instr)) { + auto *CSADescIt = find_if(Legal->getCSAs(), [&](auto CSA) { + return CSADescriptor::isCSASelect(CSA.second, SI); + }); + if (CSADescIt != Legal->getCSAs().end()) { + for (VPRecipeBase &R : + Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { + if (auto PhiR = dyn_cast(&R)) { + if (PhiR->getUnderlyingInstr() == CSADescIt->first) { + auto *R = new VPCSADataUpdateRecipe( + SI, {PhiR, Operands[0], Operands[1], Operands[2]}); + PhiR->setDataUpdate(R); + return R; + } + } + } + } + return new VPWidenSelectRecipe( *SI, make_range(Operands.begin(), Operands.end())); } @@ -8869,6 +8912,72 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, return tryToWiden(Instr, Operands, VPBB); } +/// Add CSA Recipes that must occur after each instruction in the input IR +/// is processed and introduced into VPlan. 
+static void +addCSAPostprocessRecipes(VPRecipeBuilder &RecipeBuilder, + const LoopVectorizationLegality::CSAList &CSAs, + VPBasicBlock *MiddleVPBB, DebugLoc DL, VFRange &Range, + VPlan &Plan, Loop *OrigLoop) { + // Don't build CSA for VF=ElementCount::getFixed(1) + if (LoopVectorizationPlanner::getDecisionAndClampRange( + [&](ElementCount VF) { return VF.isScalar(); }, Range)) + return; + + VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock(); + for (const auto &CSA : CSAs) { + // Build the MaskPhi recipe. + auto *VPInitMask = RecipeBuilder.getVPValueOrAddLiveIn( + ConstantInt::getFalse(Type::getInt1Ty(CSA.first->getContext()))); + VPBuilder B; + B.setInsertPoint(Header, Header->getFirstNonPhi()); + auto *VPMaskPhi = B.createCSAMaskPhi(VPInitMask, DL, "csa.mask.phi"); + B.clearInsertionPoint(); + + auto GetVPValue = [&](Value *I) { + return RecipeBuilder.getRecipe(cast(I))->getVPSingleValue(); + }; + VPCSADataUpdateRecipe *VPDataUpdate = cast( + cast(GetVPValue(CSA.first))->getVPNewData()); + + // The CSA optimization wants to use a condition such that when it is + // true, a new value is assigned. However, it is possible that a true lane + // in WidenedCond corresponds to selection of the initial value instead. + // In that case, we must use the negation of WidenedCond. + // i.e. 
select cond new_val old_val versus select cond.not old_val new_val + assert(CSA.second.getCond() && + "CSADescriptor must know how to describe the condition"); + VPValue *WidenedCond = GetVPValue(CSA.second.getCond()); + VPValue *CondToUse = WidenedCond; + if (cast(CSA.second.getAssignment())->getTrueValue() == + CSA.first) { + auto *VPNotCond = B.createNot(WidenedCond, DL); + VPNotCond->insertBefore(VPDataUpdate); + CondToUse = VPNotCond; + } + + auto *VPAnyOf = B.createAnyOf(CondToUse, DL, "csa.cond.anyof"); + VPAnyOf->insertBefore(VPDataUpdate); + + auto *VPMaskSel = + B.createCSAMaskSel(CondToUse, VPMaskPhi, VPAnyOf, DL, "csa.mask.sel"); + VPMaskSel->insertAfter(VPAnyOf); + + VPDataUpdate->setVPNewMaskAndVPAnyOf(VPMaskSel, VPAnyOf); + VPValue *VPInitScalar = Plan.getOrAddLiveIn( + CSA.first->getIncomingValueForBlock(OrigLoop->getLoopPreheader())); + SmallVector PhiToFix; + for (User *U : VPDataUpdate->getUnderlyingValue()->users()) + if (auto *Phi = dyn_cast(U); + Phi && Phi->getParent() == OrigLoop->getUniqueExitBlock()) + PhiToFix.emplace_back(Phi); + VPCSAExtractScalarRecipe *ExtractScalarRecipe = + new VPCSAExtractScalarRecipe({VPInitScalar, VPMaskSel, VPDataUpdate}, + PhiToFix); + MiddleVPBB->insert(ExtractScalarRecipe, MiddleVPBB->getFirstNonPhi()); + } +} + void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF) { assert(OrigLoop->isInnermost() && "Inner loop expected."); @@ -8961,7 +9070,8 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) { // increments. 
static SetVector collectUsersInExitBlocks( Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan, - const MapVector &Inductions) { + const MapVector &Inductions, + const MapVector &CSAs) { auto *MiddleVPBB = Plan.getMiddleBlock(); SetVector ExitUsersToFix; for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) { @@ -8998,6 +9108,16 @@ static SetVector collectUsersInExitBlocks( if (ExitVPBB->getSinglePredecessor() == MiddleVPBB) continue; } + // Exit values for CSAs are computed and updated outside of VPlan and + // independent of induction recipes. + // TODO: Compute CSA exit values in VPlan. + if (isa(V) && + (isa(IncomingValue) && + any_of(IncomingValue->users(), [&CSAs](User *U) { + auto *P = dyn_cast(U); + return P && CSAs.contains(P); + }))) + continue; ExitUsersToFix.insert(ExitIRI); ExitIRI->addOperand(V); } @@ -9302,6 +9422,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { VPBB = cast(VPBB->getSingleSuccessor()); } + addCSAPostprocessRecipes(RecipeBuilder, Legal->getCSAs(), MiddleVPBB, DL, + Range, *Plan, OrigLoop); + // After here, VPBB should not be used. 
VPBB = nullptr; @@ -9317,8 +9440,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { *Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder); } addScalarResumePhis(RecipeBuilder, *Plan); - SetVector ExitUsersToFix = collectUsersInExitBlocks( - OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars()); + SetVector ExitUsersToFix = + collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan, + Legal->getInductionVars(), Legal->getCSAs()); addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix); if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) { reportVectorizationFailure( @@ -10496,6 +10620,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, *BestMainPlan, MainILV, DT, false); ++LoopsVectorized; + CSAsVectorized += LVL.getCSAs().size(); // Second pass vectorizes the epilogue and adjusts the control flow // edges from the first pass. @@ -10519,6 +10644,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { PSI, Checks, BestPlan); LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false); ++LoopsVectorized; + CSAsVectorized += LVL.getCSAs().size(); // Add metadata to disable runtime unrolling a scalar loop when there // are no runtime checks about strides and memory. A scalar loop that is diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 432030a7b1adf3..c49df7addd5d7d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1044,7 +1044,7 @@ void VPlan::execute(VPTransformState *State) { VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock(); for (VPRecipeBase &R : Header->phis()) { // Skip phi-like recipes that generate their backedege values themselves. 
- if (isa(&R)) + if (R.isPhiThatGeneratesBackedge()) continue; if (isa(&R)) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 12208a7968338b..4410df12670bfc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -795,10 +795,29 @@ class VPRecipeBase : public ilist_node_with_parent, bool mayHaveSideEffects() const; /// Returns true for PHI-like recipes. - bool isPhi() const { + virtual bool isPhi() const { + assert(getVPDefID() != VPInstructionSC && + "VPInstructions implement this function themselves"); return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC; } + /// Returns true for PHI-like recipes that exists in vector loop header basic + /// block + virtual bool isHeaderPhi() const { + assert(getVPDefID() != VPInstructionSC && + "VPInstructions implement this function themselves"); + return (getVPDefID() >= VPFirstHeaderPHISC && + getVPDefID() <= VPLastHeaderPHISC) || + getVPDefID() == VPWidenPHISC; + } + + /// Returns true for PHI-like recipes that generate their own backedge + virtual bool isPhiThatGeneratesBackedge() const { + assert(getVPDefID() != VPInstructionSC && + "VPInstructions implement this function themselves"); + return getVPDefID() == VPWidenPHISC || getVPDefID() == VPCSAHeaderPHISC; + } + /// Returns true if the recipe may read from memory. bool mayReadFromMemory() const; @@ -889,6 +908,9 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue { case VPRecipeBase::VPWidenPointerInductionSC: case VPRecipeBase::VPReductionPHISC: case VPRecipeBase::VPScalarCastSC: + case VPRecipeBase::VPCSAHeaderPHISC: + case VPRecipeBase::VPCSADataUpdateSC: + case VPRecipeBase::VPCSAExtractScalarSC: return true; case VPRecipeBase::VPBranchOnMaskSC: case VPRecipeBase::VPInterleaveSC: @@ -1243,6 +1265,11 @@ class VPInstruction : public VPRecipeWithIRFlags, // Returns a scalar boolean value, which is true if any lane of its single // operand is true. 
AnyOf, + CSAMaskPhi, + CSAMaskSel, + CSAVLPhi, + CSAVLSel, + AnyOfEVL, }; private: @@ -1393,6 +1420,16 @@ class VPInstruction : public VPRecipeWithIRFlags, /// Returns the symbolic name assigned to the VPInstruction. StringRef getName() const { return Name; } + + /// Returns true for PHI-like recipes. + bool isPhi() const override; + + /// Returns true for PHI-like recipes that exists in vector loop header basic + /// block + bool isHeaderPhi() const override; + + /// Returns true for PHI-like recipes that generate their own backedge + bool isPhiThatGeneratesBackedge() const override; }; /// A recipe to wrap on original IR instruction not to be modified during @@ -2850,6 +2887,128 @@ class VPBranchOnMaskRecipe : public VPRecipeBase { } }; +class VPCSAHeaderPHIRecipe final : public VPHeaderPHIRecipe { +public: + VPCSAHeaderPHIRecipe(PHINode *Phi, VPValue *VPInitData) + : VPHeaderPHIRecipe(VPDef::VPCSAHeaderPHISC, Phi, VPInitData) {} + + ~VPCSAHeaderPHIRecipe() override = default; + + VPCSAHeaderPHIRecipe *clone() override { + return new VPCSAHeaderPHIRecipe(cast(getUnderlyingInstr()), + getOperand(0)); + } + + void execute(VPTransformState &State) override; + + InstructionCost computeCost(ElementCount VF, + VPCostContext &Ctx) const override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. 
+ void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif + + VP_CLASSOF_IMPL(VPDef::VPCSAHeaderPHISC) + + static inline bool classof(const VPHeaderPHIRecipe *R) { + return R->getVPDefID() == VPDef::VPCSAHeaderPHISC; + } + + VPValue *getVPInitData() { return getOperand(0); } + + VPValue *NewData = nullptr; + void setDataUpdate(VPValue *V) { NewData = V; } + VPValue *getVPNewData() { return NewData; } +}; + +class VPCSADataUpdateRecipe final : public VPSingleDefRecipe { +public: + VPCSADataUpdateRecipe(SelectInst *SI, ArrayRef Operands) + : VPSingleDefRecipe(VPDef::VPCSADataUpdateSC, Operands, SI) {} + + ~VPCSADataUpdateRecipe() override = default; + + VPCSADataUpdateRecipe *clone() override { + SmallVector Ops(operands()); + return new VPCSADataUpdateRecipe(cast(getUnderlyingInstr()), + Ops); + } + + void execute(VPTransformState &State) override; + + InstructionCost computeCost(ElementCount VF, + VPCostContext &Ctx) const override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif + + VP_CLASSOF_IMPL(VPDef::VPCSADataUpdateSC) + + VPValue *getVPDataPhi() const { return getOperand(0); } + + // The condition from the original select statement + VPValue *getVPCond() const { return getOperand(1); } + + // The true value from the original select statement + VPValue *getVPTrue() const { return getOperand(2); } + + // The false value from the original select statement + VPValue *getVPFalse() const { return getOperand(3); } + + // We combine the setters so we can be sure NewMask is before AnyOf + // in the operands list, so the getters can be sure which operand numbers + // to get. 
+ void setVPNewMaskAndVPAnyOf(VPValue *NewMask, VPValue *AnyOf) { + addOperand(NewMask); + addOperand(AnyOf); + } + + VPValue *getVPNewMask() const { return getOperand(4); } + + VPValue *getVPAnyOf() const { return getOperand(5); } +}; + +class VPCSAExtractScalarRecipe final : public VPSingleDefRecipe { + SmallVector PhisToFix; + +public: + VPCSAExtractScalarRecipe(ArrayRef Operands, + SmallVector PhisToFix) + : VPSingleDefRecipe(VPDef::VPCSAExtractScalarSC, Operands), + PhisToFix(PhisToFix) {} + + ~VPCSAExtractScalarRecipe() override = default; + + VPCSAExtractScalarRecipe *clone() override { + SmallVector Ops(operands()); + return new VPCSAExtractScalarRecipe(Ops, PhisToFix); + } + + void execute(VPTransformState &State) override; + + InstructionCost computeCost(ElementCount VF, + VPCostContext &Ctx) const override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif + + VP_CLASSOF_IMPL(VPDef::VPCSAExtractScalarSC) + + VPValue *getVPInitScalar() const { return getOperand(0); } + VPValue *getVPMaskSel() const { return getOperand(1); } + VPValue *getVPDataSel() const { return getOperand(2); } + VPValue *getVPCSAVLSel() const { return getOperand(3); } + bool usesEVL() const { return getNumOperands() == 4; } +}; + /// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when /// control converges back from a Branch-on-Mask. The phi nodes are needed in /// order to merge values that are set under such a branch and feed their uses. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 0d981ff5826ed8..221c50fc0fadee 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -249,6 +249,13 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { }) .Case([this](const auto *R) { return inferScalarType(R->getChainOp()); + }) + .Case( + [V](const auto *R) { return V->getUnderlyingValue()->getType(); }) + .Case( + [V](const auto *R) { return V->getUnderlyingValue()->getType(); }) + .Case([V](const auto *R) { + return V->getUnderlyingValue()->getType(); }); assert(ResultTy && "could not infer type for the given VPValue"); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index ab95b647f211b7..fed6dc8c78a700 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -363,6 +363,9 @@ bool VPInstruction::canGenerateScalarForFirstLane() const { case VPInstruction::PtrAdd: case VPInstruction::ExplicitVectorLength: case VPInstruction::AnyOf: + case VPInstruction::AnyOfEVL: + case VPInstruction::CSAVLSel: + case VPInstruction::CSAVLPhi: return true; default: return false; @@ -649,6 +652,57 @@ Value *VPInstruction::generate(VPTransformState &State) { Value *A = State.get(getOperand(0)); return Builder.CreateOrReduce(A); } + case VPInstruction::AnyOfEVL: { + Value *WidenedCond = State.get(getOperand(0)); + Value *AllOnesMask = Constant::getAllOnesValue( + VectorType::get(Type::getInt1Ty(State.Builder.getContext()), State.VF)); + Value *EVL = State.get(getOperand(1), /*NeedsScalar=*/true); + + Value *StartValue = + ConstantInt::get(WidenedCond->getType()->getScalarType(), 0); + Value *AnyOf = State.Builder.CreateIntrinsic( + WidenedCond->getType()->getScalarType(), Intrinsic::vp_reduce_or, + {StartValue, WidenedCond, AllOnesMask, EVL}, nullptr, "any.active"); + return 
AnyOf; + } + case VPInstruction::CSAMaskPhi: { + BasicBlock *PreheaderBB = State.CFG.getPreheaderBBFor(this); + Value *InitMask = State.get(getOperand(0)); + PHINode *MaskPhi = + State.Builder.CreatePHI(InitMask->getType(), 2, "csa.mask.phi"); + MaskPhi->addIncoming(InitMask, PreheaderBB); + State.set(this, MaskPhi); + return MaskPhi; + } + case VPInstruction::CSAMaskSel: { + Value *WidenedCond = State.get(getOperand(0)); + Value *MaskPhi = State.get(getOperand(1)); + Value *AnyOf = State.get(getOperand(2), /*NeedsScalar=*/true); + Value *MaskSel = + State.Builder.CreateSelect(AnyOf, WidenedCond, MaskPhi, "csa.mask.sel"); + cast(MaskPhi)->addIncoming(MaskSel, State.CFG.PrevBB); + return MaskSel; + } + case VPInstruction::CSAVLPhi: { + IRBuilder<>::InsertPointGuard Guard(State.Builder); + State.Builder.SetInsertPoint(State.CFG.PrevBB->getFirstNonPHI()); + BasicBlock *PreheaderBB = State.CFG.getPreheaderBBFor(this); + + // InitVL can be anything since it won't be used if no mask was active + Value *InitVL = ConstantInt::get(State.Builder.getInt32Ty(), 0); + PHINode *VLPhi = + State.Builder.CreatePHI(InitVL->getType(), 2, "csa.vl.phi"); + VLPhi->addIncoming(InitVL, PreheaderBB); + return VLPhi; + } + case VPInstruction::CSAVLSel: { + Value *AnyOf = State.get(getOperand(0), /*NeedsScalar=*/true); + Value *VLPhi = State.get(getOperand(1), /*NeedsScalar=*/true); + Value *EVL = State.get(getOperand(2), /*NeedsScalar=*/true); + Value *VLSel = State.Builder.CreateSelect(AnyOf, EVL, VLPhi, "csa.vl.sel"); + cast(VLPhi)->addIncoming(VLSel, State.CFG.PrevBB); + return VLSel; + } default: llvm_unreachable("Unsupported opcode for instruction"); @@ -658,11 +712,30 @@ Value *VPInstruction::generate(VPTransformState &State) { bool VPInstruction::isVectorToScalar() const { return getOpcode() == VPInstruction::ExtractFromEnd || getOpcode() == VPInstruction::ComputeReductionResult || - getOpcode() == VPInstruction::AnyOf; + getOpcode() == VPInstruction::AnyOf || + getOpcode() == 
VPInstruction::AnyOfEVL; } bool VPInstruction::isSingleScalar() const { - return getOpcode() == VPInstruction::ResumePhi; + return getOpcode() == VPInstruction::ResumePhi || + getOpcode() == VPInstruction::CSAVLPhi || + getOpcode() == VPInstruction::CSAVLSel || + getOpcode() == VPInstruction::ExplicitVectorLength; +} + +bool VPInstruction::isPhi() const { + return getOpcode() == VPInstruction::CSAMaskPhi || + getOpcode() == VPInstruction::CSAVLPhi; +} + +bool VPInstruction::isHeaderPhi() const { + return getOpcode() == VPInstruction::CSAMaskPhi || + getOpcode() == VPInstruction::CSAVLPhi; +} + +bool VPInstruction::isPhiThatGeneratesBackedge() const { + return getOpcode() == VPInstruction::CSAMaskPhi || + getOpcode() == VPInstruction::CSAVLPhi; } #if !defined(NDEBUG) @@ -820,6 +893,21 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::AnyOf: O << "any-of"; break; + case VPInstruction::AnyOfEVL: + O << "anyof-evl"; + break; + case VPInstruction::CSAMaskPhi: + O << "csa-mask-phi"; + break; + case VPInstruction::CSAMaskSel: + O << "csa-mask-sel"; + break; + case VPInstruction::CSAVLPhi: + O << "csa-vl-phi"; + break; + case VPInstruction::CSAVLSel: + O << "csa-vl-sel"; + break; default: O << Instruction::getOpcodeName(getOpcode()); } @@ -2356,6 +2444,222 @@ void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent, } #endif +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPCSAHeaderPHIRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "EMIT "; + printAsOperand(O, SlotTracker); + O << " = csa-data-phi "; + printOperands(O, SlotTracker); +} +#endif + +void VPCSAHeaderPHIRecipe::execute(VPTransformState &State) { + // PrevBB is this BB + IRBuilder<>::InsertPointGuard Guard(State.Builder); + State.Builder.SetInsertPoint(State.CFG.PrevBB->getFirstNonPHI()); + + Value *InitData = State.get(getVPInitData(), 0); + PHINode *DataPhi = + 
State.Builder.CreatePHI(InitData->getType(), 2, "csa.data.phi"); + BasicBlock *PreheaderBB = State.CFG.getPreheaderBBFor(this); + DataPhi->addIncoming(InitData, PreheaderBB); + // Note: We didn't add Incoming for the new data since VPCSADataUpdateRecipe + // may not have been executed. We let VPCSADataUpdateRecipe::execute add the + // incoming operand to DataPhi. + + State.set(this, DataPhi); +} + +InstructionCost VPCSAHeaderPHIRecipe::computeCost(ElementCount VF, + VPCostContext &Ctx) const { + if (VF.isScalar()) + return 0; + + InstructionCost C = 0; + auto *VTy = VectorType::get(Ctx.Types.inferScalarType(this), VF); + const TargetTransformInfo &TTI = Ctx.TTI; + + // FIXME: These costs should be moved into VPInstruction::computeCost. We put + // them here for now since there is no VPInstruction::computeCost support. + // CSAInitMask + C += TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VTy); + // CSAInitData + C += TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VTy); + return C; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPCSADataUpdateRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "EMIT "; + printAsOperand(O, SlotTracker); + O << " = csa-data-update "; + printOperands(O, SlotTracker); +} +#endif + +void VPCSADataUpdateRecipe::execute(VPTransformState &State) { + Value *AnyOf = State.get(getVPAnyOf(), /*NeedsScalar=*/true); + Value *DataUpdate = getVPDataPhi() == getVPTrue() ? 
State.get(getVPFalse()) + : State.get(getVPTrue()); + PHINode *DataPhi = cast(State.get(getVPDataPhi())); + Value *DataSel = + State.Builder.CreateSelect(AnyOf, DataUpdate, DataPhi, "csa.data.sel"); + + DataPhi->addIncoming(DataSel, State.CFG.PrevBB); + + State.set(this, DataSel); +} + +InstructionCost VPCSADataUpdateRecipe::computeCost(ElementCount VF, + VPCostContext &Ctx) const { + if (VF.isScalar()) + return 0; + + InstructionCost C = 0; + auto *VTy = VectorType::get(Ctx.Types.inferScalarType(this), VF); + auto *MaskTy = VectorType::get(IntegerType::getInt1Ty(VTy->getContext()), VF); + constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + const TargetTransformInfo &TTI = Ctx.TTI; + + // Data Update + C += TTI.getCmpSelInstrCost(Instruction::Select, VTy, MaskTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); + + // FIXME: These costs should be moved into VPInstruction::computeCost. We put + // them here for now since they are related to updating the data and there is + // no VPInstruction::computeCost support at the moment. 
+ + // AnyOf + C += TTI.getArithmeticReductionCost(Instruction::Or, VTy, std::nullopt, + CostKind); + // VPVLSel + C += TTI.getCmpSelInstrCost(Instruction::Select, VTy, MaskTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); + // MaskUpdate + C += TTI.getCmpSelInstrCost(Instruction::Select, MaskTy, MaskTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); + return C; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPCSAExtractScalarRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "EMIT "; + printAsOperand(O, SlotTracker); + O << " = CSA-EXTRACT-SCALAR "; + printOperands(O, SlotTracker); +} +#endif + +void VPCSAExtractScalarRecipe::execute(VPTransformState &State) { + IRBuilder<>::InsertPointGuard Guard(State.Builder); + State.Builder.SetInsertPoint(State.CFG.ExitBB->getFirstNonPHI()); + + Value *InitScalar = getVPInitScalar()->getLiveInIRValue(); + Value *MaskSel = State.get(getVPMaskSel()); + Value *DataSel = State.get(getVPDataSel()); + + Value *LastIdx = nullptr; + Value *IndexVec = State.Builder.CreateStepVector( + VectorType::get(State.Builder.getInt32Ty(), State.VF), "csa.step"); + Value *NegOne = ConstantInt::get(IndexVec->getType()->getScalarType(), -1); + if (usesEVL()) { + // A vp.reduce.smax over the IndexVec with the MaskSel as the mask will + // give us the last active index into MaskSel, which gives us the correct + // index in the data vector to extract from. If no element in the mask + // is active, we pick -1. If we pick -1, then we will use the initial scalar + // value instead of extracting from the data vector. + Value *VL = State.get(getVPCSAVLSel(), /*NeedsScalar=*/true); + LastIdx = State.Builder.CreateIntrinsic(NegOne->getType(), + Intrinsic::vp_reduce_smax, + {NegOne, IndexVec, MaskSel, VL}); + } else { + // Get a vector where the elements are zero when the last active mask is + // false and the index in the vector when the mask is true. 
+ Value *ActiveLaneIdxs = State.Builder.CreateSelect( + MaskSel, IndexVec, ConstantAggregateZero::get(IndexVec->getType())); + // Get the last active index in the mask. When no lanes in the mask are + // active, vector.umax will have value 0. Take the additional step to set + // LastIdx as -1 in this case to avoid the case of lane 0 of the mask being + // inactive, which would also cause the reduction to have value 0. + Value *MaybeLastIdx = State.Builder.CreateIntMaxReduce(ActiveLaneIdxs); + Value *IsLaneZeroActive = + State.Builder.CreateExtractElement(MaskSel, static_cast(0)); + Value *Zero = ConstantInt::get(MaybeLastIdx->getType(), 0); + Value *MaybeLastIdxEQZero = State.Builder.CreateICmpEQ(MaybeLastIdx, Zero); + Value *And = State.Builder.CreateAnd(IsLaneZeroActive, MaybeLastIdxEQZero); + LastIdx = State.Builder.CreateSelect(And, Zero, NegOne); + } + + Value *ExtractFromVec = + State.Builder.CreateExtractElement(DataSel, LastIdx, "csa.extract"); + Value *Zero = ConstantInt::get(LastIdx->getType(), 0); + Value *LastIdxGEZero = State.Builder.CreateICmpSGE(LastIdx, Zero); + Value *ChooseFromVecOrInit = + State.Builder.CreateSelect(LastIdxGEZero, ExtractFromVec, InitScalar); + + for (PHINode *Phi : PhisToFix) + Phi->addIncoming(ChooseFromVecOrInit, State.CFG.ExitBB); + + State.set(this, ChooseFromVecOrInit, /*IsScalar=*/true); +} + +InstructionCost +VPCSAExtractScalarRecipe::computeCost(ElementCount VF, + VPCostContext &Ctx) const { + if (VF.isScalar()) + return 0; + + InstructionCost C = 0; + auto *VTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF); + auto *Int32VTy = + VectorType::get(IntegerType::getInt32Ty(VTy->getContext()), VF); + auto *MaskTy = VectorType::get(IntegerType::getInt1Ty(VTy->getContext()), VF); + constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + const TargetTransformInfo &TTI = Ctx.TTI; + + // StepVector + ArrayRef Args; + IntrinsicCostAttributes CostAttrs(Intrinsic::stepvector, Int32VTy, Args); + C += 
TTI.getIntrinsicInstrCost(CostAttrs, CostKind); + // NegOneSplat + C += TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, Int32VTy); + // LastIdx + if (usesEVL()) { + C += TTI.getMinMaxReductionCost(Intrinsic::smax, Int32VTy, FastMathFlags(), + CostKind); + } else { + // ActiveLaneIdxs + C += TTI.getCmpSelInstrCost(Instruction::Select, MaskTy, MaskTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); + // MaybeLastIdx + C += TTI.getMinMaxReductionCost(Intrinsic::smax, Int32VTy, FastMathFlags(), + CostKind); + // IsLaneZeroActive + C += TTI.getVectorInstrCost(Instruction::ExtractElement, MaskTy, CostKind); + // MaybeLastIdxEQZero + C += TTI.getCmpSelInstrCost(Instruction::ICmp, Int32VTy, MaskTy, + CmpInst::ICMP_EQ, CostKind); + // And + C += TTI.getArithmeticInstrCost(Instruction::And, MaskTy->getScalarType(), + CostKind); + // LastIdx + C += TTI.getCmpSelInstrCost(Instruction::Select, VTy, MaskTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); + } + // ExtractFromVec + C += TTI.getVectorInstrCost(Instruction::ExtractElement, VTy, CostKind); + // LastIdxGEZero + C += TTI.getCmpSelInstrCost(Instruction::ICmp, Int32VTy, MaskTy, + CmpInst::ICMP_SGE, CostKind); + // ChooseFromVecOrInit + C += TTI.getCmpSelInstrCost(Instruction::Select, VTy, MaskTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); + return C; +} + void VPBranchOnMaskRecipe::execute(VPTransformState &State) { assert(State.Lane && "Branch on Mask works only on single instance."); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 9a3b82fe57c12a..4cbf314e5872fc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1442,6 +1442,59 @@ void VPlanTransforms::addActiveLaneMask( HeaderMask->replaceAllUsesWith(LaneMask); } +/// Add recipes required to make CSA work with EVL based approach. 
This +/// includes replacing \p AnyOf with \p AnyOfEVL, and adding \p +/// CSAVLPhi and \p CSAVLSel instructions. +static void addExplicitVectorLengthForCSA(VPlan &Plan, VPValue &EVL) { + for (VPRecipeBase &R : *Plan.getVectorLoopRegion()->getEntryBasicBlock()) { + // AnyOf is used to keep track of whether any condition on the + // current iteration is active. This is used to decide whether the mask + // should be updated. When we are using EVL, we must only consider the first + // EVL number of elements in the mask. Replace AnyOf with the EVL + // specific AnyOf instruction. + if (auto *VPAnyOf = dyn_cast(&R)) { + bool IsCSAAnyOf = false; + for (auto *U : VPAnyOf->users()) + if (auto *UI = dyn_cast(U)) + IsCSAAnyOf |= UI->getOpcode() == VPInstruction::CSAMaskSel; + if (!IsCSAAnyOf) + continue; + + if (VPAnyOf->getOpcode() == VPInstruction::AnyOf) { + VPBuilder B; + auto *VPAnyOfEVL = + B.createAnyOfEVL(VPAnyOf->getOperand(0), &EVL, + VPAnyOf->getDebugLoc(), "csa.cond.anyof"); + VPAnyOfEVL->insertBefore(VPAnyOf); + VPAnyOf->replaceAllUsesWith(VPAnyOfEVL->getVPSingleValue()); + VPAnyOf->eraseFromParent(); + + // When we are using EVL, we must keep track of the most recent EVL when + // at least one lane in the mask was active. Imagine the scenario: on + // iteration N, there was at least one active lane in the mask. Then on + // all future iteration there was no active lanes in the mask. When it + // is time to extract the scalar from the data vector, we must use the + // EVL that corresponds to the EVL that was used when the mask vector + // was last updated. To do this, we introduce CSAVLPhi and CSAVLSel + // instructions + auto *VPVLPhi = B.createCSAVLPhi({}, "csa.vl.phi"); + auto *VPVLSel = + B.createCSAVLSel(VPAnyOfEVL, VPVLPhi, &EVL, {}, "csa.vl.sel"); + VPVLPhi->insertBefore(&*VPAnyOfEVL->getParent()->getFirstNonPhi()); + VPVLSel->insertAfter(VPAnyOfEVL); + + // The ExtractScalarRecipe needs to use VPVLSel as an operand. 
+ for (auto *U : VPAnyOfEVL->users()) + if (auto *DataUpdate = dyn_cast(U)) + for (auto *DataUpdateU : DataUpdate->users()) + if (auto *Extract = + dyn_cast(DataUpdateU)) + Extract->addOperand(VPVLSel); + } + } + } +} + /// Replace recipes with their EVL variants. static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { using namespace llvm::VPlanPatternMatch; @@ -1569,6 +1622,11 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { } recursivelyDeleteDeadRecipes(HeaderMask); } + + // We build the scalar version of a CSA when VF=ElementCount::getFixed(1), + // which does not require an EVL. + if (!Plan.hasScalarVFOnly()) + addExplicitVectorLengthForCSA(Plan, EVL); } /// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 957a602091c733..fd95b465a0a18f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -348,6 +348,8 @@ class VPDef { VPWidenSelectSC, VPBlendSC, VPHistogramSC, + VPCSADataUpdateSC, + VPCSAExtractScalarSC, // START: Phi-like recipes. Need to be kept together. 
VPWidenPHISC, VPPredInstPHISC, @@ -360,6 +362,7 @@ class VPDef { VPWidenIntOrFpInductionSC, VPWidenPointerInductionSC, VPScalarPHISC, + VPCSAHeaderPHISC, VPReductionPHISC, // END: SubclassID for recipes that inherit VPHeaderPHIRecipe // END: Phi-like recipes diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index be420a873bef52..47373bc57345dd 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -75,7 +75,7 @@ bool VPlanVerifier::verifyPhiRecipes(const VPBasicBlock *VPBB) { if (isa(RecipeI)) NumActiveLaneMaskPhiRecipes++; - if (IsHeaderVPBB && !isa(*RecipeI)) { + if (IsHeaderVPBB && !RecipeI->isHeaderPhi()) { errs() << "Found non-header PHI recipe in header VPBB"; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) errs() << ": "; @@ -150,7 +150,11 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const { .Case( [&](const VPScalarCastRecipe *S) { return VerifyEVLUse(*S, 0); }) .Case([&](const VPInstruction *I) { - if (I->getOpcode() != Instruction::Add) { + unsigned Opc = I->getOpcode(); + if (Opc == VPInstruction::AnyOfEVL || Opc == VPInstruction::CSAVLSel) + return true; + + if (Opc != Instruction::Add) { errs() << "EVL is used as an operand in non-VPInstruction::Add\n"; return false; } diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/conditional-scalar-assignment.ll b/llvm/test/Transforms/LoopVectorize/RISCV/conditional-scalar-assignment.ll new file mode 100644 index 00000000000000..c0a764c81bdf9b --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/conditional-scalar-assignment.ll @@ -0,0 +1,2928 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -S -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=256 \ +; RUN: -passes=loop-vectorize -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; 
RUN: -enable-csa-vectorization | FileCheck %s -check-prefixes=CHECK,EVL +; RUN: opt < %s -S -passes=loop-vectorize -mtriple=riscv64 -mattr=+v \ +; RUN: -force-tail-folding-style=none -enable-csa-vectorization \ +; RUN: | FileCheck %s -check-prefixes=CHECK,NO-EVL + +; This function is generated from the following C/C++ program: +; int simple_csa_int_select(int N, int *data, int a) { +; int t = -1; +; for (int i = 0; i < N; i++) { +; if (a < data[i]) +; t = data[i]; +; } +; return t; // use t +; } +define i32 @simple_csa_int_select(i32 %N, ptr %data, i64 %a) { +; EVL-LABEL: define i32 @simple_csa_int_select( +; EVL-SAME: i32 [[N:%.*]], ptr [[DATA:%.*]], i64 [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; EVL-NEXT: [[ENTRY:.*]]: +; EVL-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N]], 0 +; EVL-NEXT: br i1 [[CMP9]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; EVL: [[LOOP_PREHEADER]]: +; EVL-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; EVL: [[VECTOR_PH]]: +; EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[WIDE_TRIP_COUNT]], [[TMP2]] +; EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] +; EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 +; EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[A]], i64 0 +; EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; EVL-NEXT: br label %[[VECTOR_BODY:.*]] +; EVL: [[VECTOR_BODY]]: +; EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[CSA_DATA_PHI:%.*]] = phi 
[ shufflevector ( insertelement ( poison, i32 poison, i64 0), poison, zeroinitializer), %[[VECTOR_PH]] ], [ [[CSA_DATA_SEL:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[CSA_MASK_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[CSA_MASK_SEL:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[CSA_VL_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[CSA_VL_SEL:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[AVL:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[EVL_BASED_IV]] +; EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; EVL-NEXT: [[TMP6:%.*]] = add i64 [[EVL_BASED_IV]], 0 +; EVL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[TMP6]] +; EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP8]], splat (i1 true), i32 [[TMP5]]) +; EVL-NEXT: [[TMP9:%.*]] = call @llvm.vp.sext.nxv4i64.nxv4i32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP5]]) +; EVL-NEXT: [[TMP10:%.*]] = icmp slt [[BROADCAST_SPLAT]], [[TMP9]] +; EVL-NEXT: [[ANY_ACTIVE:%.*]] = call i1 @llvm.vp.reduce.or.nxv4i1(i1 false, [[TMP10]], splat (i1 true), i32 [[TMP5]]) +; EVL-NEXT: [[CSA_VL_SEL]] = select i1 [[ANY_ACTIVE]], i32 [[TMP5]], i32 [[CSA_VL_PHI]] +; EVL-NEXT: [[CSA_MASK_SEL]] = select i1 [[ANY_ACTIVE]], [[TMP10]], [[CSA_MASK_PHI]] +; EVL-NEXT: [[CSA_DATA_SEL]] = select i1 [[ANY_ACTIVE]], [[VP_OP_LOAD]], [[CSA_DATA_PHI]] +; EVL-NEXT: [[TMP11:%.*]] = zext i32 [[TMP5]] to i64 +; EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[EVL_BASED_IV]] +; EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] +; EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; EVL-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; EVL: [[MIDDLE_BLOCK]]: +; EVL-NEXT: [[CSA_STEP:%.*]] = call @llvm.stepvector.nxv4i32() +; EVL-NEXT: [[TMP13:%.*]] = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 -1, [[CSA_STEP]], 
[[CSA_MASK_SEL]], i32 [[CSA_VL_SEL]]) +; EVL-NEXT: [[CSA_EXTRACT:%.*]] = extractelement [[CSA_DATA_SEL]], i32 [[TMP13]] +; EVL-NEXT: [[TMP14:%.*]] = icmp sge i32 [[TMP13]], 0 +; EVL-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[CSA_EXTRACT]], i32 -1 +; EVL-NEXT: br i1 true, label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; EVL: [[SCALAR_PH]]: +; EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; EVL-NEXT: br label %[[LOOP:.*]] +; EVL: [[EXIT_LOOPEXIT]]: +; EVL-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], %[[LOOP]] ], [ [[TMP15]], %[[MIDDLE_BLOCK]] ] +; EVL-NEXT: br label %[[EXIT]] +; EVL: [[EXIT]]: +; EVL-NEXT: [[T_0_LCSSA:%.*]] = phi i32 [ -1, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; EVL-NEXT: ret i32 [[T_0_LCSSA]] +; EVL: [[LOOP]]: +; EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; EVL-NEXT: [[T_010:%.*]] = phi i32 [ -1, %[[SCALAR_PH]] ], [ [[SPEC_SELECT]], %[[LOOP]] ] +; EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[IV]] +; EVL-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; EVL-NEXT: [[TMP17:%.*]] = sext i32 [[TMP16]] to i64 +; EVL-NEXT: [[CMP1:%.*]] = icmp slt i64 [[A]], [[TMP17]] +; EVL-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1]], i32 [[TMP16]], i32 [[T_010]] +; EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; +; NO-EVL-LABEL: define i32 @simple_csa_int_select( +; NO-EVL-SAME: i32 [[N:%.*]], ptr [[DATA:%.*]], i64 [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; NO-EVL-NEXT: [[ENTRY:.*]]: +; NO-EVL-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N]], 0 +; NO-EVL-NEXT: br i1 [[CMP9]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; NO-EVL: [[LOOP_PREHEADER]]: +; NO-EVL-NEXT: 
[[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; NO-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; NO-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; NO-EVL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] +; NO-EVL-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; NO-EVL: [[VECTOR_PH]]: +; NO-EVL-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; NO-EVL-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 +; NO-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] +; NO-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; NO-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; NO-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; NO-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[A]], i64 0 +; NO-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; NO-EVL-NEXT: br label %[[VECTOR_BODY:.*]] +; NO-EVL: [[VECTOR_BODY]]: +; NO-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[CSA_DATA_PHI:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 poison, i64 0), poison, zeroinitializer), %[[VECTOR_PH]] ], [ [[CSA_DATA_SEL:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[CSA_MASK_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[CSA_MASK_SEL:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; NO-EVL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[TMP6]] +; NO-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; NO-EVL-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; NO-EVL-NEXT: [[TMP9:%.*]] = sext [[WIDE_LOAD]] to +; NO-EVL-NEXT: [[TMP10:%.*]] = icmp slt [[BROADCAST_SPLAT]], [[TMP9]] +; NO-EVL-NEXT: [[TMP11:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP10]]) +; NO-EVL-NEXT: [[CSA_MASK_SEL]] = select i1 [[TMP11]], [[TMP10]], [[CSA_MASK_PHI]] +; 
NO-EVL-NEXT: [[CSA_DATA_SEL]] = select i1 [[TMP11]], [[WIDE_LOAD]], [[CSA_DATA_PHI]] +; NO-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-EVL-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; NO-EVL: [[MIDDLE_BLOCK]]: +; NO-EVL-NEXT: [[CSA_STEP:%.*]] = call @llvm.stepvector.nxv4i32() +; NO-EVL-NEXT: [[TMP13:%.*]] = select [[CSA_MASK_SEL]], [[CSA_STEP]], zeroinitializer +; NO-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32( [[TMP13]]) +; NO-EVL-NEXT: [[TMP15:%.*]] = extractelement [[CSA_MASK_SEL]], i64 0 +; NO-EVL-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP14]], 0 +; NO-EVL-NEXT: [[TMP17:%.*]] = and i1 [[TMP15]], [[TMP16]] +; NO-EVL-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 0, i32 -1 +; NO-EVL-NEXT: [[CSA_EXTRACT:%.*]] = extractelement [[CSA_DATA_SEL]], i32 [[TMP18]] +; NO-EVL-NEXT: [[TMP19:%.*]] = icmp sge i32 [[TMP18]], 0 +; NO-EVL-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[CSA_EXTRACT]], i32 -1 +; NO-EVL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; NO-EVL-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; NO-EVL: [[SCALAR_PH]]: +; NO-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; NO-EVL-NEXT: br label %[[LOOP:.*]] +; NO-EVL: [[EXIT_LOOPEXIT]]: +; NO-EVL-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], %[[LOOP]] ], [ [[TMP20]], %[[MIDDLE_BLOCK]] ] +; NO-EVL-NEXT: br label %[[EXIT]] +; NO-EVL: [[EXIT]]: +; NO-EVL-NEXT: [[T_0_LCSSA:%.*]] = phi i32 [ -1, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; NO-EVL-NEXT: ret i32 [[T_0_LCSSA]] +; NO-EVL: [[LOOP]]: +; NO-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; NO-EVL-NEXT: [[T_010:%.*]] = phi i32 [ -1, %[[SCALAR_PH]] ], [ [[SPEC_SELECT]], %[[LOOP]] ] +; 
NO-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[IV]] +; NO-EVL-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; NO-EVL-NEXT: [[TMP22:%.*]] = sext i32 [[TMP21]] to i64 +; NO-EVL-NEXT: [[CMP1:%.*]] = icmp slt i64 [[A]], [[TMP22]] +; NO-EVL-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1]], i32 [[TMP21]], i32 [[T_010]] +; NO-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; NO-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; NO-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; +entry: + %cmp9 = icmp sgt i32 %N, 0 + br i1 %cmp9, label %loop.preheader, label %exit + +loop.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %loop + +exit: ; preds = %loop, %entry + %t.0.lcssa = phi i32 [ -1, %entry ], [ %spec.select, %loop ] + ret i32 %t.0.lcssa + +loop: ; preds = %loop.preheader, %loop + %iv = phi i64 [ 0, %loop.preheader ], [ %iv.next, %loop ] + %t.010 = phi i32 [ -1, %loop.preheader ], [ %spec.select, %loop ] + %arrayidx = getelementptr inbounds i32, ptr %data, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %1 = sext i32 %0 to i64 + %cmp1 = icmp slt i64 %a, %1 + %spec.select = select i1 %cmp1, i32 %0, i32 %t.010 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond.not, label %exit, label %loop +} + +; This function is generated from the following C/C++ program: +; int simple_csa_int_select_induction_cmp(int N, int *data) { +; int t = -1; +; for (int i = 0; i < N; i++) { +; if (i < data[i]) +; t = data[i]; +; } +; return t; // use t +; } +define i32 @simple_csa_int_select_induction_cmp(i32 %N, ptr %data) { +; EVL-LABEL: define i32 @simple_csa_int_select_induction_cmp( +; EVL-SAME: i32 [[N:%.*]], ptr [[DATA:%.*]]) #[[ATTR0]] { +; EVL-NEXT: [[ENTRY:.*]]: +; EVL-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N]], 0 +; EVL-NEXT: br i1 [[CMP9]], label 
%[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; EVL: [[LOOP_PREHEADER]]: +; EVL-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; EVL-NEXT: br label %[[LOOP:.*]] +; EVL: [[EXIT_LOOPEXIT:.*]]: +; EVL-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; EVL-NEXT: br label %[[EXIT]] +; EVL: [[EXIT]]: +; EVL-NEXT: [[T_0_LCSSA:%.*]] = phi i32 [ -1, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; EVL-NEXT: ret i32 [[T_0_LCSSA]] +; EVL: [[LOOP]]: +; EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; EVL-NEXT: [[T_010:%.*]] = phi i32 [ -1, %[[LOOP_PREHEADER]] ], [ [[SPEC_SELECT]], %[[LOOP]] ] +; EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[IV]] +; EVL-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; EVL-NEXT: [[TMP1:%.*]] = sext i32 [[TMP0]] to i64 +; EVL-NEXT: [[CMP1:%.*]] = icmp slt i64 [[IV]], [[TMP1]] +; EVL-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1]], i32 [[TMP0]], i32 [[T_010]] +; EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]] +; +; NO-EVL-LABEL: define i32 @simple_csa_int_select_induction_cmp( +; NO-EVL-SAME: i32 [[N:%.*]], ptr [[DATA:%.*]]) #[[ATTR0]] { +; NO-EVL-NEXT: [[ENTRY:.*]]: +; NO-EVL-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N]], 0 +; NO-EVL-NEXT: br i1 [[CMP9]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; NO-EVL: [[LOOP_PREHEADER]]: +; NO-EVL-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; NO-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; NO-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; NO-EVL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] +; NO-EVL-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; NO-EVL: [[VECTOR_PH]]: +; NO-EVL-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; 
NO-EVL-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 +; NO-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] +; NO-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; NO-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; NO-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; NO-EVL-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() +; NO-EVL-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) +; NO-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; NO-EVL-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]] +; NO-EVL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; NO-EVL-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; NO-EVL-NEXT: br label %[[VECTOR_BODY:.*]] +; NO-EVL: [[VECTOR_BODY]]: +; NO-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[CSA_DATA_PHI:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 poison, i64 0), poison, zeroinitializer), %[[VECTOR_PH]] ], [ [[CSA_DATA_SEL:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[CSA_MASK_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[CSA_MASK_SEL:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 +; NO-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[TMP10]] +; NO-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 +; NO-EVL-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 4 +; NO-EVL-NEXT: [[TMP13:%.*]] = sext [[WIDE_LOAD]] to +; NO-EVL-NEXT: [[TMP14:%.*]] = icmp slt [[VEC_IND]], [[TMP13]] +; NO-EVL-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP14]]) +; NO-EVL-NEXT: [[CSA_MASK_SEL]] = select i1 [[TMP15]], [[TMP14]], [[CSA_MASK_PHI]] +; NO-EVL-NEXT: [[CSA_DATA_SEL]] = select i1 [[TMP15]], [[WIDE_LOAD]], [[CSA_DATA_PHI]] +; 
NO-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; NO-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-EVL-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; NO-EVL: [[MIDDLE_BLOCK]]: +; NO-EVL-NEXT: [[CSA_STEP:%.*]] = call @llvm.stepvector.nxv4i32() +; NO-EVL-NEXT: [[TMP17:%.*]] = select [[CSA_MASK_SEL]], [[CSA_STEP]], zeroinitializer +; NO-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32( [[TMP17]]) +; NO-EVL-NEXT: [[TMP19:%.*]] = extractelement [[CSA_MASK_SEL]], i64 0 +; NO-EVL-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP18]], 0 +; NO-EVL-NEXT: [[TMP21:%.*]] = and i1 [[TMP19]], [[TMP20]] +; NO-EVL-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 0, i32 -1 +; NO-EVL-NEXT: [[CSA_EXTRACT:%.*]] = extractelement [[CSA_DATA_SEL]], i32 [[TMP22]] +; NO-EVL-NEXT: [[TMP23:%.*]] = icmp sge i32 [[TMP22]], 0 +; NO-EVL-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[CSA_EXTRACT]], i32 -1 +; NO-EVL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; NO-EVL-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; NO-EVL: [[SCALAR_PH]]: +; NO-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; NO-EVL-NEXT: br label %[[LOOP:.*]] +; NO-EVL: [[EXIT_LOOPEXIT]]: +; NO-EVL-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], %[[LOOP]] ], [ [[TMP24]], %[[MIDDLE_BLOCK]] ] +; NO-EVL-NEXT: br label %[[EXIT]] +; NO-EVL: [[EXIT]]: +; NO-EVL-NEXT: [[T_0_LCSSA:%.*]] = phi i32 [ -1, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; NO-EVL-NEXT: ret i32 [[T_0_LCSSA]] +; NO-EVL: [[LOOP]]: +; NO-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; NO-EVL-NEXT: [[T_010:%.*]] = phi i32 [ -1, %[[SCALAR_PH]] ], [ [[SPEC_SELECT]], %[[LOOP]] ] +; NO-EVL-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[IV]] +; NO-EVL-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; NO-EVL-NEXT: [[TMP26:%.*]] = sext i32 [[TMP25]] to i64 +; NO-EVL-NEXT: [[CMP1:%.*]] = icmp slt i64 [[IV]], [[TMP26]] +; NO-EVL-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1]], i32 [[TMP25]], i32 [[T_010]] +; NO-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; NO-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; NO-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; +entry: + %cmp9 = icmp sgt i32 %N, 0 + br i1 %cmp9, label %loop.preheader, label %exit + +loop.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %loop + +exit: ; preds = %loop, %entry + %t.0.lcssa = phi i32 [ -1, %entry ], [ %spec.select, %loop ] + ret i32 %t.0.lcssa + +loop: ; preds = %loop.preheader, %loop + %iv = phi i64 [ 0, %loop.preheader ], [ %iv.next, %loop ] + %t.010 = phi i32 [ -1, %loop.preheader ], [ %spec.select, %loop ] + %arrayidx = getelementptr inbounds i32, ptr %data, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %1 = sext i32 %0 to i64 + %cmp1 = icmp slt i64 %iv, %1 + %spec.select = select i1 %cmp1, i32 %0, i32 %t.010 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond.not, label %exit, label %loop +} + +; This function is generated from the following C/C++ program: +; float simple_csa_float_select(int N, float *data) { +; float t = 1.0f; +; for (int i = 0; i < N; i++) { +; if (0.0f < data[i]) +; t = data[i]; +; } +; return t; // use t +; } +define float @simple_csa_float_select(i32 %N, ptr %data) { +; EVL-LABEL: define float @simple_csa_float_select( +; EVL-SAME: i32 [[N:%.*]], ptr [[DATA:%.*]]) #[[ATTR0]] { +; EVL-NEXT: [[ENTRY:.*]]: +; EVL-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[N]], 0 +; EVL-NEXT: br i1 [[CMP8]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; EVL: 
[[LOOP_PREHEADER]]: +; EVL-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; EVL: [[VECTOR_PH]]: +; EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[WIDE_TRIP_COUNT]], [[TMP2]] +; EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] +; EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 +; EVL-NEXT: br label %[[VECTOR_BODY:.*]] +; EVL: [[VECTOR_BODY]]: +; EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[CSA_DATA_PHI:%.*]] = phi [ shufflevector ( insertelement ( poison, float poison, i64 0), poison, zeroinitializer), %[[VECTOR_PH]] ], [ [[CSA_DATA_SEL:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[CSA_MASK_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[CSA_MASK_SEL:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[CSA_VL_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[CSA_VL_SEL:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[AVL:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[EVL_BASED_IV]] +; EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; EVL-NEXT: [[TMP6:%.*]] = add i64 [[EVL_BASED_IV]], 0 +; EVL-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[DATA]], i64 [[TMP6]] +; EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 +; EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP8]], splat (i1 true), i32 [[TMP5]]) +; EVL-NEXT: [[TMP9:%.*]] = fcmp ogt [[VP_OP_LOAD]], zeroinitializer +; EVL-NEXT: [[ANY_ACTIVE:%.*]] = call i1 @llvm.vp.reduce.or.nxv4i1(i1 false, 
[[TMP9]], splat (i1 true), i32 [[TMP5]]) +; EVL-NEXT: [[CSA_VL_SEL]] = select i1 [[ANY_ACTIVE]], i32 [[TMP5]], i32 [[CSA_VL_PHI]] +; EVL-NEXT: [[CSA_MASK_SEL]] = select i1 [[ANY_ACTIVE]], [[TMP9]], [[CSA_MASK_PHI]] +; EVL-NEXT: [[CSA_DATA_SEL]] = select i1 [[ANY_ACTIVE]], [[VP_OP_LOAD]], [[CSA_DATA_PHI]] +; EVL-NEXT: [[TMP10:%.*]] = zext i32 [[TMP5]] to i64 +; EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP10]], [[EVL_BASED_IV]] +; EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] +; EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; EVL-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; EVL: [[MIDDLE_BLOCK]]: +; EVL-NEXT: [[CSA_STEP:%.*]] = call @llvm.stepvector.nxv4i32() +; EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 -1, [[CSA_STEP]], [[CSA_MASK_SEL]], i32 [[CSA_VL_SEL]]) +; EVL-NEXT: [[CSA_EXTRACT:%.*]] = extractelement [[CSA_DATA_SEL]], i32 [[TMP12]] +; EVL-NEXT: [[TMP13:%.*]] = icmp sge i32 [[TMP12]], 0 +; EVL-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[CSA_EXTRACT]], float 1.000000e+00 +; EVL-NEXT: br i1 true, label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; EVL: [[SCALAR_PH]]: +; EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; EVL-NEXT: br label %[[LOOP:.*]] +; EVL: [[EXIT_LOOPEXIT]]: +; EVL-NEXT: [[T_1_LCSSA:%.*]] = phi float [ [[T_1:%.*]], %[[LOOP]] ], [ [[TMP14]], %[[MIDDLE_BLOCK]] ] +; EVL-NEXT: br label %[[EXIT]] +; EVL: [[EXIT]]: +; EVL-NEXT: [[T_0_LCSSA:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[T_1_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; EVL-NEXT: ret float [[T_0_LCSSA]] +; EVL: [[LOOP]]: +; EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; EVL-NEXT: [[T_09:%.*]] = phi float [ 1.000000e+00, %[[SCALAR_PH]] ], [ [[T_1]], %[[LOOP]] ] +; EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[DATA]], i64 [[IV]] +; 
EVL-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; EVL-NEXT: [[CMP1:%.*]] = fcmp ogt float [[TMP15]], 0.000000e+00 +; EVL-NEXT: [[T_1]] = select i1 [[CMP1]], float [[TMP15]], float [[T_09]] +; EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; +; NO-EVL-LABEL: define float @simple_csa_float_select( +; NO-EVL-SAME: i32 [[N:%.*]], ptr [[DATA:%.*]]) #[[ATTR0]] { +; NO-EVL-NEXT: [[ENTRY:.*]]: +; NO-EVL-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[N]], 0 +; NO-EVL-NEXT: br i1 [[CMP8]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; NO-EVL: [[LOOP_PREHEADER]]: +; NO-EVL-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; NO-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; NO-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; NO-EVL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] +; NO-EVL-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; NO-EVL: [[VECTOR_PH]]: +; NO-EVL-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; NO-EVL-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 +; NO-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] +; NO-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; NO-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; NO-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; NO-EVL-NEXT: br label %[[VECTOR_BODY:.*]] +; NO-EVL: [[VECTOR_BODY]]: +; NO-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[CSA_DATA_PHI:%.*]] = phi [ shufflevector ( insertelement ( poison, float poison, i64 0), poison, zeroinitializer), %[[VECTOR_PH]] ], [ [[CSA_DATA_SEL:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[CSA_MASK_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[CSA_MASK_SEL:%.*]], 
%[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; NO-EVL-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[DATA]], i64 [[TMP6]] +; NO-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 +; NO-EVL-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; NO-EVL-NEXT: [[TMP9:%.*]] = fcmp ogt [[WIDE_LOAD]], zeroinitializer +; NO-EVL-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP9]]) +; NO-EVL-NEXT: [[CSA_MASK_SEL]] = select i1 [[TMP10]], [[TMP9]], [[CSA_MASK_PHI]] +; NO-EVL-NEXT: [[CSA_DATA_SEL]] = select i1 [[TMP10]], [[WIDE_LOAD]], [[CSA_DATA_PHI]] +; NO-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-EVL-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; NO-EVL: [[MIDDLE_BLOCK]]: +; NO-EVL-NEXT: [[CSA_STEP:%.*]] = call @llvm.stepvector.nxv4i32() +; NO-EVL-NEXT: [[TMP12:%.*]] = select [[CSA_MASK_SEL]], [[CSA_STEP]], zeroinitializer +; NO-EVL-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32( [[TMP12]]) +; NO-EVL-NEXT: [[TMP14:%.*]] = extractelement [[CSA_MASK_SEL]], i64 0 +; NO-EVL-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP13]], 0 +; NO-EVL-NEXT: [[TMP16:%.*]] = and i1 [[TMP14]], [[TMP15]] +; NO-EVL-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 0, i32 -1 +; NO-EVL-NEXT: [[CSA_EXTRACT:%.*]] = extractelement [[CSA_DATA_SEL]], i32 [[TMP17]] +; NO-EVL-NEXT: [[TMP18:%.*]] = icmp sge i32 [[TMP17]], 0 +; NO-EVL-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], float [[CSA_EXTRACT]], float 1.000000e+00 +; NO-EVL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; NO-EVL-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; NO-EVL: [[SCALAR_PH]]: +; NO-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; NO-EVL-NEXT: br label %[[LOOP:.*]] +; NO-EVL: [[EXIT_LOOPEXIT]]: 
+; NO-EVL-NEXT: [[T_1_LCSSA:%.*]] = phi float [ [[T_1:%.*]], %[[LOOP]] ], [ [[TMP19]], %[[MIDDLE_BLOCK]] ] +; NO-EVL-NEXT: br label %[[EXIT]] +; NO-EVL: [[EXIT]]: +; NO-EVL-NEXT: [[T_0_LCSSA:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[T_1_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; NO-EVL-NEXT: ret float [[T_0_LCSSA]] +; NO-EVL: [[LOOP]]: +; NO-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; NO-EVL-NEXT: [[T_09:%.*]] = phi float [ 1.000000e+00, %[[SCALAR_PH]] ], [ [[T_1]], %[[LOOP]] ] +; NO-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[DATA]], i64 [[IV]] +; NO-EVL-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; NO-EVL-NEXT: [[CMP1:%.*]] = fcmp ogt float [[TMP20]], 0.000000e+00 +; NO-EVL-NEXT: [[T_1]] = select i1 [[CMP1]], float [[TMP20]], float [[T_09]] +; NO-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; NO-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; NO-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; +entry: + %cmp8 = icmp sgt i32 %N, 0 + br i1 %cmp8, label %loop.preheader, label %exit + +loop.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %loop + +exit: ; preds = %loop, %entry + %t.0.lcssa = phi float [ 1.000000e+00, %entry ], [ %t.1, %loop ] + ret float %t.0.lcssa + +loop: ; preds = %loop.preheader, %loop + %iv = phi i64 [ 0, %loop.preheader ], [ %iv.next, %loop ] + %t.09 = phi float [ 1.000000e+00, %loop.preheader ], [ %t.1, %loop ] + %arrayidx = getelementptr inbounds float, ptr %data, i64 %iv + %0 = load float, ptr %arrayidx, align 4 + %cmp1 = fcmp ogt float %0, 0.000000e+00 + %t.1 = select i1 %cmp1, float %0, float %t.09 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond.not, label %exit, label %loop +} + +; This function is generated from the following C/C++ program: +; int 
simple_csa_int(int N, bool *cond, int *data) { +; int t = -1; +; for (int i = 0; i < N; i++) { +; if (cond[i]) +; t = data[i]; +; } +; return t; // use t +; } +define i32 @simple_csa_int(i32 %N, ptr %cond, ptr %data) { +; CHECK-LABEL: define i32 @simple_csa_int( +; CHECK-SAME: i32 [[N:%.*]], ptr [[COND:%.*]], ptr [[DATA:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP6]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[EXIT_LOOPEXIT:.*]]: +; CHECK-NEXT: [[T_1_LCSSA:%.*]] = phi i32 [ [[T_1:%.*]], %[[FOR_INC:.*]] ] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[T_0_LCSSA:%.*]] = phi i32 [ -1, %[[ENTRY]] ], [ [[T_1_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[T_0_LCSSA]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[IV_NEXT:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[T_07:%.*]] = phi i32 [ -1, %[[LOOP_PREHEADER]] ], [ [[T_1]], %[[FOR_INC]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[COND]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[FOR_INC]], label %[[IF_THEN:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[T_1]] = phi i32 [ [[TMP1]], %[[IF_THEN]] ], [ [[T_07]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]] +; +entry: + 
%cmp6 = icmp sgt i32 %N, 0 + br i1 %cmp6, label %loop.preheader, label %exit + +loop.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %loop + +exit: ; preds = %for.inc, %entry + %t.0.lcssa = phi i32 [ -1, %entry ], [ %t.1, %for.inc ] + ret i32 %t.0.lcssa + +loop: ; preds = %loop.preheader, %for.inc + %iv = phi i64 [ 0, %loop.preheader ], [ %iv.next, %for.inc ] + %t.07 = phi i32 [ -1, %loop.preheader ], [ %t.1, %for.inc ] + %arrayidx = getelementptr inbounds i8, ptr %cond, i64 %iv + %0 = load i8, ptr %arrayidx, align 1 + %tobool.not = icmp eq i8 %0, 0 + br i1 %tobool.not, label %for.inc, label %if.then + +if.then: ; preds = %loop + %arrayidx2 = getelementptr inbounds i32, ptr %data, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 + br label %for.inc + +for.inc: ; preds = %loop, %if.then + %t.1 = phi i32 [ %1, %if.then ], [ %t.07, %loop ] + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond.not, label %exit, label %loop +} + +; This function is generated from the following C/C++ program: +; float simple_csa_float(int N, bool *cond, float *data) { +; float t = 1.0f; +; for (int i = 0; i < N; i++) { +; if (cond[i]) +; t = data[i]; +; } +; return t; // use t +; } +define float @simple_csa_float(i32 %N, ptr %cond, ptr %data) { +; CHECK-LABEL: define float @simple_csa_float( +; CHECK-SAME: i32 [[N:%.*]], ptr [[COND:%.*]], ptr [[DATA:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP6]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[EXIT_LOOPEXIT:.*]]: +; CHECK-NEXT: [[T_1_LCSSA:%.*]] = phi float [ [[T_1:%.*]], %[[FOR_INC:.*]] ] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[T_0_LCSSA:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ 
[[T_1_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret float [[T_0_LCSSA]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[IV_NEXT:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[T_07:%.*]] = phi float [ 1.000000e+00, %[[LOOP_PREHEADER]] ], [ [[T_1]], %[[FOR_INC]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[COND]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[FOR_INC]], label %[[IF_THEN:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[DATA]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[T_1]] = phi float [ [[TMP1]], %[[IF_THEN]] ], [ [[T_07]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]] +; +entry: + %cmp6 = icmp sgt i32 %N, 0 + br i1 %cmp6, label %loop.preheader, label %exit + +loop.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %loop + +exit: ; preds = %for.inc, %entry + %t.0.lcssa = phi float [ 1.000000e+00, %entry ], [ %t.1, %for.inc ] + ret float %t.0.lcssa + +loop: ; preds = %loop.preheader, %for.inc + %iv = phi i64 [ 0, %loop.preheader ], [ %iv.next, %for.inc ] + %t.07 = phi float [ 1.000000e+00, %loop.preheader ], [ %t.1, %for.inc ] + %arrayidx = getelementptr inbounds i8, ptr %cond, i64 %iv + %0 = load i8, ptr %arrayidx, align 1 + %tobool.not = icmp eq i8 %0, 0 + br i1 %tobool.not, label %for.inc, label %if.then + +if.then: ; preds = %loop + %arrayidx2 = getelementptr inbounds float, ptr %data, i64 %iv + %1 = load float, ptr %arrayidx2, align 4 + br label %for.inc + +for.inc: ; preds = %loop, 
%if.then + %t.1 = phi float [ %1, %if.then ], [ %t.07, %loop ] + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond.not, label %exit, label %loop +} + +; This function is generated from the following C/C++ program: +; int csa_in_series_int_select(int N, int *data0, int *data1, int a) { +; int t = -1; +; int s = -1; +; for (int i = 0; i < N; i++) { +; if (a < data0[i]) +; t = data0[i]; +; if (a < data1[i]) +; s = data1[i]; +; } +; return t | s; // use t and s +; } +define i32 @csa_in_series_int_select(i32 %N, ptr %data0, ptr %data1, i64 %a) { +; EVL-LABEL: define i32 @csa_in_series_int_select( +; EVL-SAME: i32 [[N:%.*]], ptr [[DATA0:%.*]], ptr [[DATA1:%.*]], i64 [[A:%.*]]) #[[ATTR0]] { +; EVL-NEXT: [[ENTRY:.*]]: +; EVL-NEXT: [[CMP21:%.*]] = icmp sgt i32 [[N]], 0 +; EVL-NEXT: br i1 [[CMP21]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; EVL: [[LOOP_PREHEADER]]: +; EVL-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; EVL: [[VECTOR_PH]]: +; EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[WIDE_TRIP_COUNT]], [[TMP2]] +; EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] +; EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 +; EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[A]], i64 0 +; EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; EVL-NEXT: br label %[[VECTOR_BODY:.*]] +; EVL: [[VECTOR_BODY]]: +; EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], 
%[[VECTOR_BODY]] ] +; EVL-NEXT: [[CSA_DATA_PHI:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 poison, i64 0), poison, zeroinitializer), %[[VECTOR_PH]] ], [ [[CSA_DATA_SEL8:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[CSA_DATA_PHI1:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 poison, i64 0), poison, zeroinitializer), %[[VECTOR_PH]] ], [ [[CSA_DATA_SEL:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[CSA_MASK_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[CSA_MASK_SEL7:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[CSA_MASK_PHI2:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[CSA_MASK_SEL:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[CSA_VL_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[CSA_VL_SEL:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[CSA_VL_PHI3:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[CSA_VL_SEL6:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[AVL:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[EVL_BASED_IV]] +; EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; EVL-NEXT: [[TMP6:%.*]] = add i64 [[EVL_BASED_IV]], 0 +; EVL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DATA0]], i64 [[TMP6]] +; EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP8]], splat (i1 true), i32 [[TMP5]]) +; EVL-NEXT: [[TMP9:%.*]] = call @llvm.vp.sext.nxv4i64.nxv4i32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP5]]) +; EVL-NEXT: [[TMP10:%.*]] = icmp slt [[BROADCAST_SPLAT]], [[TMP9]] +; EVL-NEXT: [[ANY_ACTIVE:%.*]] = call i1 @llvm.vp.reduce.or.nxv4i1(i1 false, [[TMP10]], splat (i1 true), i32 [[TMP5]]) +; EVL-NEXT: [[CSA_VL_SEL]] = select i1 [[ANY_ACTIVE]], i32 [[TMP5]], i32 [[CSA_VL_PHI]] +; EVL-NEXT: [[CSA_MASK_SEL]] = select i1 [[ANY_ACTIVE]], [[TMP10]], [[CSA_MASK_PHI2]] +; EVL-NEXT: [[CSA_DATA_SEL]] = select i1 [[ANY_ACTIVE]], [[VP_OP_LOAD]], [[CSA_DATA_PHI1]] +; EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr 
[[DATA1]], i64 [[TMP6]] +; EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 +; EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP5]]) +; EVL-NEXT: [[TMP13:%.*]] = call @llvm.vp.sext.nxv4i64.nxv4i32( [[VP_OP_LOAD4]], splat (i1 true), i32 [[TMP5]]) +; EVL-NEXT: [[TMP14:%.*]] = icmp slt [[BROADCAST_SPLAT]], [[TMP13]] +; EVL-NEXT: [[ANY_ACTIVE5:%.*]] = call i1 @llvm.vp.reduce.or.nxv4i1(i1 false, [[TMP14]], splat (i1 true), i32 [[TMP5]]) +; EVL-NEXT: [[CSA_VL_SEL6]] = select i1 [[ANY_ACTIVE5]], i32 [[TMP5]], i32 [[CSA_VL_PHI3]] +; EVL-NEXT: [[CSA_MASK_SEL7]] = select i1 [[ANY_ACTIVE5]], [[TMP14]], [[CSA_MASK_PHI]] +; EVL-NEXT: [[CSA_DATA_SEL8]] = select i1 [[ANY_ACTIVE5]], [[VP_OP_LOAD4]], [[CSA_DATA_PHI]] +; EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP5]] to i64 +; EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP15]], [[EVL_BASED_IV]] +; EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] +; EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; EVL-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; EVL: [[MIDDLE_BLOCK]]: +; EVL-NEXT: [[CSA_STEP9:%.*]] = call @llvm.stepvector.nxv4i32() +; EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 -1, [[CSA_STEP9]], [[CSA_MASK_SEL7]], i32 [[CSA_VL_SEL6]]) +; EVL-NEXT: [[CSA_EXTRACT10:%.*]] = extractelement [[CSA_DATA_SEL8]], i32 [[TMP17]] +; EVL-NEXT: [[TMP18:%.*]] = icmp sge i32 [[TMP17]], 0 +; EVL-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[CSA_EXTRACT10]], i32 -1 +; EVL-NEXT: [[CSA_STEP:%.*]] = call @llvm.stepvector.nxv4i32() +; EVL-NEXT: [[TMP20:%.*]] = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 -1, [[CSA_STEP]], [[CSA_MASK_SEL]], i32 [[CSA_VL_SEL]]) +; EVL-NEXT: [[CSA_EXTRACT:%.*]] = extractelement [[CSA_DATA_SEL]], i32 [[TMP20]] +; EVL-NEXT: [[TMP21:%.*]] = icmp sge i32 [[TMP20]], 0 +; EVL-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 
[[CSA_EXTRACT]], i32 -1 +; EVL-NEXT: br i1 true, label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; EVL: [[SCALAR_PH]]: +; EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; EVL-NEXT: br label %[[LOOP:.*]] +; EVL: [[EXIT_LOOPEXIT]]: +; EVL-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], %[[LOOP]] ], [ [[TMP22]], %[[MIDDLE_BLOCK]] ] +; EVL-NEXT: [[S_1_LCSSA:%.*]] = phi i32 [ [[S_1:%.*]], %[[LOOP]] ], [ [[TMP19]], %[[MIDDLE_BLOCK]] ] +; EVL-NEXT: [[TMP23:%.*]] = or i32 [[S_1_LCSSA]], [[SPEC_SELECT_LCSSA]] +; EVL-NEXT: br label %[[EXIT]] +; EVL: [[EXIT]]: +; EVL-NEXT: [[OR:%.*]] = phi i32 [ [[TMP23]], %[[EXIT_LOOPEXIT]] ], [ -1, %[[ENTRY]] ] +; EVL-NEXT: ret i32 [[OR]] +; EVL: [[LOOP]]: +; EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; EVL-NEXT: [[S_023:%.*]] = phi i32 [ -1, %[[SCALAR_PH]] ], [ [[S_1]], %[[LOOP]] ] +; EVL-NEXT: [[T_022:%.*]] = phi i32 [ -1, %[[SCALAR_PH]] ], [ [[SPEC_SELECT]], %[[LOOP]] ] +; EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA0]], i64 [[IV]] +; EVL-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; EVL-NEXT: [[TMP25:%.*]] = sext i32 [[TMP24]] to i64 +; EVL-NEXT: [[CMP1:%.*]] = icmp slt i64 [[A]], [[TMP25]] +; EVL-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1]], i32 [[TMP24]], i32 [[T_022]] +; EVL-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[DATA1]], i64 [[IV]] +; EVL-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 +; EVL-NEXT: [[TMP27:%.*]] = sext i32 [[TMP26]] to i64 +; EVL-NEXT: [[CMP6:%.*]] = icmp slt i64 [[A]], [[TMP27]] +; EVL-NEXT: [[S_1]] = select i1 [[CMP6]], i32 [[TMP26]], i32 [[S_023]] +; EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; +; NO-EVL-LABEL: define 
i32 @csa_in_series_int_select( +; NO-EVL-SAME: i32 [[N:%.*]], ptr [[DATA0:%.*]], ptr [[DATA1:%.*]], i64 [[A:%.*]]) #[[ATTR0]] { +; NO-EVL-NEXT: [[ENTRY:.*]]: +; NO-EVL-NEXT: [[CMP21:%.*]] = icmp sgt i32 [[N]], 0 +; NO-EVL-NEXT: br i1 [[CMP21]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; NO-EVL: [[LOOP_PREHEADER]]: +; NO-EVL-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; NO-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; NO-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; NO-EVL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] +; NO-EVL-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; NO-EVL: [[VECTOR_PH]]: +; NO-EVL-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; NO-EVL-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 +; NO-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] +; NO-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; NO-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; NO-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; NO-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[A]], i64 0 +; NO-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; NO-EVL-NEXT: br label %[[VECTOR_BODY:.*]] +; NO-EVL: [[VECTOR_BODY]]: +; NO-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[CSA_DATA_PHI:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 poison, i64 0), poison, zeroinitializer), %[[VECTOR_PH]] ], [ [[CSA_DATA_SEL5:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[CSA_DATA_PHI2:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 poison, i64 0), poison, zeroinitializer), %[[VECTOR_PH]] ], [ [[CSA_DATA_SEL:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[CSA_MASK_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[CSA_MASK_SEL4:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[CSA_MASK_PHI1:%.*]] = phi [ 
zeroinitializer, %[[VECTOR_PH]] ], [ [[CSA_MASK_SEL:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; NO-EVL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DATA0]], i64 [[TMP6]] +; NO-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; NO-EVL-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; NO-EVL-NEXT: [[TMP9:%.*]] = sext [[WIDE_LOAD]] to +; NO-EVL-NEXT: [[TMP10:%.*]] = icmp slt [[BROADCAST_SPLAT]], [[TMP9]] +; NO-EVL-NEXT: [[TMP11:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP10]]) +; NO-EVL-NEXT: [[CSA_MASK_SEL]] = select i1 [[TMP11]], [[TMP10]], [[CSA_MASK_PHI1]] +; NO-EVL-NEXT: [[CSA_DATA_SEL]] = select i1 [[TMP11]], [[WIDE_LOAD]], [[CSA_DATA_PHI2]] +; NO-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DATA1]], i64 [[TMP6]] +; NO-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 +; NO-EVL-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP13]], align 4 +; NO-EVL-NEXT: [[TMP14:%.*]] = sext [[WIDE_LOAD3]] to +; NO-EVL-NEXT: [[TMP15:%.*]] = icmp slt [[BROADCAST_SPLAT]], [[TMP14]] +; NO-EVL-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP15]]) +; NO-EVL-NEXT: [[CSA_MASK_SEL4]] = select i1 [[TMP16]], [[TMP15]], [[CSA_MASK_PHI]] +; NO-EVL-NEXT: [[CSA_DATA_SEL5]] = select i1 [[TMP16]], [[WIDE_LOAD3]], [[CSA_DATA_PHI]] +; NO-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; NO-EVL: [[MIDDLE_BLOCK]]: +; NO-EVL-NEXT: [[CSA_STEP6:%.*]] = call @llvm.stepvector.nxv4i32() +; NO-EVL-NEXT: [[TMP18:%.*]] = select [[CSA_MASK_SEL4]], [[CSA_STEP6]], zeroinitializer +; NO-EVL-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32( [[TMP18]]) +; NO-EVL-NEXT: [[TMP20:%.*]] = extractelement [[CSA_MASK_SEL4]], i64 0 +; NO-EVL-NEXT: [[TMP21:%.*]] = icmp eq i32 
[[TMP19]], 0 +; NO-EVL-NEXT: [[TMP22:%.*]] = and i1 [[TMP20]], [[TMP21]] +; NO-EVL-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 0, i32 -1 +; NO-EVL-NEXT: [[CSA_EXTRACT7:%.*]] = extractelement [[CSA_DATA_SEL5]], i32 [[TMP23]] +; NO-EVL-NEXT: [[TMP24:%.*]] = icmp sge i32 [[TMP23]], 0 +; NO-EVL-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[CSA_EXTRACT7]], i32 -1 +; NO-EVL-NEXT: [[CSA_STEP:%.*]] = call @llvm.stepvector.nxv4i32() +; NO-EVL-NEXT: [[TMP26:%.*]] = select [[CSA_MASK_SEL]], [[CSA_STEP]], zeroinitializer +; NO-EVL-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32( [[TMP26]]) +; NO-EVL-NEXT: [[TMP28:%.*]] = extractelement [[CSA_MASK_SEL]], i64 0 +; NO-EVL-NEXT: [[TMP29:%.*]] = icmp eq i32 [[TMP27]], 0 +; NO-EVL-NEXT: [[TMP30:%.*]] = and i1 [[TMP28]], [[TMP29]] +; NO-EVL-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 0, i32 -1 +; NO-EVL-NEXT: [[CSA_EXTRACT:%.*]] = extractelement [[CSA_DATA_SEL]], i32 [[TMP31]] +; NO-EVL-NEXT: [[TMP32:%.*]] = icmp sge i32 [[TMP31]], 0 +; NO-EVL-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[CSA_EXTRACT]], i32 -1 +; NO-EVL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; NO-EVL-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; NO-EVL: [[SCALAR_PH]]: +; NO-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; NO-EVL-NEXT: br label %[[LOOP:.*]] +; NO-EVL: [[EXIT_LOOPEXIT]]: +; NO-EVL-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], %[[LOOP]] ], [ [[TMP33]], %[[MIDDLE_BLOCK]] ] +; NO-EVL-NEXT: [[S_1_LCSSA:%.*]] = phi i32 [ [[S_1:%.*]], %[[LOOP]] ], [ [[TMP25]], %[[MIDDLE_BLOCK]] ] +; NO-EVL-NEXT: [[TMP34:%.*]] = or i32 [[S_1_LCSSA]], [[SPEC_SELECT_LCSSA]] +; NO-EVL-NEXT: br label %[[EXIT]] +; NO-EVL: [[EXIT]]: +; NO-EVL-NEXT: [[OR:%.*]] = phi i32 [ [[TMP34]], %[[EXIT_LOOPEXIT]] ], [ -1, %[[ENTRY]] ] +; NO-EVL-NEXT: ret i32 [[OR]] +; NO-EVL: [[LOOP]]: +; NO-EVL-NEXT: [[IV:%.*]] = phi i64 [ 
[[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; NO-EVL-NEXT: [[S_023:%.*]] = phi i32 [ -1, %[[SCALAR_PH]] ], [ [[S_1]], %[[LOOP]] ] +; NO-EVL-NEXT: [[T_022:%.*]] = phi i32 [ -1, %[[SCALAR_PH]] ], [ [[SPEC_SELECT]], %[[LOOP]] ] +; NO-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA0]], i64 [[IV]] +; NO-EVL-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; NO-EVL-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 +; NO-EVL-NEXT: [[CMP1:%.*]] = icmp slt i64 [[A]], [[TMP36]] +; NO-EVL-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1]], i32 [[TMP35]], i32 [[T_022]] +; NO-EVL-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[DATA1]], i64 [[IV]] +; NO-EVL-NEXT: [[TMP37:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 +; NO-EVL-NEXT: [[TMP38:%.*]] = sext i32 [[TMP37]] to i64 +; NO-EVL-NEXT: [[CMP6:%.*]] = icmp slt i64 [[A]], [[TMP38]] +; NO-EVL-NEXT: [[S_1]] = select i1 [[CMP6]], i32 [[TMP37]], i32 [[S_023]] +; NO-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; NO-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; NO-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; +entry: + %cmp21 = icmp sgt i32 %N, 0 + br i1 %cmp21, label %loop.preheader, label %exit + +loop.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %loop + +exit.loopexit: ; preds = %loop + %0 = or i32 %s.1, %spec.select + br label %exit + +exit: ; preds = %exit.loopexit, %entry + %or = phi i32 [ %0, %exit.loopexit ], [ -1, %entry ] + ret i32 %or + +loop: ; preds = %loop.preheader, %loop + %iv = phi i64 [ 0, %loop.preheader ], [ %iv.next, %loop ] + %s.023 = phi i32 [ -1, %loop.preheader ], [ %s.1, %loop ] + %t.022 = phi i32 [ -1, %loop.preheader ], [ %spec.select, %loop ] + %arrayidx = getelementptr inbounds i32, ptr %data0, i64 %iv + %1 = load i32, ptr %arrayidx, align 4 + %2 = sext i32 %1 to i64 + %cmp1 = icmp slt i64 %a, %2 + %spec.select = 
select i1 %cmp1, i32 %1, i32 %t.022 + %arrayidx5 = getelementptr inbounds i32, ptr %data1, i64 %iv + %3 = load i32, ptr %arrayidx5, align 4 + %4 = sext i32 %3 to i64 + %cmp6 = icmp slt i64 %a, %4 + %s.1 = select i1 %cmp6, i32 %3, i32 %s.023 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond.not, label %exit.loopexit, label %loop +} + +; This function is generated from the following C/C++ program: +; int csa_in_series_int_select_induction_cmp(int N, int *data0, int *data1) { +; int t = -1; +; int s = -1; +; for (int i = 0; i < N; i++) { +; if (i < data0[i]) +; t = data0[i]; +; if (i < data1[i]) +; s = data1[i]; +; } +; return t | s; // use t and s +; } +define i32 @csa_in_series_int_select_induction_cmp(i32 %N, ptr %data0, ptr %data1) { +; EVL-LABEL: define i32 @csa_in_series_int_select_induction_cmp( +; EVL-SAME: i32 [[N:%.*]], ptr [[DATA0:%.*]], ptr [[DATA1:%.*]]) #[[ATTR0]] { +; EVL-NEXT: [[ENTRY:.*]]: +; EVL-NEXT: [[CMP21:%.*]] = icmp sgt i32 [[N]], 0 +; EVL-NEXT: br i1 [[CMP21]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; EVL: [[LOOP_PREHEADER]]: +; EVL-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; EVL-NEXT: br label %[[LOOP:.*]] +; EVL: [[EXIT_LOOPEXIT:.*]]: +; EVL-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; EVL-NEXT: [[S_1_LCSSA:%.*]] = phi i32 [ [[S_1:%.*]], %[[LOOP]] ] +; EVL-NEXT: [[TMP0:%.*]] = or i32 [[S_1_LCSSA]], [[SPEC_SELECT_LCSSA]] +; EVL-NEXT: br label %[[EXIT]] +; EVL: [[EXIT]]: +; EVL-NEXT: [[OR:%.*]] = phi i32 [ [[TMP0]], %[[EXIT_LOOPEXIT]] ], [ -1, %[[ENTRY]] ] +; EVL-NEXT: ret i32 [[OR]] +; EVL: [[LOOP]]: +; EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; EVL-NEXT: [[S_023:%.*]] = phi i32 [ -1, %[[LOOP_PREHEADER]] ], [ [[S_1]], %[[LOOP]] ] +; EVL-NEXT: [[T_022:%.*]] = phi i32 [ -1, %[[LOOP_PREHEADER]] ], [ [[SPEC_SELECT]], %[[LOOP]] ] +; EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds i32, ptr [[DATA0]], i64 [[IV]] +; EVL-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; EVL-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; EVL-NEXT: [[CMP1:%.*]] = icmp slt i64 [[IV]], [[TMP2]] +; EVL-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1]], i32 [[TMP1]], i32 [[T_022]] +; EVL-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[DATA1]], i64 [[IV]] +; EVL-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 +; EVL-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 +; EVL-NEXT: [[CMP6:%.*]] = icmp slt i64 [[IV]], [[TMP4]] +; EVL-NEXT: [[S_1]] = select i1 [[CMP6]], i32 [[TMP3]], i32 [[S_023]] +; EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]] +; +; NO-EVL-LABEL: define i32 @csa_in_series_int_select_induction_cmp( +; NO-EVL-SAME: i32 [[N:%.*]], ptr [[DATA0:%.*]], ptr [[DATA1:%.*]]) #[[ATTR0]] { +; NO-EVL-NEXT: [[ENTRY:.*]]: +; NO-EVL-NEXT: [[CMP21:%.*]] = icmp sgt i32 [[N]], 0 +; NO-EVL-NEXT: br i1 [[CMP21]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; NO-EVL: [[LOOP_PREHEADER]]: +; NO-EVL-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; NO-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; NO-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; NO-EVL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] +; NO-EVL-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; NO-EVL: [[VECTOR_PH]]: +; NO-EVL-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; NO-EVL-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 +; NO-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] +; NO-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; NO-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; NO-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; NO-EVL-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() 
+; NO-EVL-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) +; NO-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; NO-EVL-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]] +; NO-EVL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; NO-EVL-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; NO-EVL-NEXT: br label %[[VECTOR_BODY:.*]] +; NO-EVL: [[VECTOR_BODY]]: +; NO-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[CSA_DATA_PHI:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 poison, i64 0), poison, zeroinitializer), %[[VECTOR_PH]] ], [ [[CSA_DATA_SEL5:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[CSA_DATA_PHI2:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 poison, i64 0), poison, zeroinitializer), %[[VECTOR_PH]] ], [ [[CSA_DATA_SEL:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[CSA_MASK_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[CSA_MASK_SEL4:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[CSA_MASK_PHI1:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[CSA_MASK_SEL:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 +; NO-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DATA0]], i64 [[TMP10]] +; NO-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 +; NO-EVL-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 4 +; NO-EVL-NEXT: [[TMP13:%.*]] = sext [[WIDE_LOAD]] to +; NO-EVL-NEXT: [[TMP14:%.*]] = icmp slt [[VEC_IND]], [[TMP13]] +; NO-EVL-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP14]]) +; NO-EVL-NEXT: [[CSA_MASK_SEL]] = select i1 [[TMP15]], [[TMP14]], [[CSA_MASK_PHI1]] +; NO-EVL-NEXT: [[CSA_DATA_SEL]] = select i1 [[TMP15]], [[WIDE_LOAD]], [[CSA_DATA_PHI2]] +; NO-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds 
i32, ptr [[DATA1]], i64 [[TMP10]] +; NO-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 +; NO-EVL-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP17]], align 4 +; NO-EVL-NEXT: [[TMP18:%.*]] = sext [[WIDE_LOAD3]] to +; NO-EVL-NEXT: [[TMP19:%.*]] = icmp slt [[VEC_IND]], [[TMP18]] +; NO-EVL-NEXT: [[TMP20:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP19]]) +; NO-EVL-NEXT: [[CSA_MASK_SEL4]] = select i1 [[TMP20]], [[TMP19]], [[CSA_MASK_PHI]] +; NO-EVL-NEXT: [[CSA_DATA_SEL5]] = select i1 [[TMP20]], [[WIDE_LOAD3]], [[CSA_DATA_PHI]] +; NO-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; NO-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-EVL-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; NO-EVL: [[MIDDLE_BLOCK]]: +; NO-EVL-NEXT: [[CSA_STEP6:%.*]] = call @llvm.stepvector.nxv4i32() +; NO-EVL-NEXT: [[TMP22:%.*]] = select [[CSA_MASK_SEL4]], [[CSA_STEP6]], zeroinitializer +; NO-EVL-NEXT: [[TMP23:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32( [[TMP22]]) +; NO-EVL-NEXT: [[TMP24:%.*]] = extractelement [[CSA_MASK_SEL4]], i64 0 +; NO-EVL-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP23]], 0 +; NO-EVL-NEXT: [[TMP26:%.*]] = and i1 [[TMP24]], [[TMP25]] +; NO-EVL-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 0, i32 -1 +; NO-EVL-NEXT: [[CSA_EXTRACT7:%.*]] = extractelement [[CSA_DATA_SEL5]], i32 [[TMP27]] +; NO-EVL-NEXT: [[TMP28:%.*]] = icmp sge i32 [[TMP27]], 0 +; NO-EVL-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[CSA_EXTRACT7]], i32 -1 +; NO-EVL-NEXT: [[CSA_STEP:%.*]] = call @llvm.stepvector.nxv4i32() +; NO-EVL-NEXT: [[TMP30:%.*]] = select [[CSA_MASK_SEL]], [[CSA_STEP]], zeroinitializer +; NO-EVL-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32( [[TMP30]]) +; NO-EVL-NEXT: [[TMP32:%.*]] = extractelement [[CSA_MASK_SEL]], i64 0 +; NO-EVL-NEXT: [[TMP33:%.*]] = icmp eq i32 [[TMP31]], 0 +; 
NO-EVL-NEXT: [[TMP34:%.*]] = and i1 [[TMP32]], [[TMP33]] +; NO-EVL-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], i32 0, i32 -1 +; NO-EVL-NEXT: [[CSA_EXTRACT:%.*]] = extractelement [[CSA_DATA_SEL]], i32 [[TMP35]] +; NO-EVL-NEXT: [[TMP36:%.*]] = icmp sge i32 [[TMP35]], 0 +; NO-EVL-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i32 [[CSA_EXTRACT]], i32 -1 +; NO-EVL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; NO-EVL-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; NO-EVL: [[SCALAR_PH]]: +; NO-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; NO-EVL-NEXT: br label %[[LOOP:.*]] +; NO-EVL: [[EXIT_LOOPEXIT]]: +; NO-EVL-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], %[[LOOP]] ], [ [[TMP37]], %[[MIDDLE_BLOCK]] ] +; NO-EVL-NEXT: [[S_1_LCSSA:%.*]] = phi i32 [ [[S_1:%.*]], %[[LOOP]] ], [ [[TMP29]], %[[MIDDLE_BLOCK]] ] +; NO-EVL-NEXT: [[TMP38:%.*]] = or i32 [[S_1_LCSSA]], [[SPEC_SELECT_LCSSA]] +; NO-EVL-NEXT: br label %[[EXIT]] +; NO-EVL: [[EXIT]]: +; NO-EVL-NEXT: [[OR:%.*]] = phi i32 [ [[TMP38]], %[[EXIT_LOOPEXIT]] ], [ -1, %[[ENTRY]] ] +; NO-EVL-NEXT: ret i32 [[OR]] +; NO-EVL: [[LOOP]]: +; NO-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; NO-EVL-NEXT: [[S_023:%.*]] = phi i32 [ -1, %[[SCALAR_PH]] ], [ [[S_1]], %[[LOOP]] ] +; NO-EVL-NEXT: [[T_022:%.*]] = phi i32 [ -1, %[[SCALAR_PH]] ], [ [[SPEC_SELECT]], %[[LOOP]] ] +; NO-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA0]], i64 [[IV]] +; NO-EVL-NEXT: [[TMP39:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; NO-EVL-NEXT: [[TMP40:%.*]] = sext i32 [[TMP39]] to i64 +; NO-EVL-NEXT: [[CMP1:%.*]] = icmp slt i64 [[IV]], [[TMP40]] +; NO-EVL-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1]], i32 [[TMP39]], i32 [[T_022]] +; NO-EVL-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[DATA1]], i64 [[IV]] +; NO-EVL-NEXT: [[TMP41:%.*]] = load i32, ptr 
[[ARRAYIDX5]], align 4 +; NO-EVL-NEXT: [[TMP42:%.*]] = sext i32 [[TMP41]] to i64 +; NO-EVL-NEXT: [[CMP6:%.*]] = icmp slt i64 [[IV]], [[TMP42]] +; NO-EVL-NEXT: [[S_1]] = select i1 [[CMP6]], i32 [[TMP41]], i32 [[S_023]] +; NO-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; NO-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; NO-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] +; +entry: + %cmp21 = icmp sgt i32 %N, 0 + br i1 %cmp21, label %loop.preheader, label %exit + +loop.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %loop + +exit.loopexit: ; preds = %loop + %0 = or i32 %s.1, %spec.select + br label %exit + +exit: ; preds = %exit.loopexit, %entry + %or = phi i32 [ %0, %exit.loopexit ], [ -1, %entry ] + ret i32 %or + +loop: ; preds = %loop.preheader, %loop + %iv = phi i64 [ 0, %loop.preheader ], [ %iv.next, %loop ] + %s.023 = phi i32 [ -1, %loop.preheader ], [ %s.1, %loop ] + %t.022 = phi i32 [ -1, %loop.preheader ], [ %spec.select, %loop ] + %arrayidx = getelementptr inbounds i32, ptr %data0, i64 %iv + %1 = load i32, ptr %arrayidx, align 4 + %2 = sext i32 %1 to i64 + %cmp1 = icmp slt i64 %iv, %2 + %spec.select = select i1 %cmp1, i32 %1, i32 %t.022 + %arrayidx5 = getelementptr inbounds i32, ptr %data1, i64 %iv + %3 = load i32, ptr %arrayidx5, align 4 + %4 = sext i32 %3 to i64 + %cmp6 = icmp slt i64 %iv, %4 + %s.1 = select i1 %cmp6, i32 %3, i32 %s.023 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond.not, label %exit.loopexit, label %loop +} + +; This function is generated from the following C/C++ program: +; float csa_in_series_float_select(int N, float *data0, +; float *data1) { +; float t = 1.0f; +; float s = 1.0f; +; for (int i = 0; i < N; i++) { +; if (0.0f < data0[i]) +; t = data0[i]; +; if (0.0f [ shufflevector ( insertelement ( poison, float poison, i64 0), poison, 
zeroinitializer), %[[VECTOR_PH]] ], [ [[CSA_DATA_SEL8:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[CSA_DATA_PHI1:%.*]] = phi [ shufflevector ( insertelement ( poison, float poison, i64 0), poison, zeroinitializer), %[[VECTOR_PH]] ], [ [[CSA_DATA_SEL:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[CSA_MASK_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[CSA_MASK_SEL7:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[CSA_MASK_PHI2:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[CSA_MASK_SEL:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[CSA_VL_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[CSA_VL_SEL:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[CSA_VL_PHI3:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[CSA_VL_SEL6:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[AVL:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[EVL_BASED_IV]] +; EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; EVL-NEXT: [[TMP6:%.*]] = add i64 [[EVL_BASED_IV]], 0 +; EVL-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[DATA0]], i64 [[TMP6]] +; EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 +; EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP8]], splat (i1 true), i32 [[TMP5]]) +; EVL-NEXT: [[TMP9:%.*]] = fcmp ogt [[VP_OP_LOAD]], zeroinitializer +; EVL-NEXT: [[ANY_ACTIVE:%.*]] = call i1 @llvm.vp.reduce.or.nxv4i1(i1 false, [[TMP9]], splat (i1 true), i32 [[TMP5]]) +; EVL-NEXT: [[CSA_VL_SEL]] = select i1 [[ANY_ACTIVE]], i32 [[TMP5]], i32 [[CSA_VL_PHI]] +; EVL-NEXT: [[CSA_MASK_SEL]] = select i1 [[ANY_ACTIVE]], [[TMP9]], [[CSA_MASK_PHI2]] +; EVL-NEXT: [[CSA_DATA_SEL]] = select i1 [[ANY_ACTIVE]], [[VP_OP_LOAD]], [[CSA_DATA_PHI1]] +; EVL-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[DATA1]], i64 [[TMP6]] +; EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 0 +; EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP11]], splat (i1 true), i32 [[TMP5]]) +; EVL-NEXT: 
[[TMP12:%.*]] = fcmp ogt [[VP_OP_LOAD4]], zeroinitializer +; EVL-NEXT: [[ANY_ACTIVE5:%.*]] = call i1 @llvm.vp.reduce.or.nxv4i1(i1 false, [[TMP12]], splat (i1 true), i32 [[TMP5]]) +; EVL-NEXT: [[CSA_VL_SEL6]] = select i1 [[ANY_ACTIVE5]], i32 [[TMP5]], i32 [[CSA_VL_PHI3]] +; EVL-NEXT: [[CSA_MASK_SEL7]] = select i1 [[ANY_ACTIVE5]], [[TMP12]], [[CSA_MASK_PHI]] +; EVL-NEXT: [[CSA_DATA_SEL8]] = select i1 [[ANY_ACTIVE5]], [[VP_OP_LOAD4]], [[CSA_DATA_PHI]] +; EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP5]] to i64 +; EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP13]], [[EVL_BASED_IV]] +; EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] +; EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; EVL-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; EVL: [[MIDDLE_BLOCK]]: +; EVL-NEXT: [[CSA_STEP9:%.*]] = call @llvm.stepvector.nxv4i32() +; EVL-NEXT: [[TMP15:%.*]] = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 -1, [[CSA_STEP9]], [[CSA_MASK_SEL7]], i32 [[CSA_VL_SEL6]]) +; EVL-NEXT: [[CSA_EXTRACT10:%.*]] = extractelement [[CSA_DATA_SEL8]], i32 [[TMP15]] +; EVL-NEXT: [[TMP16:%.*]] = icmp sge i32 [[TMP15]], 0 +; EVL-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], float [[CSA_EXTRACT10]], float 1.000000e+00 +; EVL-NEXT: [[CSA_STEP:%.*]] = call @llvm.stepvector.nxv4i32() +; EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 -1, [[CSA_STEP]], [[CSA_MASK_SEL]], i32 [[CSA_VL_SEL]]) +; EVL-NEXT: [[CSA_EXTRACT:%.*]] = extractelement [[CSA_DATA_SEL]], i32 [[TMP18]] +; EVL-NEXT: [[TMP19:%.*]] = icmp sge i32 [[TMP18]], 0 +; EVL-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[CSA_EXTRACT]], float 1.000000e+00 +; EVL-NEXT: br i1 true, label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; EVL: [[SCALAR_PH]]: +; EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; EVL-NEXT: br label %[[LOOP:.*]] +; EVL: [[EXIT_LOOPEXIT]]: +; EVL-NEXT: [[T_1_LCSSA:%.*]] = phi 
float [ [[T_1:%.*]], %[[LOOP]] ], [ [[TMP20]], %[[MIDDLE_BLOCK]] ] +; EVL-NEXT: [[S_1_LCSSA:%.*]] = phi float [ [[S_1:%.*]], %[[LOOP]] ], [ [[TMP17]], %[[MIDDLE_BLOCK]] ] +; EVL-NEXT: [[TMP21:%.*]] = fadd float [[T_1_LCSSA]], [[S_1_LCSSA]] +; EVL-NEXT: br label %[[EXIT]] +; EVL: [[EXIT]]: +; EVL-NEXT: [[ADD:%.*]] = phi float [ [[TMP21]], %[[EXIT_LOOPEXIT]] ], [ 2.000000e+00, %[[ENTRY]] ] +; EVL-NEXT: ret float [[ADD]] +; EVL: [[LOOP]]: +; EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; EVL-NEXT: [[S_021:%.*]] = phi float [ 1.000000e+00, %[[SCALAR_PH]] ], [ [[S_1]], %[[LOOP]] ] +; EVL-NEXT: [[T_020:%.*]] = phi float [ 1.000000e+00, %[[SCALAR_PH]] ], [ [[T_1]], %[[LOOP]] ] +; EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[DATA0]], i64 [[IV]] +; EVL-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; EVL-NEXT: [[CMP1:%.*]] = fcmp ogt float [[TMP22]], 0.000000e+00 +; EVL-NEXT: [[T_1]] = select i1 [[CMP1]], float [[TMP22]], float [[T_020]] +; EVL-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[DATA1]], i64 [[IV]] +; EVL-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 +; EVL-NEXT: [[CMP6:%.*]] = fcmp ogt float [[TMP23]], 0.000000e+00 +; EVL-NEXT: [[S_1]] = select i1 [[CMP6]], float [[TMP23]], float [[S_021]] +; EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; +; NO-EVL-LABEL: define float @csa_in_series_float_select( +; NO-EVL-SAME: i32 [[N:%.*]], ptr [[DATA0:%.*]], ptr [[DATA1:%.*]]) #[[ATTR0]] { +; NO-EVL-NEXT: [[ENTRY:.*]]: +; NO-EVL-NEXT: [[CMP19:%.*]] = icmp sgt i32 [[N]], 0 +; NO-EVL-NEXT: br i1 [[CMP19]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; NO-EVL: [[LOOP_PREHEADER]]: +; NO-EVL-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; NO-EVL-NEXT: 
[[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; NO-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; NO-EVL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] +; NO-EVL-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; NO-EVL: [[VECTOR_PH]]: +; NO-EVL-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; NO-EVL-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 +; NO-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] +; NO-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; NO-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; NO-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; NO-EVL-NEXT: br label %[[VECTOR_BODY:.*]] +; NO-EVL: [[VECTOR_BODY]]: +; NO-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[CSA_DATA_PHI:%.*]] = phi [ shufflevector ( insertelement ( poison, float poison, i64 0), poison, zeroinitializer), %[[VECTOR_PH]] ], [ [[CSA_DATA_SEL5:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[CSA_DATA_PHI2:%.*]] = phi [ shufflevector ( insertelement ( poison, float poison, i64 0), poison, zeroinitializer), %[[VECTOR_PH]] ], [ [[CSA_DATA_SEL:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[CSA_MASK_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[CSA_MASK_SEL4:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[CSA_MASK_PHI1:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[CSA_MASK_SEL:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; NO-EVL-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[DATA0]], i64 [[TMP6]] +; NO-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 +; NO-EVL-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; NO-EVL-NEXT: [[TMP9:%.*]] = fcmp ogt [[WIDE_LOAD]], zeroinitializer +; NO-EVL-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP9]]) +; NO-EVL-NEXT: [[CSA_MASK_SEL]] = select i1 [[TMP10]], [[TMP9]], 
[[CSA_MASK_PHI1]] +; NO-EVL-NEXT: [[CSA_DATA_SEL]] = select i1 [[TMP10]], [[WIDE_LOAD]], [[CSA_DATA_PHI2]] +; NO-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[DATA1]], i64 [[TMP6]] +; NO-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0 +; NO-EVL-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP12]], align 4 +; NO-EVL-NEXT: [[TMP13:%.*]] = fcmp ogt [[WIDE_LOAD3]], zeroinitializer +; NO-EVL-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP13]]) +; NO-EVL-NEXT: [[CSA_MASK_SEL4]] = select i1 [[TMP14]], [[TMP13]], [[CSA_MASK_PHI]] +; NO-EVL-NEXT: [[CSA_DATA_SEL5]] = select i1 [[TMP14]], [[WIDE_LOAD3]], [[CSA_DATA_PHI]] +; NO-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; NO-EVL: [[MIDDLE_BLOCK]]: +; NO-EVL-NEXT: [[CSA_STEP6:%.*]] = call @llvm.stepvector.nxv4i32() +; NO-EVL-NEXT: [[TMP16:%.*]] = select [[CSA_MASK_SEL4]], [[CSA_STEP6]], zeroinitializer +; NO-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32( [[TMP16]]) +; NO-EVL-NEXT: [[TMP18:%.*]] = extractelement [[CSA_MASK_SEL4]], i64 0 +; NO-EVL-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP17]], 0 +; NO-EVL-NEXT: [[TMP20:%.*]] = and i1 [[TMP18]], [[TMP19]] +; NO-EVL-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 0, i32 -1 +; NO-EVL-NEXT: [[CSA_EXTRACT7:%.*]] = extractelement [[CSA_DATA_SEL5]], i32 [[TMP21]] +; NO-EVL-NEXT: [[TMP22:%.*]] = icmp sge i32 [[TMP21]], 0 +; NO-EVL-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], float [[CSA_EXTRACT7]], float 1.000000e+00 +; NO-EVL-NEXT: [[CSA_STEP:%.*]] = call @llvm.stepvector.nxv4i32() +; NO-EVL-NEXT: [[TMP24:%.*]] = select [[CSA_MASK_SEL]], [[CSA_STEP]], zeroinitializer +; NO-EVL-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32( [[TMP24]]) +; NO-EVL-NEXT: [[TMP26:%.*]] = extractelement 
[[CSA_MASK_SEL]], i64 0 +; NO-EVL-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP25]], 0 +; NO-EVL-NEXT: [[TMP28:%.*]] = and i1 [[TMP26]], [[TMP27]] +; NO-EVL-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 0, i32 -1 +; NO-EVL-NEXT: [[CSA_EXTRACT:%.*]] = extractelement [[CSA_DATA_SEL]], i32 [[TMP29]] +; NO-EVL-NEXT: [[TMP30:%.*]] = icmp sge i32 [[TMP29]], 0 +; NO-EVL-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], float [[CSA_EXTRACT]], float 1.000000e+00 +; NO-EVL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; NO-EVL-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; NO-EVL: [[SCALAR_PH]]: +; NO-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; NO-EVL-NEXT: br label %[[LOOP:.*]] +; NO-EVL: [[EXIT_LOOPEXIT]]: +; NO-EVL-NEXT: [[T_1_LCSSA:%.*]] = phi float [ [[T_1:%.*]], %[[LOOP]] ], [ [[TMP31]], %[[MIDDLE_BLOCK]] ] +; NO-EVL-NEXT: [[S_1_LCSSA:%.*]] = phi float [ [[S_1:%.*]], %[[LOOP]] ], [ [[TMP23]], %[[MIDDLE_BLOCK]] ] +; NO-EVL-NEXT: [[TMP32:%.*]] = fadd float [[T_1_LCSSA]], [[S_1_LCSSA]] +; NO-EVL-NEXT: br label %[[EXIT]] +; NO-EVL: [[EXIT]]: +; NO-EVL-NEXT: [[ADD:%.*]] = phi float [ [[TMP32]], %[[EXIT_LOOPEXIT]] ], [ 2.000000e+00, %[[ENTRY]] ] +; NO-EVL-NEXT: ret float [[ADD]] +; NO-EVL: [[LOOP]]: +; NO-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; NO-EVL-NEXT: [[S_021:%.*]] = phi float [ 1.000000e+00, %[[SCALAR_PH]] ], [ [[S_1]], %[[LOOP]] ] +; NO-EVL-NEXT: [[T_020:%.*]] = phi float [ 1.000000e+00, %[[SCALAR_PH]] ], [ [[T_1]], %[[LOOP]] ] +; NO-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[DATA0]], i64 [[IV]] +; NO-EVL-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; NO-EVL-NEXT: [[CMP1:%.*]] = fcmp ogt float [[TMP33]], 0.000000e+00 +; NO-EVL-NEXT: [[T_1]] = select i1 [[CMP1]], float [[TMP33]], float [[T_020]] +; NO-EVL-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr 
[[DATA1]], i64 [[IV]] +; NO-EVL-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 +; NO-EVL-NEXT: [[CMP6:%.*]] = fcmp ogt float [[TMP34]], 0.000000e+00 +; NO-EVL-NEXT: [[S_1]] = select i1 [[CMP6]], float [[TMP34]], float [[S_021]] +; NO-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; NO-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; NO-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] +; +entry: + %cmp19 = icmp sgt i32 %N, 0 + br i1 %cmp19, label %loop.preheader, label %exit + +loop.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %loop + +exit.loopexit: ; preds = %loop + %0 = fadd float %t.1, %s.1 + br label %exit + +exit: ; preds = %exit.loopexit, %entry + %add = phi float [ %0, %exit.loopexit ], [ 2.000000e+00, %entry ] + ret float %add + +loop: ; preds = %loop.preheader, %loop + %iv = phi i64 [ 0, %loop.preheader ], [ %iv.next, %loop ] + %s.021 = phi float [ 1.000000e+00, %loop.preheader ], [ %s.1, %loop ] + %t.020 = phi float [ 1.000000e+00, %loop.preheader ], [ %t.1, %loop ] + %arrayidx = getelementptr inbounds float, ptr %data0, i64 %iv + %1 = load float, ptr %arrayidx, align 4 + %cmp1 = fcmp ogt float %1, 0.000000e+00 + %t.1 = select i1 %cmp1, float %1, float %t.020 + %arrayidx5 = getelementptr inbounds float, ptr %data1, i64 %iv + %2 = load float, ptr %arrayidx5, align 4 + %cmp6 = fcmp ogt float %2, 0.000000e+00 + %s.1 = select i1 %cmp6, float %2, float %s.021 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond.not, label %exit.loopexit, label %loop +} + +; This function is generated from the following C/C++ program: +; int csa_in_series_int(int N, bool *cond0, bool *cond1, int *data0, int *data1) { +; int t = -1; +; int s = -1; +; for (int i = 0; i < N; i++) { +; if (cond0[i]) +; t = data0[i]; +; if (cond1[i]) +; s = data1[i]; +; } +; return t | s; // use t and s 
+; } +define i32 @csa_in_series_int(i32 %N, ptr %cond0, ptr %cond1, ptr %data0, ptr %data1) { +; CHECK-LABEL: define i32 @csa_in_series_int( +; CHECK-SAME: i32 [[N:%.*]], ptr [[COND0:%.*]], ptr [[COND1:%.*]], ptr [[DATA0:%.*]], ptr [[DATA1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP15]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[EXIT_LOOPEXIT:.*]]: +; CHECK-NEXT: [[S_1_LCSSA:%.*]] = phi i32 [ [[S_1:%.*]], %[[FOR_INC:.*]] ] +; CHECK-NEXT: [[T_1_LCSSA:%.*]] = phi i32 [ [[T_1:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[TMP0:%.*]] = or i32 [[S_1_LCSSA]], [[T_1_LCSSA]] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[OR:%.*]] = phi i32 [ [[TMP0]], %[[EXIT_LOOPEXIT]] ], [ -1, %[[ENTRY]] ] +; CHECK-NEXT: ret i32 [[OR]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[IV_NEXT:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[S_017:%.*]] = phi i32 [ -1, %[[LOOP_PREHEADER]] ], [ [[S_1]], %[[FOR_INC]] ] +; CHECK-NEXT: [[T_016:%.*]] = phi i32 [ -1, %[[LOOP_PREHEADER]] ], [ [[T_1]], %[[FOR_INC]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[COND0]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[IF_END:.*]], label %[[IF_THEN:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[DATA0]], i64 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: br label %[[IF_END]] +; CHECK: [[IF_END]]: +; CHECK-NEXT: [[T_1]] = phi i32 [ [[TMP2]], %[[IF_THEN]] ], [ [[T_016]], %[[LOOP]] ] +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[COND1]], i64 [[IV]] +; CHECK-NEXT: 
[[TMP3:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 +; CHECK-NEXT: [[TOBOOL5_NOT:%.*]] = icmp eq i8 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TOBOOL5_NOT]], label %[[FOR_INC]], label %[[IF_THEN6:.*]] +; CHECK: [[IF_THEN6]]: +; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[DATA1]], i64 [[IV]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[S_1]] = phi i32 [ [[TMP4]], %[[IF_THEN6]] ], [ [[S_017]], %[[IF_END]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]] +; +entry: + %cmp15 = icmp sgt i32 %N, 0 + br i1 %cmp15, label %loop.preheader, label %exit + +loop.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %loop + +exit.loopexit: ; preds = %for.inc + %0 = or i32 %s.1, %t.1 + br label %exit + +exit: ; preds = %exit.loopexit, %entry + %or = phi i32 [ %0, %exit.loopexit ], [ -1, %entry ] + ret i32 %or + +loop: ; preds = %loop.preheader, %for.inc + %iv = phi i64 [ 0, %loop.preheader ], [ %iv.next, %for.inc ] + %s.017 = phi i32 [ -1, %loop.preheader ], [ %s.1, %for.inc ] + %t.016 = phi i32 [ -1, %loop.preheader ], [ %t.1, %for.inc ] + %arrayidx = getelementptr inbounds i8, ptr %cond0, i64 %iv + %1 = load i8, ptr %arrayidx, align 1 + %tobool.not = icmp eq i8 %1, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %loop + %arrayidx2 = getelementptr inbounds i32, ptr %data0, i64 %iv + %2 = load i32, ptr %arrayidx2, align 4 + br label %if.end + +if.end: ; preds = %if.then, %loop + %t.1 = phi i32 [ %2, %if.then ], [ %t.016, %loop ] + %arrayidx4 = getelementptr inbounds i8, ptr %cond1, i64 %iv + %3 = load i8, ptr %arrayidx4, align 1 + %tobool5.not = icmp eq i8 %3, 0 + br i1 %tobool5.not, label %for.inc, label %if.then6 + +if.then6: ; preds = %if.end + 
%arrayidx8 = getelementptr inbounds i32, ptr %data1, i64 %iv + %4 = load i32, ptr %arrayidx8, align 4 + br label %for.inc + +for.inc: ; preds = %if.end, %if.then6 + %s.1 = phi i32 [ %4, %if.then6 ], [ %s.017, %if.end ] + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond.not, label %exit.loopexit, label %loop +} + +; This function is generated from the following C/C++ program: +; float csa_in_series_float(int N, bool *cond0, bool *cond1, float *data0, +; float *data1) { +; float t = 1.0f; +; float s = 1.0f; +; for (int i = 0; i < N; i++) { +; if (cond0[i]) +; t = data0[i]; +; if (cond1[i]) +; s = data1[i]; +; } +; return t + s; // use t and s +; } +define float @csa_in_series_float(i32 %N, ptr %cond0, ptr %cond1, ptr %data0, ptr %data1) { +; CHECK-LABEL: define float @csa_in_series_float( +; CHECK-SAME: i32 [[N:%.*]], ptr [[COND0:%.*]], ptr [[COND1:%.*]], ptr [[DATA0:%.*]], ptr [[DATA1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP15]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[EXIT_LOOPEXIT:.*]]: +; CHECK-NEXT: [[S_1_LCSSA:%.*]] = phi float [ [[S_1:%.*]], %[[FOR_INC:.*]] ] +; CHECK-NEXT: [[T_1_LCSSA:%.*]] = phi float [ [[T_1:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[TMP0:%.*]] = fadd float [[T_1_LCSSA]], [[S_1_LCSSA]] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[ADD:%.*]] = phi float [ [[TMP0]], %[[EXIT_LOOPEXIT]] ], [ 2.000000e+00, %[[ENTRY]] ] +; CHECK-NEXT: ret float [[ADD]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[IV_NEXT:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[S_017:%.*]] = phi float [ 1.000000e+00, %[[LOOP_PREHEADER]] ], [ [[S_1]], %[[FOR_INC]] ] +; CHECK-NEXT: [[T_016:%.*]] = phi float [ 1.000000e+00, 
%[[LOOP_PREHEADER]] ], [ [[T_1]], %[[FOR_INC]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[COND0]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[IF_END:.*]], label %[[IF_THEN:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[DATA0]], i64 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: br label %[[IF_END]] +; CHECK: [[IF_END]]: +; CHECK-NEXT: [[T_1]] = phi float [ [[TMP2]], %[[IF_THEN]] ], [ [[T_016]], %[[LOOP]] ] +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[COND1]], i64 [[IV]] +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 +; CHECK-NEXT: [[TOBOOL5_NOT:%.*]] = icmp eq i8 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TOBOOL5_NOT]], label %[[FOR_INC]], label %[[IF_THEN6:.*]] +; CHECK: [[IF_THEN6]]: +; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[DATA1]], i64 [[IV]] +; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX8]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[S_1]] = phi float [ [[TMP4]], %[[IF_THEN6]] ], [ [[S_017]], %[[IF_END]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]] +; +entry: + %cmp15 = icmp sgt i32 %N, 0 + br i1 %cmp15, label %loop.preheader, label %exit + +loop.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %loop + +exit.loopexit: ; preds = %for.inc + %0 = fadd float %t.1, %s.1 + br label %exit + +exit: ; preds = %exit.loopexit, %entry + %add = phi float [ %0, %exit.loopexit ], [ 2.000000e+00, %entry ] + ret float %add + +loop: ; preds = %loop.preheader, %for.inc + %iv = phi i64 [ 0, %loop.preheader ], 
[ %iv.next, %for.inc ] + %s.017 = phi float [ 1.000000e+00, %loop.preheader ], [ %s.1, %for.inc ] + %t.016 = phi float [ 1.000000e+00, %loop.preheader ], [ %t.1, %for.inc ] + %arrayidx = getelementptr inbounds i8, ptr %cond0, i64 %iv + %1 = load i8, ptr %arrayidx, align 1 + %tobool.not = icmp eq i8 %1, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %loop + %arrayidx2 = getelementptr inbounds float, ptr %data0, i64 %iv + %2 = load float, ptr %arrayidx2, align 4 + br label %if.end + +if.end: ; preds = %if.then, %loop + %t.1 = phi float [ %2, %if.then ], [ %t.016, %loop ] + %arrayidx4 = getelementptr inbounds i8, ptr %cond1, i64 %iv + %3 = load i8, ptr %arrayidx4, align 1 + %tobool5.not = icmp eq i8 %3, 0 + br i1 %tobool5.not, label %for.inc, label %if.then6 + +if.then6: ; preds = %if.end + %arrayidx8 = getelementptr inbounds float, ptr %data1, i64 %iv + %4 = load float, ptr %arrayidx8, align 4 + br label %for.inc + +for.inc: ; preds = %if.end, %if.then6 + %s.1 = phi float [ %4, %if.then6 ], [ %s.017, %if.end ] + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond.not, label %exit.loopexit, label %loop +} + +; This function is generated from the following C/C++ program: +; int csa_in_series_same_scalar_int_select(int N, int *data0, +; int *data1) { +; int t = -1; +; for (int i = 0; i < N; i++) { +; if (i < data0[i]) +; t = data0[i]; +; if (i < data1[i]) +; t = data1[i]; +; } +; return t; // use t +; } +define i32 @csa_in_series_same_scalar_int_select(i32 %N, ptr %data0, ptr %data1) { +; CHECK-LABEL: define i32 @csa_in_series_same_scalar_int_select( +; CHECK-SAME: i32 [[N:%.*]], ptr [[DATA0:%.*]], ptr [[DATA1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP21:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP21]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; 
CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[EXIT_LOOPEXIT:.*]]: +; CHECK-NEXT: [[T_2_LCSSA:%.*]] = phi i32 [ [[T_2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[T_0_LCSSA:%.*]] = phi i32 [ -1, %[[ENTRY]] ], [ [[T_2_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[T_0_LCSSA]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[T_022:%.*]] = phi i32 [ -1, %[[LOOP_PREHEADER]] ], [ [[T_2]], %[[LOOP]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA0]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i64 [[IV]], [[TMP1]] +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP1]], i32 [[TMP0]], i32 [[T_022]] +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[DATA1]], i64 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[CMP6:%.*]] = icmp slt i64 [[IV]], [[TMP3]] +; CHECK-NEXT: [[T_2]] = select i1 [[CMP6]], i32 [[TMP2]], i32 [[SPEC_SELECT]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]] +; +entry: + %cmp21 = icmp sgt i32 %N, 0 + br i1 %cmp21, label %loop.preheader, label %exit + +loop.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %loop + +exit: ; preds = %loop, %entry + %t.0.lcssa = phi i32 [ -1, %entry ], [ %t.2, %loop ] + ret i32 %t.0.lcssa + +loop: ; preds = %loop.preheader, %loop + %iv = phi i64 [ 0, %loop.preheader ], [ %iv.next, %loop ] + %t.022 = phi i32 [ -1, %loop.preheader ], [ %t.2, %loop ] + %arrayidx = getelementptr inbounds i32, ptr %data0, i64 %iv + %0 = load i32, ptr 
%arrayidx, align 4 + %1 = sext i32 %0 to i64 + %cmp1 = icmp slt i64 %iv, %1 + %spec.select = select i1 %cmp1, i32 %0, i32 %t.022 + %arrayidx5 = getelementptr inbounds i32, ptr %data1, i64 %iv + %2 = load i32, ptr %arrayidx5, align 4 + %3 = sext i32 %2 to i64 + %cmp6 = icmp slt i64 %iv, %3 + %t.2 = select i1 %cmp6, i32 %2, i32 %spec.select + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond.not, label %exit, label %loop +} + +; This function is generated from the following C/C++ program: +; float csa_in_series_same_scalar_float_select(int N, +; float *data0, float *data1) { +; float t = 1.0f; +; for (int i = 0; i < N; i++) { +; if (0.0f < data0[i]) +; t = data0[i]; +; if (0.0f < data1[i]) +; t = data1[i]; +; } +; return t; // use t +; } +define float @csa_in_series_same_scalar_float_select(i32 %N, ptr %data0, ptr %data1) { +; CHECK-LABEL: define float @csa_in_series_same_scalar_float_select( +; CHECK-SAME: i32 [[N:%.*]], ptr [[DATA0:%.*]], ptr [[DATA1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP19:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP19]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[EXIT_LOOPEXIT:.*]]: +; CHECK-NEXT: [[T_2_LCSSA:%.*]] = phi float [ [[T_2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[T_0_LCSSA:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[T_2_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret float [[T_0_LCSSA]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[T_020:%.*]] = phi float [ 1.000000e+00, %[[LOOP_PREHEADER]] ], [ [[T_2]], %[[LOOP]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[DATA0]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load float, 
ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[TMP0]], 0.000000e+00 +; CHECK-NEXT: [[T_1:%.*]] = select i1 [[CMP1]], float [[TMP0]], float [[T_020]] +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[DATA1]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[CMP6:%.*]] = fcmp ogt float [[TMP1]], 0.000000e+00 +; CHECK-NEXT: [[T_2]] = select i1 [[CMP6]], float [[TMP1]], float [[T_1]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]] +; +entry: + %cmp19 = icmp sgt i32 %N, 0 + br i1 %cmp19, label %loop.preheader, label %exit + +loop.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %loop + +exit: ; preds = %loop, %entry + %t.0.lcssa = phi float [ 1.000000e+00, %entry ], [ %t.2, %loop ] + ret float %t.0.lcssa + +loop: ; preds = %loop.preheader, %loop + %iv = phi i64 [ 0, %loop.preheader ], [ %iv.next, %loop ] + %t.020 = phi float [ 1.000000e+00, %loop.preheader ], [ %t.2, %loop ] + %arrayidx = getelementptr inbounds float, ptr %data0, i64 %iv + %0 = load float, ptr %arrayidx, align 4 + %cmp1 = fcmp ogt float %0, 0.000000e+00 + %t.1 = select i1 %cmp1, float %0, float %t.020 + %arrayidx5 = getelementptr inbounds float, ptr %data1, i64 %iv + %1 = load float, ptr %arrayidx5, align 4 + %cmp6 = fcmp ogt float %1, 0.000000e+00 + %t.2 = select i1 %cmp6, float %1, float %t.1 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond.not, label %exit, label %loop +} + +; This function is generated from the following C/C++ program: +; int csa_in_series_same_scalar_int(int N, bool *cond0, bool *cond1, int *data0, +; int *data1) { +; int t = -1; +; for (int i = 0; i < N; i++) { +; if (cond0[i]) +; t = data0[i]; +; if (cond1[i]) +; t = data1[i]; 
+; } +; return t; // use t +; } +define i32 @csa_in_series_same_scalar_int(i32 %N, ptr %cond0, ptr %cond1, ptr %data0, ptr %data1) { +; CHECK-LABEL: define i32 @csa_in_series_same_scalar_int( +; CHECK-SAME: i32 [[N:%.*]], ptr [[COND0:%.*]], ptr [[COND1:%.*]], ptr [[DATA0:%.*]], ptr [[DATA1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP15]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[EXIT_LOOPEXIT:.*]]: +; CHECK-NEXT: [[T_2_LCSSA:%.*]] = phi i32 [ [[T_2:%.*]], %[[FOR_INC:.*]] ] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[T_0_LCSSA:%.*]] = phi i32 [ -1, %[[ENTRY]] ], [ [[T_2_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[T_0_LCSSA]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[IV_NEXT:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[T_016:%.*]] = phi i32 [ -1, %[[LOOP_PREHEADER]] ], [ [[T_2]], %[[FOR_INC]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[COND0]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[IF_END:.*]], label %[[IF_THEN:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[DATA0]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: br label %[[IF_END]] +; CHECK: [[IF_END]]: +; CHECK-NEXT: [[T_1:%.*]] = phi i32 [ [[TMP1]], %[[IF_THEN]] ], [ [[T_016]], %[[LOOP]] ] +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[COND1]], i64 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 +; CHECK-NEXT: [[TOBOOL5_NOT:%.*]] = icmp eq i8 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[TOBOOL5_NOT]], label 
%[[FOR_INC]], label %[[IF_THEN6:.*]] +; CHECK: [[IF_THEN6]]: +; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[DATA1]], i64 [[IV]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[T_2]] = phi i32 [ [[TMP3]], %[[IF_THEN6]] ], [ [[T_1]], %[[IF_END]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]] +; +entry: + %cmp15 = icmp sgt i32 %N, 0 + br i1 %cmp15, label %loop.preheader, label %exit + +loop.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %loop + +exit: ; preds = %for.inc, %entry + %t.0.lcssa = phi i32 [ -1, %entry ], [ %t.2, %for.inc ] + ret i32 %t.0.lcssa + +loop: ; preds = %loop.preheader, %for.inc + %iv = phi i64 [ 0, %loop.preheader ], [ %iv.next, %for.inc ] + %t.016 = phi i32 [ -1, %loop.preheader ], [ %t.2, %for.inc ] + %arrayidx = getelementptr inbounds i8, ptr %cond0, i64 %iv + %0 = load i8, ptr %arrayidx, align 1 + %tobool.not = icmp eq i8 %0, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %loop + %arrayidx2 = getelementptr inbounds i32, ptr %data0, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 + br label %if.end + +if.end: ; preds = %if.then, %loop + %t.1 = phi i32 [ %1, %if.then ], [ %t.016, %loop ] + %arrayidx4 = getelementptr inbounds i8, ptr %cond1, i64 %iv + %2 = load i8, ptr %arrayidx4, align 1 + %tobool5.not = icmp eq i8 %2, 0 + br i1 %tobool5.not, label %for.inc, label %if.then6 + +if.then6: ; preds = %if.end + %arrayidx8 = getelementptr inbounds i32, ptr %data1, i64 %iv + %3 = load i32, ptr %arrayidx8, align 4 + br label %for.inc + +for.inc: ; preds = %if.end, %if.then6 + %t.2 = phi i32 [ %3, %if.then6 ], [ %t.1, %if.end ] + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 
%wide.trip.count + br i1 %exitcond.not, label %exit, label %loop +} + +; This function is generated from the following C/C++ program: +; float csa_in_series_same_scalar_float(int N, bool *cond0, bool *cond1, +; float *data0, float *data1) { +; float t = 1.0f; +; for (int i = 0; i < N; i++) { +; if (cond0[i]) +; t = data0[i]; +; if (cond1[i]) +; t = data1[i]; +; } +; return t; // use t +; } +define float @csa_in_series_same_scalar_float(i32 %N, ptr %cond0, ptr %cond1, ptr %data0, ptr %data1) { +; CHECK-LABEL: define float @csa_in_series_same_scalar_float( +; CHECK-SAME: i32 [[N:%.*]], ptr [[COND0:%.*]], ptr [[COND1:%.*]], ptr [[DATA0:%.*]], ptr [[DATA1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP15]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[EXIT_LOOPEXIT:.*]]: +; CHECK-NEXT: [[T_2_LCSSA:%.*]] = phi float [ [[T_2:%.*]], %[[FOR_INC:.*]] ] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[T_0_LCSSA:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[T_2_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret float [[T_0_LCSSA]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[IV_NEXT:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[T_016:%.*]] = phi float [ 1.000000e+00, %[[LOOP_PREHEADER]] ], [ [[T_2]], %[[FOR_INC]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[COND0]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[IF_END:.*]], label %[[IF_THEN:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[DATA0]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +; 
CHECK-NEXT: br label %[[IF_END]] +; CHECK: [[IF_END]]: +; CHECK-NEXT: [[T_1:%.*]] = phi float [ [[TMP1]], %[[IF_THEN]] ], [ [[T_016]], %[[LOOP]] ] +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[COND1]], i64 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 +; CHECK-NEXT: [[TOBOOL5_NOT:%.*]] = icmp eq i8 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[TOBOOL5_NOT]], label %[[FOR_INC]], label %[[IF_THEN6:.*]] +; CHECK: [[IF_THEN6]]: +; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[DATA1]], i64 [[IV]] +; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX8]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[T_2]] = phi float [ [[TMP3]], %[[IF_THEN6]] ], [ [[T_1]], %[[IF_END]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]] +; +entry: + %cmp15 = icmp sgt i32 %N, 0 + br i1 %cmp15, label %loop.preheader, label %exit + +loop.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %loop + +exit: ; preds = %for.inc, %entry + %t.0.lcssa = phi float [ 1.000000e+00, %entry ], [ %t.2, %for.inc ] + ret float %t.0.lcssa + +loop: ; preds = %loop.preheader, %for.inc + %iv = phi i64 [ 0, %loop.preheader ], [ %iv.next, %for.inc ] + %t.016 = phi float [ 1.000000e+00, %loop.preheader ], [ %t.2, %for.inc ] + %arrayidx = getelementptr inbounds i8, ptr %cond0, i64 %iv + %0 = load i8, ptr %arrayidx, align 1 + %tobool.not = icmp eq i8 %0, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %loop + %arrayidx2 = getelementptr inbounds float, ptr %data0, i64 %iv + %1 = load float, ptr %arrayidx2, align 4 + br label %if.end + +if.end: ; preds = %if.then, %loop + %t.1 = phi float [ %1, %if.then ], [ %t.016, %loop ] + %arrayidx4 = getelementptr inbounds i8, ptr %cond1, i64 %iv + %2 = load i8, 
ptr %arrayidx4, align 1 + %tobool5.not = icmp eq i8 %2, 0 + br i1 %tobool5.not, label %for.inc, label %if.then6 + +if.then6: ; preds = %if.end + %arrayidx8 = getelementptr inbounds float, ptr %data1, i64 %iv + %3 = load float, ptr %arrayidx8, align 4 + br label %for.inc + +for.inc: ; preds = %if.end, %if.then6 + %t.2 = phi float [ %3, %if.then6 ], [ %t.1, %if.end ] + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond.not, label %exit, label %loop +} + +; This function is generated from the following C/C++ program: +; int csa_same_cond_int(int N, bool *cond, int *data0, int *data1) { +; int t = -1; +; int s = -1; +; for (int i = 0; i < N; i++) { +; if (cond[i]) { +; t = data0[i]; +; s = data1[i]; +; } +; } +; return t | s; // use t and s +; } +define i32 @csa_same_cond_int(i32 %N, ptr %cond, ptr %data0, ptr %data1) { +; CHECK-LABEL: define i32 @csa_same_cond_int( +; CHECK-SAME: i32 [[N:%.*]], ptr [[COND:%.*]], ptr [[DATA0:%.*]], ptr [[DATA1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP9]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[EXIT_LOOPEXIT:.*]]: +; CHECK-NEXT: [[T_1_LCSSA:%.*]] = phi i32 [ [[T_1:%.*]], %[[FOR_INC:.*]] ] +; CHECK-NEXT: [[S_1_LCSSA:%.*]] = phi i32 [ [[S_1:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[TMP0:%.*]] = or i32 [[S_1_LCSSA]], [[T_1_LCSSA]] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[OR:%.*]] = phi i32 [ [[TMP0]], %[[EXIT_LOOPEXIT]] ], [ -1, %[[ENTRY]] ] +; CHECK-NEXT: ret i32 [[OR]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[IV_NEXT:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[S_011:%.*]] = phi i32 [ -1, %[[LOOP_PREHEADER]] ], [ [[S_1]], %[[FOR_INC]] ] +; CHECK-NEXT: [[T_010:%.*]] = phi i32 [ -1, 
%[[LOOP_PREHEADER]] ], [ [[T_1]], %[[FOR_INC]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[COND]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[FOR_INC]], label %[[IF_THEN:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[DATA0]], i64 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[DATA1]], i64 [[IV]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[T_1]] = phi i32 [ [[TMP2]], %[[IF_THEN]] ], [ [[T_010]], %[[LOOP]] ] +; CHECK-NEXT: [[S_1]] = phi i32 [ [[TMP3]], %[[IF_THEN]] ], [ [[S_011]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]] +; +entry: + %cmp9 = icmp sgt i32 %N, 0 + br i1 %cmp9, label %loop.preheader, label %exit + +loop.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %loop + +exit.loopexit: ; preds = %for.inc + %0 = or i32 %s.1, %t.1 + br label %exit + +exit: ; preds = %exit.loopexit, %entry + %or = phi i32 [ %0, %exit.loopexit ], [ -1, %entry ] + ret i32 %or + +loop: ; preds = %loop.preheader, %for.inc + %iv = phi i64 [ 0, %loop.preheader ], [ %iv.next, %for.inc ] + %s.011 = phi i32 [ -1, %loop.preheader ], [ %s.1, %for.inc ] + %t.010 = phi i32 [ -1, %loop.preheader ], [ %t.1, %for.inc ] + %arrayidx = getelementptr inbounds i8, ptr %cond, i64 %iv + %1 = load i8, ptr %arrayidx, align 1 + %tobool.not = icmp eq i8 %1, 0 + br i1 %tobool.not, label %for.inc, label %if.then + +if.then: ; preds = %loop + %arrayidx2 = getelementptr inbounds i32, ptr %data0, 
i64 %iv + %2 = load i32, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds i32, ptr %data1, i64 %iv + %3 = load i32, ptr %arrayidx4, align 4 + br label %for.inc + +for.inc: ; preds = %loop, %if.then + %t.1 = phi i32 [ %2, %if.then ], [ %t.010, %loop ] + %s.1 = phi i32 [ %3, %if.then ], [ %s.011, %loop ] + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond.not, label %exit.loopexit, label %loop +} + +; This function is generated from the following C/C++ program: +; float csa_same_cond_float(int N, bool *cond, float *data0, float *data1) { +; float t = 1.0f; +; float s = 1.0f; +; for (int i = 0; i < N; i++) { +; if (cond[i]) { +; t = data0[i]; +; s = data1[i]; +; } +; } +; return t + s; // use t and s +; } +define float @csa_same_cond_float(i32 %N, ptr %cond, ptr %data0, ptr %data1) { +; CHECK-LABEL: define float @csa_same_cond_float( +; CHECK-SAME: i32 [[N:%.*]], ptr [[COND:%.*]], ptr [[DATA0:%.*]], ptr [[DATA1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP9]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[EXIT_LOOPEXIT:.*]]: +; CHECK-NEXT: [[T_1_LCSSA:%.*]] = phi float [ [[T_1:%.*]], %[[FOR_INC:.*]] ] +; CHECK-NEXT: [[S_1_LCSSA:%.*]] = phi float [ [[S_1:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[TMP0:%.*]] = fadd float [[T_1_LCSSA]], [[S_1_LCSSA]] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[ADD:%.*]] = phi float [ [[TMP0]], %[[EXIT_LOOPEXIT]] ], [ 2.000000e+00, %[[ENTRY]] ] +; CHECK-NEXT: ret float [[ADD]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[IV_NEXT:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[S_011:%.*]] = phi float [ 1.000000e+00, %[[LOOP_PREHEADER]] ], [ [[S_1]], %[[FOR_INC]] ] +; CHECK-NEXT: 
[[T_010:%.*]] = phi float [ 1.000000e+00, %[[LOOP_PREHEADER]] ], [ [[T_1]], %[[FOR_INC]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[COND]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[FOR_INC]], label %[[IF_THEN:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[DATA0]], i64 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[DATA1]], i64 [[IV]] +; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[T_1]] = phi float [ [[TMP2]], %[[IF_THEN]] ], [ [[T_010]], %[[LOOP]] ] +; CHECK-NEXT: [[S_1]] = phi float [ [[TMP3]], %[[IF_THEN]] ], [ [[S_011]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]] +; +entry: + %cmp9 = icmp sgt i32 %N, 0 + br i1 %cmp9, label %loop.preheader, label %exit + +loop.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %loop + +exit.loopexit: ; preds = %for.inc + %0 = fadd float %t.1, %s.1 + br label %exit + +exit: ; preds = %exit.loopexit, %entry + %add = phi float [ %0, %exit.loopexit ], [ 2.000000e+00, %entry ] + ret float %add + +loop: ; preds = %loop.preheader, %for.inc + %iv = phi i64 [ 0, %loop.preheader ], [ %iv.next, %for.inc ] + %s.011 = phi float [ 1.000000e+00, %loop.preheader ], [ %s.1, %for.inc ] + %t.010 = phi float [ 1.000000e+00, %loop.preheader ], [ %t.1, %for.inc ] + %arrayidx = getelementptr inbounds i8, ptr %cond, i64 %iv + %1 = load i8, ptr %arrayidx, align 1 + %tobool.not = icmp eq i8 %1, 0 + br i1 %tobool.not, label %for.inc, 
label %if.then + +if.then: ; preds = %loop + %arrayidx2 = getelementptr inbounds float, ptr %data0, i64 %iv + %2 = load float, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %data1, i64 %iv + %3 = load float, ptr %arrayidx4, align 4 + br label %for.inc + +for.inc: ; preds = %loop, %if.then + %t.1 = phi float [ %2, %if.then ], [ %t.010, %loop ] + %s.1 = phi float [ %3, %if.then ], [ %s.011, %loop ] + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond.not, label %exit.loopexit, label %loop +} + +; This function is generated from the following C/C++ program: +; int csa_else_if_same_scalar_int(int N, bool *cond0, bool *cond1, int *data0, +; int *data1) { +; int t = -1; +; for (int i = 0; i < N; i++) { +; if (cond0[i]) +; t = data0[i]; +; else if (cond1[i]) +; t = data1[i]; +; } +; return t; // use t +; } +define i32 @csa_else_if_same_scalar_int(i32 %N, ptr %cond0, ptr %cond1, ptr %data0, ptr %data1) { +; CHECK-LABEL: define i32 @csa_else_if_same_scalar_int( +; CHECK-SAME: i32 [[N:%.*]], ptr [[COND0:%.*]], ptr [[COND1:%.*]], ptr [[DATA0:%.*]], ptr [[DATA1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP15]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[EXIT_LOOPEXIT:.*]]: +; CHECK-NEXT: [[T_1_LCSSA:%.*]] = phi i32 [ [[T_1:%.*]], %[[FOR_INC:.*]] ] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[T_0_LCSSA:%.*]] = phi i32 [ -1, %[[ENTRY]] ], [ [[T_1_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[T_0_LCSSA]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[IV_NEXT:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[T_016:%.*]] = phi i32 [ -1, %[[LOOP_PREHEADER]] ], [ [[T_1]], %[[FOR_INC]] ] +; CHECK-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[COND0]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[IF_ELSE:.*]], label %[[FOR_INC_SINK_SPLIT:.*]] +; CHECK: [[IF_ELSE]]: +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[COND1]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 +; CHECK-NEXT: [[TOBOOL5_NOT:%.*]] = icmp eq i8 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TOBOOL5_NOT]], label %[[FOR_INC]], label %[[FOR_INC_SINK_SPLIT]] +; CHECK: [[FOR_INC_SINK_SPLIT]]: +; CHECK-NEXT: [[DATA0_SINK:%.*]] = phi ptr [ [[DATA0]], %[[LOOP]] ], [ [[DATA1]], %[[IF_ELSE]] ] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[DATA0_SINK]], i64 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[T_1]] = phi i32 [ [[T_016]], %[[IF_ELSE]] ], [ [[TMP2]], %[[FOR_INC_SINK_SPLIT]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]] +; +entry: + %cmp15 = icmp sgt i32 %N, 0 + br i1 %cmp15, label %loop.preheader, label %exit + +loop.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %loop + +exit: ; preds = %for.inc, %entry + %t.0.lcssa = phi i32 [ -1, %entry ], [ %t.1, %for.inc ] + ret i32 %t.0.lcssa + +loop: ; preds = %loop.preheader, %for.inc + %iv = phi i64 [ 0, %loop.preheader ], [ %iv.next, %for.inc ] + %t.016 = phi i32 [ -1, %loop.preheader ], [ %t.1, %for.inc ] + %arrayidx = getelementptr inbounds i8, ptr %cond0, i64 %iv + %0 = load i8, ptr %arrayidx, align 1 + %tobool.not = icmp eq i8 %0, 0 + br i1 %tobool.not, label %if.else, label %for.inc.sink.split + +if.else: ; preds = %loop + %arrayidx4 = 
getelementptr inbounds i8, ptr %cond1, i64 %iv + %1 = load i8, ptr %arrayidx4, align 1 + %tobool5.not = icmp eq i8 %1, 0 + br i1 %tobool5.not, label %for.inc, label %for.inc.sink.split + +for.inc.sink.split: ; preds = %if.else, %loop + %data0.sink = phi ptr [ %data0, %loop ], [ %data1, %if.else ] + %arrayidx2 = getelementptr inbounds i32, ptr %data0.sink, i64 %iv + %2 = load i32, ptr %arrayidx2, align 4 + br label %for.inc + +for.inc: ; preds = %for.inc.sink.split, %if.else + %t.1 = phi i32 [ %t.016, %if.else ], [ %2, %for.inc.sink.split ] + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond.not, label %exit, label %loop +} + +; This function is generated from the following C/C++ program: +; float csa_else_if_same_scalar_float(int N, bool *cond0, bool *cond1, +; float *data0, float *data1) { +; float t = 1.0f; +; for (int i = 0; i < N; i++) { +; if (cond0[i]) +; t = data0[i]; +; else if (cond1[i]) +; t = data1[i]; +; } +; return t; // use t +; } +define float @csa_else_if_same_scalar_float(i32 %N, ptr %cond0, ptr %cond1, ptr %data0, ptr %data1) { +; CHECK-LABEL: define float @csa_else_if_same_scalar_float( +; CHECK-SAME: i32 [[N:%.*]], ptr [[COND0:%.*]], ptr [[COND1:%.*]], ptr [[DATA0:%.*]], ptr [[DATA1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP15]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[EXIT_LOOPEXIT:.*]]: +; CHECK-NEXT: [[T_1_LCSSA:%.*]] = phi float [ [[T_1:%.*]], %[[FOR_INC:.*]] ] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[T_0_LCSSA:%.*]] = phi float [ 1.000000e+00, %[[ENTRY]] ], [ [[T_1_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret float [[T_0_LCSSA]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ 
[[IV_NEXT:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[T_016:%.*]] = phi float [ 1.000000e+00, %[[LOOP_PREHEADER]] ], [ [[T_1]], %[[FOR_INC]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[COND0]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[IF_ELSE:.*]], label %[[FOR_INC_SINK_SPLIT:.*]] +; CHECK: [[IF_ELSE]]: +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[COND1]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 +; CHECK-NEXT: [[TOBOOL5_NOT:%.*]] = icmp eq i8 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TOBOOL5_NOT]], label %[[FOR_INC]], label %[[FOR_INC_SINK_SPLIT]] +; CHECK: [[FOR_INC_SINK_SPLIT]]: +; CHECK-NEXT: [[DATA0_SINK:%.*]] = phi ptr [ [[DATA0]], %[[LOOP]] ], [ [[DATA1]], %[[IF_ELSE]] ] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[DATA0_SINK]], i64 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[T_1]] = phi float [ [[T_016]], %[[IF_ELSE]] ], [ [[TMP2]], %[[FOR_INC_SINK_SPLIT]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]] +; +entry: + %cmp15 = icmp sgt i32 %N, 0 + br i1 %cmp15, label %loop.preheader, label %exit + +loop.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %loop + +exit: ; preds = %for.inc, %entry + %t.0.lcssa = phi float [ 1.000000e+00, %entry ], [ %t.1, %for.inc ] + ret float %t.0.lcssa + +loop: ; preds = %loop.preheader, %for.inc + %iv = phi i64 [ 0, %loop.preheader ], [ %iv.next, %for.inc ] + %t.016 = phi float [ 1.000000e+00, %loop.preheader ], [ %t.1, %for.inc ] + %arrayidx = getelementptr inbounds i8, ptr %cond0, i64 
%iv + %0 = load i8, ptr %arrayidx, align 1 + %tobool.not = icmp eq i8 %0, 0 + br i1 %tobool.not, label %if.else, label %for.inc.sink.split + +if.else: ; preds = %loop + %arrayidx4 = getelementptr inbounds i8, ptr %cond1, i64 %iv + %1 = load i8, ptr %arrayidx4, align 1 + %tobool5.not = icmp eq i8 %1, 0 + br i1 %tobool5.not, label %for.inc, label %for.inc.sink.split + +for.inc.sink.split: ; preds = %if.else, %loop + %data0.sink = phi ptr [ %data0, %loop ], [ %data1, %if.else ] + %arrayidx2 = getelementptr inbounds float, ptr %data0.sink, i64 %iv + %2 = load float, ptr %arrayidx2, align 4 + br label %for.inc + +for.inc: ; preds = %for.inc.sink.split, %if.else + %t.1 = phi float [ %t.016, %if.else ], [ %2, %for.inc.sink.split ] + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond.not, label %exit, label %loop +} + +; This function is generated from the following C/C++ program: +; int csa_else_if_int(int N, bool *cond0, bool *cond1, int *data0, int *data1) { +; int t = -1; +; int s = -1; +; for (int i = 0; i < N; i++) { +; if (cond0[i]) +; t = data0[i]; +; else if (cond1[i]) +; s = data1[i]; +; } +; return t | s; // use t and s +; } +define i32 @csa_else_if_int(i32 %N, ptr %cond0, ptr %cond1, ptr %data0, ptr %data1) { +; CHECK-LABEL: define i32 @csa_else_if_int( +; CHECK-SAME: i32 [[N:%.*]], ptr [[COND0:%.*]], ptr [[COND1:%.*]], ptr [[DATA0:%.*]], ptr [[DATA1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP15]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[EXIT_LOOPEXIT:.*]]: +; CHECK-NEXT: [[T_1_LCSSA:%.*]] = phi i32 [ [[T_1:%.*]], %[[FOR_INC:.*]] ] +; CHECK-NEXT: [[S_1_LCSSA:%.*]] = phi i32 [ [[S_1:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[TMP0:%.*]] = or i32 [[S_1_LCSSA]], [[T_1_LCSSA]] +; 
CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[OR:%.*]] = phi i32 [ [[TMP0]], %[[EXIT_LOOPEXIT]] ], [ -1, %[[ENTRY]] ] +; CHECK-NEXT: ret i32 [[OR]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[IV_NEXT:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[S_017:%.*]] = phi i32 [ -1, %[[LOOP_PREHEADER]] ], [ [[S_1]], %[[FOR_INC]] ] +; CHECK-NEXT: [[T_016:%.*]] = phi i32 [ -1, %[[LOOP_PREHEADER]] ], [ [[T_1]], %[[FOR_INC]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[COND0]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[DATA0]], i64 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[IF_ELSE]]: +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[COND1]], i64 [[IV]] +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 +; CHECK-NEXT: [[TOBOOL5_NOT:%.*]] = icmp eq i8 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TOBOOL5_NOT]], label %[[FOR_INC]], label %[[IF_THEN6:.*]] +; CHECK: [[IF_THEN6]]: +; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[DATA1]], i64 [[IV]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[T_1]] = phi i32 [ [[TMP2]], %[[IF_THEN]] ], [ [[T_016]], %[[IF_THEN6]] ], [ [[T_016]], %[[IF_ELSE]] ] +; CHECK-NEXT: [[S_1]] = phi i32 [ [[S_017]], %[[IF_THEN]] ], [ [[TMP4]], %[[IF_THEN6]] ], [ [[S_017]], %[[IF_ELSE]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]] 
+; +entry: + %cmp15 = icmp sgt i32 %N, 0 + br i1 %cmp15, label %loop.preheader, label %exit + +loop.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %loop + +exit.loopexit: ; preds = %for.inc + %0 = or i32 %s.1, %t.1 + br label %exit + +exit: ; preds = %exit.loopexit, %entry + %or = phi i32 [ %0, %exit.loopexit ], [ -1, %entry ] + ret i32 %or + +loop: ; preds = %loop.preheader, %for.inc + %iv = phi i64 [ 0, %loop.preheader ], [ %iv.next, %for.inc ] + %s.017 = phi i32 [ -1, %loop.preheader ], [ %s.1, %for.inc ] + %t.016 = phi i32 [ -1, %loop.preheader ], [ %t.1, %for.inc ] + %arrayidx = getelementptr inbounds i8, ptr %cond0, i64 %iv + %1 = load i8, ptr %arrayidx, align 1 + %tobool.not = icmp eq i8 %1, 0 + br i1 %tobool.not, label %if.else, label %if.then + +if.then: ; preds = %loop + %arrayidx2 = getelementptr inbounds i32, ptr %data0, i64 %iv + %2 = load i32, ptr %arrayidx2, align 4 + br label %for.inc + +if.else: ; preds = %loop + %arrayidx4 = getelementptr inbounds i8, ptr %cond1, i64 %iv + %3 = load i8, ptr %arrayidx4, align 1 + %tobool5.not = icmp eq i8 %3, 0 + br i1 %tobool5.not, label %for.inc, label %if.then6 + +if.then6: ; preds = %if.else + %arrayidx8 = getelementptr inbounds i32, ptr %data1, i64 %iv + %4 = load i32, ptr %arrayidx8, align 4 + br label %for.inc + +for.inc: ; preds = %if.then, %if.then6, %if.else + %t.1 = phi i32 [ %2, %if.then ], [ %t.016, %if.then6 ], [ %t.016, %if.else ] + %s.1 = phi i32 [ %s.017, %if.then ], [ %4, %if.then6 ], [ %s.017, %if.else ] + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond.not, label %exit.loopexit, label %loop +} + +; This function is generated from the following C/C++ program: +; float csa_else_if_float(int N, bool *cond0, bool *cond1, float *data0, +; float *data1) { +; float t = 1.0f; +; float s = 1.0f; +; for (int i = 0; i < N; i++) { +; if (cond0[i]) +; t = data0[i]; +; else if (cond1[i]) +; s = data1[i]; +; } +; 
return t + s; // use t and s +; } +define float @csa_else_if_float(i32 %N, ptr %cond0, ptr %cond1, ptr %data0, ptr %data1) { +; CHECK-LABEL: define float @csa_else_if_float( +; CHECK-SAME: i32 [[N:%.*]], ptr [[COND0:%.*]], ptr [[COND1:%.*]], ptr [[DATA0:%.*]], ptr [[DATA1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP15]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[EXIT_LOOPEXIT:.*]]: +; CHECK-NEXT: [[T_1_LCSSA:%.*]] = phi float [ [[T_1:%.*]], %[[FOR_INC:.*]] ] +; CHECK-NEXT: [[S_1_LCSSA:%.*]] = phi float [ [[S_1:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[TMP0:%.*]] = fadd float [[T_1_LCSSA]], [[S_1_LCSSA]] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[ADD:%.*]] = phi float [ [[TMP0]], %[[EXIT_LOOPEXIT]] ], [ 2.000000e+00, %[[ENTRY]] ] +; CHECK-NEXT: ret float [[ADD]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[IV_NEXT:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[S_017:%.*]] = phi float [ 1.000000e+00, %[[LOOP_PREHEADER]] ], [ [[S_1]], %[[FOR_INC]] ] +; CHECK-NEXT: [[T_016:%.*]] = phi float [ 1.000000e+00, %[[LOOP_PREHEADER]] ], [ [[T_1]], %[[FOR_INC]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[COND0]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[DATA0]], i64 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[IF_ELSE]]: +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[COND1]], i64 [[IV]] +; CHECK-NEXT: 
[[TMP3:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 +; CHECK-NEXT: [[TOBOOL5_NOT:%.*]] = icmp eq i8 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TOBOOL5_NOT]], label %[[FOR_INC]], label %[[IF_THEN6:.*]] +; CHECK: [[IF_THEN6]]: +; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[DATA1]], i64 [[IV]] +; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX8]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[T_1]] = phi float [ [[TMP2]], %[[IF_THEN]] ], [ [[T_016]], %[[IF_THEN6]] ], [ [[T_016]], %[[IF_ELSE]] ] +; CHECK-NEXT: [[S_1]] = phi float [ [[S_017]], %[[IF_THEN]] ], [ [[TMP4]], %[[IF_THEN6]] ], [ [[S_017]], %[[IF_ELSE]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]] +; +entry: + %cmp15 = icmp sgt i32 %N, 0 + br i1 %cmp15, label %loop.preheader, label %exit + +loop.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %loop + +exit.loopexit: ; preds = %for.inc + %0 = fadd float %t.1, %s.1 + br label %exit + +exit: ; preds = %exit.loopexit, %entry + %add = phi float [ %0, %exit.loopexit ], [ 2.000000e+00, %entry ] + ret float %add + +loop: ; preds = %loop.preheader, %for.inc + %iv = phi i64 [ 0, %loop.preheader ], [ %iv.next, %for.inc ] + %s.017 = phi float [ 1.000000e+00, %loop.preheader ], [ %s.1, %for.inc ] + %t.016 = phi float [ 1.000000e+00, %loop.preheader ], [ %t.1, %for.inc ] + %arrayidx = getelementptr inbounds i8, ptr %cond0, i64 %iv + %1 = load i8, ptr %arrayidx, align 1 + %tobool.not = icmp eq i8 %1, 0 + br i1 %tobool.not, label %if.else, label %if.then + +if.then: ; preds = %loop + %arrayidx2 = getelementptr inbounds float, ptr %data0, i64 %iv + %2 = load float, ptr %arrayidx2, align 4 + br label %for.inc + +if.else: ; preds = %loop + %arrayidx4 = getelementptr inbounds i8, ptr %cond1, i64 %iv + %3 = load i8, 
ptr %arrayidx4, align 1 + %tobool5.not = icmp eq i8 %3, 0 + br i1 %tobool5.not, label %for.inc, label %if.then6 + +if.then6: ; preds = %if.else + %arrayidx8 = getelementptr inbounds float, ptr %data1, i64 %iv + %4 = load float, ptr %arrayidx8, align 4 + br label %for.inc + +for.inc: ; preds = %if.then, %if.then6, %if.else + %t.1 = phi float [ %2, %if.then ], [ %t.016, %if.then6 ], [ %t.016, %if.else ] + %s.1 = phi float [ %s.017, %if.then ], [ %4, %if.then6 ], [ %s.017, %if.else ] + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond.not, label %exit.loopexit, label %loop +} + +; This function is generated from the following C/C++ program: +; uint64_t idx_scalar(int64_t *a, int64_t *b, uint64_t ii, uint64_t n) { +; uint64_t idx = ii; +; for (uint64_t i = 0; i < n; ++i) +; idx = (a[i] > b[i]) ? i : idx; +; return idx; +; } +define i64 @idx_scalar(ptr %a, ptr %b, i64 %ii, i64 %n) { +; EVL-LABEL: define i64 @idx_scalar( +; EVL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[II:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; EVL-NEXT: [[ENTRY:.*]]: +; EVL-NEXT: [[CMP8_NOT:%.*]] = icmp eq i64 [[N]], 0 +; EVL-NEXT: br i1 [[CMP8_NOT]], label %[[EXIT:.*]], label %[[LOOP_PREHEADER:.*]] +; EVL: [[LOOP_PREHEADER]]: +; EVL-NEXT: br label %[[LOOP:.*]] +; EVL: [[EXIT_LOOPEXIT:.*]]: +; EVL-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ] +; EVL-NEXT: br label %[[EXIT]] +; EVL: [[EXIT]]: +; EVL-NEXT: [[IDX_0_LCSSA:%.*]] = phi i64 [ [[II]], %[[ENTRY]] ], [ [[COND_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; EVL-NEXT: ret i64 [[IDX_0_LCSSA]] +; EVL: [[LOOP]]: +; EVL-NEXT: [[I_010:%.*]] = phi i64 [ [[INC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ] +; EVL-NEXT: [[IDX_09:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ], [ [[II]], %[[LOOP_PREHEADER]] ] +; EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_010]] +; EVL-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; EVL-NEXT: [[ARRAYIDX1:%.*]] = getelementptr 
inbounds i64, ptr [[B]], i64 [[I_010]] +; EVL-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8 +; EVL-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]] +; EVL-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[I_010]], i64 [[IDX_09]] +; EVL-NEXT: [[INC]] = add nuw i64 [[I_010]], 1 +; EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]] +; +; NO-EVL-LABEL: define i64 @idx_scalar( +; NO-EVL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[II:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; NO-EVL-NEXT: [[ENTRY:.*]]: +; NO-EVL-NEXT: [[CMP8_NOT:%.*]] = icmp eq i64 [[N]], 0 +; NO-EVL-NEXT: br i1 [[CMP8_NOT]], label %[[EXIT:.*]], label %[[LOOP_PREHEADER:.*]] +; NO-EVL: [[LOOP_PREHEADER]]: +; NO-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; NO-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; NO-EVL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; NO-EVL-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; NO-EVL: [[VECTOR_PH]]: +; NO-EVL-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; NO-EVL-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; NO-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; NO-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; NO-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; NO-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 +; NO-EVL-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() +; NO-EVL-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) +; NO-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; NO-EVL-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]] +; NO-EVL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; NO-EVL-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; NO-EVL-NEXT: br label %[[VECTOR_BODY:.*]] +; NO-EVL: [[VECTOR_BODY]]: +; NO-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; 
NO-EVL-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[CSA_DATA_PHI:%.*]] = phi [ shufflevector ( insertelement ( poison, i64 poison, i64 0), poison, zeroinitializer), %[[VECTOR_PH]] ], [ [[CSA_DATA_SEL:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[CSA_MASK_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[CSA_MASK_SEL:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 +; NO-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]] +; NO-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0 +; NO-EVL-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 8 +; NO-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP10]] +; NO-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0 +; NO-EVL-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP14]], align 8 +; NO-EVL-NEXT: [[TMP15:%.*]] = icmp sgt [[WIDE_LOAD]], [[WIDE_LOAD1]] +; NO-EVL-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.nxv2i1( [[TMP15]]) +; NO-EVL-NEXT: [[CSA_MASK_SEL]] = select i1 [[TMP16]], [[TMP15]], [[CSA_MASK_PHI]] +; NO-EVL-NEXT: [[CSA_DATA_SEL]] = select i1 [[TMP16]], [[VEC_IND]], [[CSA_DATA_PHI]] +; NO-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; NO-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; NO-EVL: [[MIDDLE_BLOCK]]: +; NO-EVL-NEXT: [[CSA_STEP:%.*]] = call @llvm.stepvector.nxv2i32() +; NO-EVL-NEXT: [[TMP18:%.*]] = select [[CSA_MASK_SEL]], [[CSA_STEP]], zeroinitializer +; NO-EVL-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.umax.nxv2i32( [[TMP18]]) +; NO-EVL-NEXT: [[TMP20:%.*]] = extractelement [[CSA_MASK_SEL]], i64 0 +; NO-EVL-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], 0 +; NO-EVL-NEXT: 
[[TMP22:%.*]] = and i1 [[TMP20]], [[TMP21]] +; NO-EVL-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 0, i32 -1 +; NO-EVL-NEXT: [[CSA_EXTRACT:%.*]] = extractelement [[CSA_DATA_SEL]], i32 [[TMP23]] +; NO-EVL-NEXT: [[TMP24:%.*]] = icmp sge i32 [[TMP23]], 0 +; NO-EVL-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i64 [[CSA_EXTRACT]], i64 [[II]] +; NO-EVL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; NO-EVL-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; NO-EVL: [[SCALAR_PH]]: +; NO-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; NO-EVL-NEXT: br label %[[LOOP:.*]] +; NO-EVL: [[EXIT_LOOPEXIT]]: +; NO-EVL-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ], [ [[TMP25]], %[[MIDDLE_BLOCK]] ] +; NO-EVL-NEXT: br label %[[EXIT]] +; NO-EVL: [[EXIT]]: +; NO-EVL-NEXT: [[IDX_0_LCSSA:%.*]] = phi i64 [ [[II]], %[[ENTRY]] ], [ [[COND_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; NO-EVL-NEXT: ret i64 [[IDX_0_LCSSA]] +; NO-EVL: [[LOOP]]: +; NO-EVL-NEXT: [[I_010:%.*]] = phi i64 [ [[INC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; NO-EVL-NEXT: [[IDX_09:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ], [ [[II]], %[[SCALAR_PH]] ] +; NO-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_010]] +; NO-EVL-NEXT: [[TMP26:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; NO-EVL-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[I_010]] +; NO-EVL-NEXT: [[TMP27:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8 +; NO-EVL-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +; NO-EVL-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[I_010]], i64 [[IDX_09]] +; NO-EVL-NEXT: [[INC]] = add nuw i64 [[I_010]], 1 +; NO-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; NO-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP15:![0-9]+]] +; +entry: + %cmp8.not = icmp eq i64 %n, 0 + br i1 %cmp8.not, label %exit, label 
%loop.preheader + +loop.preheader: ; preds = %entry + br label %loop + +exit: ; preds = %loop, %entry + %idx.0.lcssa = phi i64 [ %ii, %entry ], [ %cond, %loop ] + ret i64 %idx.0.lcssa + +loop: ; preds = %loop.preheader, %loop + %i.010 = phi i64 [ %inc, %loop ], [ 0, %loop.preheader ] + %idx.09 = phi i64 [ %cond, %loop ], [ %ii, %loop.preheader ] + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %i.010 + %0 = load i64, ptr %arrayidx, align 8 + %arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %i.010 + %1 = load i64, ptr %arrayidx1, align 8 + %cmp2 = icmp sgt i64 %0, %1 + %cond = select i1 %cmp2, i64 %i.010, i64 %idx.09 + %inc = add nuw i64 %i.010, 1 + %exitcond.not = icmp eq i64 %inc, %n + br i1 %exitcond.not, label %exit, label %loop +} + +; This function is generated from the following C/C++ program: +; uint64_t idx_scalar_dec(int64_t *a, int64_t *b, uint64_t ii, uint64_t n) { +; uint64_t idx = ii; +; for (uint64_t i = n; i > 0; --i) // decreasing +; idx = (a[i - 1] > b[i - 1]) ? 
i : idx; +; return idx; +; } +define dso_local i64 @idx_scalar_dec(ptr %a, ptr %b, i64 %ii, i64 %n) { +; CHECK-LABEL: define dso_local i64 @idx_scalar_dec( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[II:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP_NOT9:%.*]] = icmp eq i64 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP_NOT9]], label %[[EXIT:.*]], label %[[LOOP_PREHEADER:.*]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[EXIT_LOOPEXIT:.*]]: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IDX_0_LCSSA:%.*]] = phi i64 [ [[II]], %[[ENTRY]] ], [ [[COND_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i64 [[IDX_0_LCSSA]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[I_011:%.*]] = phi i64 [ [[SUB:%.*]], %[[LOOP]] ], [ [[N]], %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[IDX_010:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ], [ [[II]], %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[SUB]] = add i64 [[I_011]], -1 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[SUB]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[SUB]] +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8 +; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[COND]] = select i1 [[CMP3]], i64 [[I_011]], i64 [[IDX_010]] +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[SUB]], 0 +; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]] +; +entry: + %cmp.not9 = icmp eq i64 %n, 0 + br i1 %cmp.not9, label %exit, label %loop.preheader + +loop.preheader: ; preds = %entry + br label %loop + +exit: ; preds = %loop, %entry + %idx.0.lcssa = phi i64 [ %ii, %entry ], [ %cond, %loop ] + ret i64 %idx.0.lcssa + +loop: ; preds = %loop.preheader, %loop + %i.011 = phi i64 [ %sub, %loop ], [ %n, %loop.preheader ] + 
%idx.010 = phi i64 [ %cond, %loop ], [ %ii, %loop.preheader ] + %sub = add i64 %i.011, -1 + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %sub + %0 = load i64, ptr %arrayidx, align 8 + %arrayidx2 = getelementptr inbounds i64, ptr %b, i64 %sub + %1 = load i64, ptr %arrayidx2, align 8 + %cmp3 = icmp sgt i64 %0, %1 + %cond = select i1 %cmp3, i64 %i.011, i64 %idx.010 + %cmp.not = icmp eq i64 %sub, 0 + br i1 %cmp.not, label %exit, label %loop +} + +; This function is generated from the following C/C++ program: +; The key part of this function is that the true arm of the select corresponds +; to selecting the initial value, instead of selecting the new value. +; int simple_csa_int_select_neg_cond(int N, int *data) { +; int t = 0; +; for (int i = 0; i < N; i++) { +; if (i != data[i]) +; t = data[i]; +; } +; return t; // use t +; } +define i32 @simple_csa_int_select_neg_cond(i32 %N, ptr %data) { +; EVL-LABEL: define i32 @simple_csa_int_select_neg_cond( +; EVL-SAME: i32 [[N:%.*]], ptr [[DATA:%.*]]) #[[ATTR0]] { +; EVL-NEXT: [[ENTRY:.*]]: +; EVL-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N]], 0 +; EVL-NEXT: br i1 [[CMP9]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; EVL: [[LOOP_PREHEADER]]: +; EVL-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; EVL-NEXT: br label %[[LOOP:.*]] +; EVL: [[EXIT_LOOPEXIT:.*]]: +; EVL-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; EVL-NEXT: br label %[[EXIT]] +; EVL: [[EXIT]]: +; EVL-NEXT: [[T_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; EVL-NEXT: ret i32 [[T_0_LCSSA]] +; EVL: [[LOOP]]: +; EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; EVL-NEXT: [[T_010:%.*]] = phi i32 [ 0, %[[LOOP_PREHEADER]] ], [ [[SPEC_SELECT]], %[[LOOP]] ] +; EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[IV]] +; EVL-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; EVL-NEXT: [[TMP1:%.*]] = zext 
i32 [[TMP0]] to i64 +; EVL-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[IV]], [[TMP1]] +; EVL-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1_NOT]], i32 [[T_010]], i32 [[TMP0]] +; EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]] +; +; NO-EVL-LABEL: define i32 @simple_csa_int_select_neg_cond( +; NO-EVL-SAME: i32 [[N:%.*]], ptr [[DATA:%.*]]) #[[ATTR0]] { +; NO-EVL-NEXT: [[ENTRY:.*]]: +; NO-EVL-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N]], 0 +; NO-EVL-NEXT: br i1 [[CMP9]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; NO-EVL: [[LOOP_PREHEADER]]: +; NO-EVL-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; NO-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; NO-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; NO-EVL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] +; NO-EVL-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; NO-EVL: [[VECTOR_PH]]: +; NO-EVL-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; NO-EVL-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 +; NO-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] +; NO-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; NO-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; NO-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; NO-EVL-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() +; NO-EVL-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) +; NO-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; NO-EVL-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]] +; NO-EVL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; NO-EVL-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; NO-EVL-NEXT: br label %[[VECTOR_BODY:.*]] +; NO-EVL: [[VECTOR_BODY]]: +; NO-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[CSA_DATA_PHI:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 poison, i64 0), poison, zeroinitializer), %[[VECTOR_PH]] ], [ [[CSA_DATA_SEL:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[CSA_MASK_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[CSA_MASK_SEL:%.*]], %[[VECTOR_BODY]] ] +; NO-EVL-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 +; NO-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[TMP10]] +; NO-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 +; NO-EVL-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 4 +; NO-EVL-NEXT: [[TMP13:%.*]] = zext [[WIDE_LOAD]] to +; NO-EVL-NEXT: [[TMP14:%.*]] = icmp eq [[VEC_IND]], [[TMP13]] +; NO-EVL-NEXT: [[TMP15:%.*]] = xor [[TMP14]], splat (i1 true) +; NO-EVL-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP15]]) +; NO-EVL-NEXT: [[CSA_MASK_SEL]] = select i1 [[TMP16]], [[TMP15]], [[CSA_MASK_PHI]] +; NO-EVL-NEXT: [[CSA_DATA_SEL]] = select i1 [[TMP16]], [[WIDE_LOAD]], [[CSA_DATA_PHI]] +; NO-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; NO-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; NO-EVL: [[MIDDLE_BLOCK]]: +; NO-EVL-NEXT: [[CSA_STEP:%.*]] = call @llvm.stepvector.nxv4i32() +; NO-EVL-NEXT: [[TMP18:%.*]] = select [[CSA_MASK_SEL]], [[CSA_STEP]], zeroinitializer +; NO-EVL-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32( [[TMP18]]) +; NO-EVL-NEXT: [[TMP20:%.*]] = extractelement [[CSA_MASK_SEL]], i64 0 +; NO-EVL-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], 0 +; NO-EVL-NEXT: [[TMP22:%.*]] = and i1 [[TMP20]], [[TMP21]] +; NO-EVL-NEXT: [[TMP23:%.*]] = 
select i1 [[TMP22]], i32 0, i32 -1 +; NO-EVL-NEXT: [[CSA_EXTRACT:%.*]] = extractelement [[CSA_DATA_SEL]], i32 [[TMP23]] +; NO-EVL-NEXT: [[TMP24:%.*]] = icmp sge i32 [[TMP23]], 0 +; NO-EVL-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[CSA_EXTRACT]], i32 0 +; NO-EVL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; NO-EVL-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; NO-EVL: [[SCALAR_PH]]: +; NO-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; NO-EVL-NEXT: br label %[[LOOP:.*]] +; NO-EVL: [[EXIT_LOOPEXIT]]: +; NO-EVL-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], %[[LOOP]] ], [ [[TMP25]], %[[MIDDLE_BLOCK]] ] +; NO-EVL-NEXT: br label %[[EXIT]] +; NO-EVL: [[EXIT]]: +; NO-EVL-NEXT: [[T_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; NO-EVL-NEXT: ret i32 [[T_0_LCSSA]] +; NO-EVL: [[LOOP]]: +; NO-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; NO-EVL-NEXT: [[T_010:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[SPEC_SELECT]], %[[LOOP]] ] +; NO-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[IV]] +; NO-EVL-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; NO-EVL-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64 +; NO-EVL-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[IV]], [[TMP27]] +; NO-EVL-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1_NOT]], i32 [[T_010]], i32 [[TMP26]] +; NO-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; NO-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; NO-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP17:![0-9]+]] +; +entry: + %cmp9 = icmp sgt i32 %N, 0 + br i1 %cmp9, label %loop.preheader, label %exit + +loop.preheader: ; preds = %entry + %wide.trip.count = zext i32 %N to i64 + br label %loop + +exit: ; preds = %loop, %entry 
+ %t.0.lcssa = phi i32 [ 0, %entry ], [ %spec.select, %loop ] + ret i32 %t.0.lcssa + +loop: ; preds = %loop.preheader, %loop + %iv = phi i64 [ 0, %loop.preheader ], [ %iv.next, %loop ] + %t.010 = phi i32 [ 0, %loop.preheader ], [ %spec.select, %loop ] + %arrayidx = getelementptr inbounds i32, ptr %data, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %1 = zext i32 %0 to i64 + %cmp1.not = icmp eq i64 %iv, %1 + %spec.select = select i1 %cmp1.not, i32 %t.010, i32 %0 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond.not, label %exit, label %loop +} + +; This function is generated from the following C/C++ program: +; int *simple_csa_ptr_select(int N, int **data) { +; int *t = nullptr; +; for (int i = 0; i < N; i++) { +; if (a < *data[i]) +; t = data[i]; +; } +; return t; // use t +; } +define ptr @simple_csa_ptr_select(i32 %N, ptr %data, i64 %a) { +; EVL-LABEL: define ptr @simple_csa_ptr_select( +; EVL-SAME: i32 [[N:%.*]], ptr [[DATA:%.*]], i64 [[A:%.*]]) #[[ATTR0]] { +; EVL-NEXT: [[ENTRY:.*]]: +; EVL-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N]], 0 +; EVL-NEXT: br i1 [[CMP9]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; EVL: [[LOOP_PREHEADER]]: +; EVL-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; EVL: [[VECTOR_PH]]: +; EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[WIDE_TRIP_COUNT]], [[TMP2]] +; EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] +; EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 +; EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[A]], i64 0 +; EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector 
[[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; EVL-NEXT: br label %[[VECTOR_BODY:.*]] +; EVL: [[VECTOR_BODY]]: +; EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[CSA_DATA_PHI:%.*]] = phi [ shufflevector ( insertelement ( poison, ptr poison, i64 0), poison, zeroinitializer), %[[VECTOR_PH]] ], [ [[CSA_DATA_SEL:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[CSA_MASK_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[CSA_MASK_SEL:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[CSA_VL_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[CSA_VL_SEL:%.*]], %[[VECTOR_BODY]] ] +; EVL-NEXT: [[AVL:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[EVL_BASED_IV]] +; EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; EVL-NEXT: [[TMP6:%.*]] = add i64 [[EVL_BASED_IV]], 0 +; EVL-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[DATA]], i64 [[TMP6]] +; EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP7]], i32 0 +; EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2p0.p0(ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP5]]) +; EVL-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv2i32.nxv2p0( align 4 [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP5]]) +; EVL-NEXT: [[TMP9:%.*]] = call @llvm.vp.sext.nxv2i64.nxv2i32( [[WIDE_MASKED_GATHER]], splat (i1 true), i32 [[TMP5]]) +; EVL-NEXT: [[TMP10:%.*]] = icmp slt [[BROADCAST_SPLAT]], [[TMP9]] +; EVL-NEXT: [[ANY_ACTIVE:%.*]] = call i1 @llvm.vp.reduce.or.nxv2i1(i1 false, [[TMP10]], splat (i1 true), i32 [[TMP5]]) +; EVL-NEXT: [[CSA_VL_SEL]] = select i1 [[ANY_ACTIVE]], i32 [[TMP5]], i32 [[CSA_VL_PHI]] +; EVL-NEXT: [[CSA_MASK_SEL]] = select i1 [[ANY_ACTIVE]], [[TMP10]], [[CSA_MASK_PHI]] +; EVL-NEXT: [[CSA_DATA_SEL]] = select i1 [[ANY_ACTIVE]], [[VP_OP_LOAD]], [[CSA_DATA_PHI]] +; EVL-NEXT: [[TMP11:%.*]] = zext 
i32 [[TMP5]] to i64 +; EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[EVL_BASED_IV]] +; EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] +; EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; EVL-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; EVL: [[MIDDLE_BLOCK]]: +; EVL-NEXT: [[CSA_STEP:%.*]] = call @llvm.stepvector.nxv2i32() +; EVL-NEXT: [[TMP13:%.*]] = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 -1, [[CSA_STEP]], [[CSA_MASK_SEL]], i32 [[CSA_VL_SEL]]) +; EVL-NEXT: [[CSA_EXTRACT:%.*]] = extractelement [[CSA_DATA_SEL]], i32 [[TMP13]] +; EVL-NEXT: [[TMP14:%.*]] = icmp sge i32 [[TMP13]], 0 +; EVL-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], ptr [[CSA_EXTRACT]], ptr null +; EVL-NEXT: br i1 true, label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; EVL: [[SCALAR_PH]]: +; EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; EVL-NEXT: br label %[[LOOP:.*]] +; EVL: [[EXIT_LOOPEXIT]]: +; EVL-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi ptr [ [[SPEC_SELECT:%.*]], %[[LOOP]] ], [ [[TMP15]], %[[MIDDLE_BLOCK]] ] +; EVL-NEXT: br label %[[EXIT]] +; EVL: [[EXIT]]: +; EVL-NEXT: [[T_0_LCSSA:%.*]] = phi ptr [ null, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; EVL-NEXT: ret ptr [[T_0_LCSSA]] +; EVL: [[LOOP]]: +; EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; EVL-NEXT: [[T_010:%.*]] = phi ptr [ null, %[[SCALAR_PH]] ], [ [[SPEC_SELECT]], %[[LOOP]] ] +; EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[DATA]], i64 [[IV]] +; EVL-NEXT: [[TMP16:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +; EVL-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +; EVL-NEXT: [[TMP18:%.*]] = sext i32 [[TMP17]] to i64 +; EVL-NEXT: [[CMP1:%.*]] = icmp slt i64 [[A]], [[TMP18]] +; EVL-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1]], ptr [[TMP16]], ptr [[T_010]] +; EVL-NEXT: [[IV_NEXT]] = add 
nuw nsw i64 [[IV]], 1 +; EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] +; +; NO-EVL-LABEL: define ptr @simple_csa_ptr_select( +; NO-EVL-SAME: i32 [[N:%.*]], ptr [[DATA:%.*]], i64 [[A:%.*]]) #[[ATTR0]] { +; NO-EVL-NEXT: [[ENTRY:.*]]: +; NO-EVL-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N]], 0 +; NO-EVL-NEXT: br i1 [[CMP9]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; NO-EVL: [[LOOP_PREHEADER]]: +; NO-EVL-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; NO-EVL-NEXT: br label %[[LOOP:.*]] +; NO-EVL: [[EXIT_LOOPEXIT:.*]]: +; NO-EVL-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi ptr [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; NO-EVL-NEXT: br label %[[EXIT]] +; NO-EVL: [[EXIT]]: +; NO-EVL-NEXT: [[T_0_LCSSA:%.*]] = phi ptr [ null, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; NO-EVL-NEXT: ret ptr [[T_0_LCSSA]] +; NO-EVL: [[LOOP]]: +; NO-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_PREHEADER]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; NO-EVL-NEXT: [[T_010:%.*]] = phi ptr [ null, %[[LOOP_PREHEADER]] ], [ [[SPEC_SELECT]], %[[LOOP]] ] +; NO-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[DATA]], i64 [[IV]] +; NO-EVL-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +; NO-EVL-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +; NO-EVL-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; NO-EVL-NEXT: [[CMP1:%.*]] = icmp slt i64 [[A]], [[TMP2]] +; NO-EVL-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1]], ptr [[TMP0]], ptr [[T_010]] +; NO-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; NO-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; NO-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]] +; +entry: + %cmp9 = icmp sgt i32 %N, 0 + br i1 %cmp9, label %loop.preheader, label %exit + +loop.preheader: + %wide.trip.count = zext i32 %N to i64 + br label %loop + +exit: 
+ %t.0.lcssa = phi ptr [ null, %entry ], [ %spec.select, %loop ] + ret ptr %t.0.lcssa + +loop: + %iv = phi i64 [ 0, %loop.preheader ], [ %iv.next, %loop ] + %t.010 = phi ptr [ null, %loop.preheader ], [ %spec.select, %loop ] + %arrayidx = getelementptr inbounds ptr, ptr %data, i64 %iv + %0 = load ptr, ptr %arrayidx, align 8 + %1 = load i32, ptr %0, align 4 + %2 = sext i32 %1 to i64 + %cmp1 = icmp slt i64 %a, %2 + %spec.select = select i1 %cmp1, ptr %0, ptr %t.010 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond.not, label %exit, label %loop +} +;. +; EVL: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; EVL: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; EVL: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; EVL: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; EVL: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; EVL: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; EVL: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; EVL: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} +; EVL: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; EVL: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +; EVL: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} +; EVL: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} +;. 
+; NO-EVL: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; NO-EVL: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; NO-EVL: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; NO-EVL: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; NO-EVL: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; NO-EVL: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; NO-EVL: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; NO-EVL: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} +; NO-EVL: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; NO-EVL: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +; NO-EVL: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} +; NO-EVL: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} +; NO-EVL: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} +; NO-EVL: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]} +; NO-EVL: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]} +; NO-EVL: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]} +; NO-EVL: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]} +; NO-EVL: [[LOOP17]] = distinct !{[[LOOP17]], [[META2]], [[META1]]} +;.