LLVM 20.0.0git
|
#include "Target/AMDGPU/AMDGPUSubtarget.h"
Public Types | |
enum | Generation { INVALID = 0 , R600 = 1 , R700 = 2 , EVERGREEN = 3 , NORTHERN_ISLANDS = 4 , SOUTHERN_ISLANDS = 5 , SEA_ISLANDS = 6 , VOLCANIC_ISLANDS = 7 , GFX9 = 8 , GFX10 = 9 , GFX11 = 10 , GFX12 = 11 } |
Public Member Functions | |
AMDGPUSubtarget (Triple TT) | |
std::pair< unsigned, unsigned > | getDefaultFlatWorkGroupSize (CallingConv::ID CC) const |
std::pair< unsigned, unsigned > | getFlatWorkGroupSizes (const Function &F) const |
std::pair< unsigned, unsigned > | getWavesPerEU (const Function &F) const |
std::pair< unsigned, unsigned > | getWavesPerEU (const Function &F, std::pair< unsigned, unsigned > FlatWorkGroupSizes) const |
Overload which uses the specified values for the flat work group sizes, rather than querying the function itself. | |
std::pair< unsigned, unsigned > | getEffectiveWavesPerEU (std::pair< unsigned, unsigned > WavesPerEU, std::pair< unsigned, unsigned > FlatWorkGroupSizes) const |
unsigned | getMaxLocalMemSizeWithWaveCount (unsigned WaveCount, const Function &) const |
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount. | |
std::pair< unsigned, unsigned > | getOccupancyWithWorkGroupSizes (uint32_t LDSBytes, const Function &F) const |
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is F and each workgroup running the function requires LDSBytes bytes of LDS space. | |
std::pair< unsigned, unsigned > | getOccupancyWithWorkGroupSizes (const MachineFunction &MF) const |
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is MF. | |
bool | isAmdHsaOS () const |
bool | isAmdPalOS () const |
bool | isMesa3DOS () const |
bool | isMesaKernel (const Function &F) const |
bool | isAmdHsaOrMesa (const Function &F) const |
bool | isGCN () const |
bool | isGCN3Encoding () const |
bool | has16BitInsts () const |
bool | hasTrue16BitInsts () const |
Return true if the subtarget supports True16 instructions. | |
bool | useRealTrue16Insts () const |
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated. | |
bool | hasBF16ConversionInsts () const |
bool | hasMadMixInsts () const |
bool | hasFP8ConversionScaleInsts () const |
bool | hasBF8ConversionScaleInsts () const |
bool | hasFP4ConversionScaleInsts () const |
bool | hasFP6BF6ConversionScaleInsts () const |
bool | hasF16BF16ToFP6BF6ConversionScaleInsts () const |
bool | hasCvtPkF16F32Inst () const |
bool | hasF32ToF16BF16ConversionSRInsts () const |
bool | hasMadMacF32Insts () const |
bool | hasDsSrc2Insts () const |
bool | hasSDWA () const |
bool | hasVOP3PInsts () const |
bool | hasMulI24 () const |
bool | hasMulU24 () const |
bool | hasSMulHi () const |
bool | hasInv2PiInlineImm () const |
bool | hasFminFmaxLegacy () const |
bool | hasTrigReducedRange () const |
bool | hasFastFMAF32 () const |
bool | isPromoteAllocaEnabled () const |
unsigned | getWavefrontSize () const |
unsigned | getWavefrontSizeLog2 () const |
unsigned | getLocalMemorySize () const |
Return the maximum number of bytes of LDS available for all workgroups running on the same WGP or CU. | |
unsigned | getAddressableLocalMemorySize () const |
Return the maximum number of bytes of LDS that can be allocated to a single workgroup. | |
unsigned | getEUsPerCU () const |
Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto which workgroups are mapped. | |
Align | getAlignmentForImplicitArgPtr () const |
unsigned | getExplicitKernelArgOffset () const |
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument. | |
virtual unsigned | getMaxWorkGroupsPerCU (unsigned FlatWorkGroupSize) const =0 |
virtual unsigned | getMinFlatWorkGroupSize () const =0 |
virtual unsigned | getMaxFlatWorkGroupSize () const =0 |
virtual unsigned | getWavesPerEUForWorkGroup (unsigned FlatWorkGroupSize) const =0 |
virtual unsigned | getMinWavesPerEU () const =0 |
unsigned | getMaxWavesPerEU () const |
unsigned | getMaxWorkitemID (const Function &Kernel, unsigned Dimension) const |
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension. | |
SmallVector< unsigned > | getMaxNumWorkGroups (const Function &F) const |
Return the number of work groups for the function. | |
bool | isSingleLaneExecution (const Function &Kernel) const |
Return true if only a single workitem can be active in a wave. | |
bool | makeLIDRangeMetadata (Instruction *I) const |
Creates value range metadata on a workitemid.* intrinsic call or load. | |
unsigned | getImplicitArgNumBytes (const Function &F) const |
uint64_t | getExplicitKernArgSize (const Function &F, Align &MaxAlign) const |
unsigned | getKernArgSegmentSize (const Function &F, Align &MaxAlign) const |
AMDGPUDwarfFlavour | getAMDGPUDwarfFlavour () const |
virtual | ~AMDGPUSubtarget ()=default |
Static Public Member Functions | |
static const AMDGPUSubtarget & | get (const MachineFunction &MF) |
static const AMDGPUSubtarget & | get (const TargetMachine &TM, const Function &F) |
Protected Attributes | |
bool | GCN3Encoding = false |
bool | Has16BitInsts = false |
bool | HasTrue16BitInsts = false |
bool | HasFP8ConversionScaleInsts = false |
bool | HasBF8ConversionScaleInsts = false |
bool | HasFP4ConversionScaleInsts = false |
bool | HasFP6BF6ConversionScaleInsts = false |
bool | HasF16BF16ToFP6BF6ConversionScaleInsts = false |
bool | HasCvtPkF16F32Inst = false |
bool | HasF32ToF16BF16ConversionSRInsts = false |
bool | EnableRealTrue16Insts = false |
bool | HasBF16ConversionInsts = false |
bool | HasMadMixInsts = false |
bool | HasMadMacF32Insts = false |
bool | HasDsSrc2Insts = false |
bool | HasSDWA = false |
bool | HasVOP3PInsts = false |
bool | HasMulI24 = true |
bool | HasMulU24 = true |
bool | HasSMulHi = false |
bool | HasInv2PiInlineImm = false |
bool | HasFminFmaxLegacy = true |
bool | EnablePromoteAlloca = false |
bool | HasTrigReducedRange = false |
bool | FastFMAF32 = false |
unsigned | EUsPerCU = 4 |
unsigned | MaxWavesPerEU = 10 |
unsigned | LocalMemorySize = 0 |
unsigned | AddressableLocalMemorySize = 0 |
char | WavefrontSizeLog2 = 0 |
Definition at line 29 of file AMDGPUSubtarget.h.
Enumerator | |
---|---|
INVALID | |
R600 | |
R700 | |
EVERGREEN | |
NORTHERN_ISLANDS | |
SOUTHERN_ISLANDS | |
SEA_ISLANDS | |
VOLCANIC_ISLANDS | |
GFX9 | |
GFX10 | |
GFX11 | |
GFX12 |
Definition at line 31 of file AMDGPUSubtarget.h.
AMDGPUSubtarget::AMDGPUSubtarget | ( | Triple | TT | ) |
Definition at line 35 of file AMDGPUSubtarget.cpp.
|
virtual default |
|
static |
Definition at line 391 of file AMDGPUSubtarget.cpp.
References llvm::Triple::amdgcn, llvm::Triple::getArch(), llvm::MachineFunction::getSubtarget(), llvm::MachineFunction::getTarget(), and llvm::TargetMachine::getTargetTriple().
Referenced by llvm::AMDGPUTargetLowering::analyzeFormalArgumentsCompute().
|
static |
Definition at line 397 of file AMDGPUSubtarget.cpp.
References llvm::Triple::amdgcn, and F.
|
inline |
Return the maximum number of bytes of LDS that can be allocated to a single workgroup.
For GFX10-GFX12 in WGP mode this is limited to 64k even though the WGP has 128k in total.
Definition at line 279 of file AMDGPUSubtarget.h.
References AddressableLocalMemorySize.
|
inline |
Definition at line 288 of file AMDGPUSubtarget.h.
References isAmdHsaOS().
Referenced by llvm::SITargetLowering::allocatePreloadKernArgSGPRs(), llvm::AMDGPUTargetLowering::getImplicitParameterOffset(), and getKernArgSegmentSize().
AMDGPUDwarfFlavour AMDGPUSubtarget::getAMDGPUDwarfFlavour | ( | ) | const |
Returns the DWARF register number mapping flavour corresponding to WavefrontSize.
Definition at line 386 of file AMDGPUSubtarget.cpp.
References getWavefrontSize(), llvm::Wave32, and llvm::Wave64.
std::pair< unsigned, unsigned > AMDGPUSubtarget::getDefaultFlatWorkGroupSize | ( | CallingConv::ID | CC | ) | const |
Definition at line 145 of file AMDGPUSubtarget.cpp.
References llvm::CallingConv::AMDGPU_ES, llvm::CallingConv::AMDGPU_GS, llvm::CallingConv::AMDGPU_HS, llvm::CallingConv::AMDGPU_LS, llvm::CallingConv::AMDGPU_PS, llvm::CallingConv::AMDGPU_VS, CC, getMaxFlatWorkGroupSize(), and getWavefrontSize().
Referenced by getFlatWorkGroupSizes().
std::pair< unsigned, unsigned > AMDGPUSubtarget::getEffectiveWavesPerEU | ( | std::pair< unsigned, unsigned > | WavesPerEU, |
std::pair< unsigned, unsigned > | FlatWorkGroupSizes | ||
) | const |
Definition at line 182 of file AMDGPUSubtarget.cpp.
References llvm::Default, getMaxWavesPerEU(), getMinWavesPerEU(), and getWavesPerEUForWorkGroup().
Referenced by getWavesPerEU().
|
inline |
Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto which workgroups are mapped.
This takes WGP mode vs. CU mode into account.
Definition at line 286 of file AMDGPUSubtarget.h.
References EUsPerCU.
Referenced by getMaxLocalMemSizeWithWaveCount(), and getOccupancyWithWorkGroupSizes().
Definition at line 339 of file AMDGPUSubtarget.cpp.
References llvm::alignTo(), llvm::CallingConv::AMDGPU_KERNEL, assert(), DL, F, and llvm::CallingConv::SPIR_KERNEL.
Referenced by getKernArgSegmentSize().
|
inline |
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
Definition at line 294 of file AMDGPUSubtarget.h.
References llvm::Triple::AMDHSA, llvm::Triple::AMDPAL, llvm::Triple::getOS(), llvm_unreachable, llvm::Triple::Mesa3D, and llvm::Triple::UnknownOS.
Referenced by llvm::SITargetLowering::allocatePreloadKernArgSGPRs(), llvm::AMDGPUTargetLowering::getImplicitParameterOffset(), getKernArgSegmentSize(), and llvm::AMDGPUCallLowering::lowerFormalArgumentsKernel().
Returns the subtarget's default pair of minimum/maximum flat work group sizes for function F, or minimum/maximum flat work group sizes explicitly requested using "amdgpu-flat-work-group-size" attribute attached to function F.
Definition at line 159 of file AMDGPUSubtarget.cpp.
References llvm::Default, F, getDefaultFlatWorkGroupSize(), llvm::AMDGPU::getIntegerPairAttribute(), getMaxFlatWorkGroupSize(), and getMinFlatWorkGroupSize().
Referenced by getMaxLocalMemSizeWithWaveCount(), getMaxWorkitemID(), getOccupancyWithWorkGroupSizes(), getWavesPerEU(), and makeLIDRangeMetadata().
Definition at line 320 of file AMDGPUSubtarget.cpp.
References llvm::AMDGPU::AMDHSA_COV5, assert(), F, llvm::AMDGPU::getAMDHSACodeObjectVersion(), llvm::AMDGPU::isKernel(), and isMesaKernel().
Referenced by getKernArgSegmentSize().
Definition at line 364 of file AMDGPUSubtarget.cpp.
References llvm::alignTo(), llvm::CallingConv::AMDGPU_KERNEL, F, getAlignmentForImplicitArgPtr(), getExplicitKernArgSize(), getExplicitKernelArgOffset(), getImplicitArgNumBytes(), and llvm::CallingConv::SPIR_KERNEL.
Referenced by llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV4::getHSAKernelProps().
|
inline |
Return the maximum number of bytes of LDS available for all workgroups running on the same WGP or CU.
For GFX10-GFX12 in WGP mode this is 128k even though each workgroup is limited to 64k.
Definition at line 271 of file AMDGPUSubtarget.h.
References LocalMemorySize.
Referenced by getMaxLocalMemSizeWithWaveCount(), and getOccupancyWithWorkGroupSizes().
|
pure virtual |
Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.
Referenced by getDefaultFlatWorkGroupSize(), and getFlatWorkGroupSizes().
unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount | ( | unsigned | WaveCount, |
const Function & | F | ||
) | const |
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
Definition at line 45 of file AMDGPUSubtarget.cpp.
References F, getEUsPerCU(), getFlatWorkGroupSizes(), getLocalMemorySize(), and getWavefrontSize().
SmallVector< unsigned > AMDGPUSubtarget::getMaxNumWorkGroups | ( | const Function & | F | ) | const |
Return the number of work groups for the function.
Definition at line 406 of file AMDGPUSubtarget.cpp.
References F, and llvm::AMDGPU::getIntegerVecAttribute().
|
inline |
Definition at line 331 of file AMDGPUSubtarget.h.
References MaxWavesPerEU.
Referenced by getEffectiveWavesPerEU(), getOccupancyWithWorkGroupSizes(), and getWavesPerEU().
|
pure virtual |
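Returns the maximum number of work groups per compute unit supported by the subtarget and limited by the given FlatWorkGroupSize.
Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.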
Referenced by getOccupancyWithWorkGroupSizes().
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
Definition at line 235 of file AMDGPUSubtarget.cpp.
References getFlatWorkGroupSizes(), and getReqdWorkGroupSize().
Referenced by llvm::AMDGPUTargetLowering::computeKnownBitsForTargetNode(), llvm::GCNTTIImpl::isAlwaysUniform(), isSingleLaneExecution(), llvm::AMDGPULegalizerInfo::legalizeWorkitemIDIntrinsic(), and llvm::SITargetLowering::passSpecialInputs().
|
pure virtual |
Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.
Referenced by getFlatWorkGroupSizes().
|
pure virtual |
Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.
Referenced by getEffectiveWavesPerEU().
std::pair< unsigned, unsigned > AMDGPUSubtarget::getOccupancyWithWorkGroupSizes | ( | const MachineFunction & | MF | ) | const |
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is MF.
This notably depends on the range of allowed flat group sizes for the function, the amount of per-workgroup LDS space required by the function, and hardware characteristics.
Definition at line 138 of file AMDGPUSubtarget.cpp.
References llvm::MachineFunction::getFunction(), llvm::MachineFunction::getInfo(), and getOccupancyWithWorkGroupSizes().
std::pair< unsigned, unsigned > AMDGPUSubtarget::getOccupancyWithWorkGroupSizes | ( | uint32_t | LDSBytes, |
const Function & | F | ||
) | const |
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only function running on a CU is F and each workgroup running the function requires LDSBytes bytes of LDS space.
This notably depends on the range of allowed flat group sizes for the function and hardware characteristics.
Definition at line 59 of file AMDGPUSubtarget.cpp.
References llvm::divideCeil(), F, getEUsPerCU(), getFlatWorkGroupSizes(), getLocalMemorySize(), getMaxWavesPerEU(), getMaxWorkGroupsPerCU(), getWavefrontSize(), and std::swap().
Referenced by llvm::GCNSchedStage::checkScheduling(), llvm::GCNSubtarget::computeOccupancy(), getOccupancyWithWorkGroupSizes(), llvm::SIRegisterInfo::getRegPressureLimit(), and llvm::PreRARematStage::initGCNSchedStage().
|
inline |
Definition at line 259 of file AMDGPUSubtarget.h.
References WavefrontSizeLog2.
Referenced by llvm::SIRegisterInfo::buildSpillLoadStore(), llvm::SIRegisterInfo::eliminateFrameIndex(), getAMDGPUDwarfFlavour(), getDefaultFlatWorkGroupSize(), llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV4::getHSAKernelProps(), getMaxLocalMemSizeWithWaveCount(), getOccupancyWithWorkGroupSizes(), llvm::GCNTTIImpl::instCombineIntrinsic(), llvm::PhiLoweringHelper::isLaneMaskReg(), llvm::GCNSubtarget::isWave32(), llvm::GCNSubtarget::isWave64(), llvm::AMDGPULegalizerInfo::legalizeIntrinsic(), llvm::SITargetLowering::LowerCall(), lowerFCMPIntrinsic(), lowerICMPIntrinsic(), and llvm::SITargetLowering::requiresUniformRegister().
|
inline |
Definition at line 263 of file AMDGPUSubtarget.h.
References WavefrontSizeLog2.
Referenced by llvm::SIRegisterInfo::eliminateFrameIndex(), llvm::GCNSubtarget::getKnownHighZeroBitsForFrameIndex(), llvm::GCNSubtarget::initializeSubtargetDependencies(), llvm::GCNTTIImpl::isAlwaysUniform(), llvm::SITargetLowering::LowerDYNAMIC_STACKALLOC(), and llvm::GCNTTIImpl::simplifyDemandedLaneMaskArg().
|
inline |
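Returns the subtarget's default pair of minimum/maximum number of waves per execution unit for function F, or minimum/maximum number of waves per execution unit explicitly requested using "amdgpu-waves-per-eu" attribute attached to function F.
Definition at line 109 of file AMDGPUSubtarget.h.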
References F, getFlatWorkGroupSizes(), and getWavesPerEU().
Referenced by llvm::GCNSubtarget::getMaxNumSGPRs(), llvm::GCNSubtarget::getMaxNumVGPRs(), and getWavesPerEU().
std::pair< unsigned, unsigned > AMDGPUSubtarget::getWavesPerEU | ( | const Function & | F, |
std::pair< unsigned, unsigned > | FlatWorkGroupSizes | ||
) | const |
Overload which uses the specified values for the flat work group sizes, rather than querying the function itself.
Parameters: FlatWorkGroupSizes | Should correspond to the function's value for getFlatWorkGroupSizes.
Definition at line 213 of file AMDGPUSubtarget.cpp.
References llvm::Default, F, getEffectiveWavesPerEU(), llvm::AMDGPU::getIntegerPairAttribute(), and getMaxWavesPerEU().
|
pure virtual |
Returns the number of waves per execution unit required to support the given FlatWorkGroupSize.
Implemented in llvm::GCNSubtarget, and llvm::R600Subtarget.
Referenced by getEffectiveWavesPerEU().
|
inline |
Definition at line 172 of file AMDGPUSubtarget.h.
References Has16BitInsts.
Referenced by llvm::AMDGPULegalizerInfo::AMDGPULegalizerInfo(), llvm::AMDGPUTargetLowering::AMDGPUTargetLowering(), llvm::GCNTTIImpl::getArithmeticInstrCost(), llvm::SITargetLowering::getAsmOperandConstVal(), llvm::GCNTTIImpl::getIntrinsicInstrCost(), llvm::GCNTTIImpl::getMaximumVF(), llvm::SITargetLowering::getNumRegistersForCallingConv(), llvm::SITargetLowering::getPreferredShiftAmountTy(), llvm::SITargetLowering::getRegisterTypeForCallingConv(), llvm::GCNTTIImpl::getVectorInstrCost(), llvm::SITargetLowering::getVectorTypeBreakdownForCallingConv(), llvm::AMDGPUTargetLowering::isFAbsFree(), llvm::SITargetLowering::isFMAFasterThanFMulAndFAdd(), llvm::AMDGPUTargetLowering::isFPImmLegal(), llvm::SIInstrInfo::isInlineConstant(), llvm::AMDGPUTargetLowering::isNarrowingProfitable(), llvm::AMDGPUTargetLowering::isTruncateFree(), llvm::SITargetLowering::isTypeDesirableForOp(), llvm::AMDGPUTargetLowering::isZExtFree(), llvm::AMDGPULegalizerInfo::legalizeFlogCommon(), llvm::AMDGPULegalizerInfo::legalizeFSQRTF16(), llvm::AMDGPUTargetLowering::lowerFEXP2(), llvm::AMDGPUTargetLowering::LowerFLOG2(), llvm::AMDGPUTargetLowering::LowerFLOGCommon(), llvm::AMDGPUTargetLowering::LowerSINT_TO_FP(), llvm::AMDGPUTargetLowering::LowerUINT_TO_FP(), llvm::AMDGPUTargetLowering::performFAbsCombine(), llvm::AMDGPUTargetLowering::performMulCombine(), and llvm::SITargetLowering::SITargetLowering().
|
inline |
Definition at line 187 of file AMDGPUSubtarget.h.
References HasBF16ConversionInsts.
Referenced by llvm::SITargetLowering::SITargetLowering().
|
inline |
Definition at line 197 of file AMDGPUSubtarget.h.
References HasBF8ConversionScaleInsts.
|
inline |
Definition at line 205 of file AMDGPUSubtarget.h.
References HasCvtPkF16F32Inst.
Referenced by llvm::AMDGPULegalizerInfo::AMDGPULegalizerInfo(), and llvm::SITargetLowering::SITargetLowering().
|
inline |
Definition at line 215 of file AMDGPUSubtarget.h.
References HasDsSrc2Insts.
|
inline |
Definition at line 203 of file AMDGPUSubtarget.h.
References HasF16BF16ToFP6BF6ConversionScaleInsts.
|
inline |
Definition at line 207 of file AMDGPUSubtarget.h.
References HasF32ToF16BF16ConversionSRInsts.
|
inline |
Definition at line 251 of file AMDGPUSubtarget.h.
References FastFMAF32.
Referenced by llvm::GCNTTIImpl::getIntrinsicInstrCost(), llvm::SITargetLowering::isFMAFasterThanFMulAndFAdd(), llvm::AMDGPULegalizerInfo::legalizeFExp(), llvm::AMDGPULegalizerInfo::legalizeFlogCommon(), llvm::AMDGPULegalizerInfo::legalizeFlogUnsafe(), llvm::AMDGPUTargetLowering::lowerFEXP(), llvm::AMDGPUTargetLowering::LowerFLOGCommon(), and llvm::AMDGPUTargetLowering::LowerFLOGUnsafe().
|
inline |
Definition at line 243 of file AMDGPUSubtarget.h.
References HasFminFmaxLegacy.
Referenced by llvm::AMDGPUTargetLowering::performSelectCombine().
|
inline |
Definition at line 199 of file AMDGPUSubtarget.h.
References HasFP4ConversionScaleInsts.
|
inline |
Definition at line 201 of file AMDGPUSubtarget.h.
References HasFP6BF6ConversionScaleInsts.
|
inline |
Definition at line 195 of file AMDGPUSubtarget.h.
References HasFP8ConversionScaleInsts.
|
inline |
Definition at line 239 of file AMDGPUSubtarget.h.
References HasInv2PiInlineImm.
Referenced by llvm::SITargetLowering::checkAsmConstraintValA(), llvm::SIRegisterInfo::eliminateFrameIndex(), llvm::AMDGPUTargetLowering::getConstantNegateCost(), llvm::SIInstrInfo::isInlineConstant(), and llvm::SIInstrInfo::isOperandLegal().
|
inline |
Definition at line 211 of file AMDGPUSubtarget.h.
References HasMadMacF32Insts, and isGCN().
Referenced by llvm::AMDGPULegalizerInfo::AMDGPULegalizerInfo(), llvm::GCNTTIImpl::getArithmeticInstrCost(), llvm::SITargetLowering::isFMADLegal(), llvm::SITargetLowering::isFMAFasterThanFMulAndFAdd(), llvm::AMDGPULegalizerInfo::legalizeFDIV16(), llvm::AMDGPUTargetLowering::LowerDIVREM24(), llvm::AMDGPUTargetLowering::LowerUDIVREM64(), and llvm::SITargetLowering::SITargetLowering().
|
inline |
Definition at line 191 of file AMDGPUSubtarget.h.
References HasMadMixInsts.
Referenced by llvm::SITargetLowering::isFPExtFoldable().
|
inline |
Definition at line 227 of file AMDGPUSubtarget.h.
References HasMulI24.
Referenced by llvm::AMDGPUTargetLowering::performMulCombine(), llvm::AMDGPUTargetLowering::performMulhsCombine(), and llvm::AMDGPUTargetLowering::performMulLoHiCombine().
|
inline |
Definition at line 231 of file AMDGPUSubtarget.h.
References HasMulU24.
Referenced by llvm::AMDGPUTargetLowering::performMulCombine(), llvm::AMDGPUTargetLowering::performMulhuCombine(), and llvm::AMDGPUTargetLowering::performMulLoHiCombine().
|
inline |
Definition at line 219 of file AMDGPUSubtarget.h.
References HasSDWA.
Referenced by llvm::AMDGPUTargetLowering::PerformDAGCombine(), and llvm::SIInstrInfo::verifyInstruction().
|
inline |
Definition at line 235 of file AMDGPUSubtarget.h.
References HasSMulHi.
Referenced by llvm::AMDGPURegisterBankInfo::applyMappingMAD_64_32(), llvm::AMDGPUTargetLowering::performMulhsCombine(), and llvm::AMDGPUTargetLowering::performMulhuCombine().
|
inline |
Definition at line 247 of file AMDGPUSubtarget.h.
References HasTrigReducedRange.
Referenced by llvm::AMDGPULegalizerInfo::legalizeSinCos().
|
inline |
Return true if the subtarget supports True16 instructions.
Definition at line 177 of file AMDGPUSubtarget.h.
References HasTrue16BitInsts.
Referenced by llvm::SIInstrInfo::convertToThreeAddress(), llvm::SIInstrInfo::copyPhysReg(), llvm::SIInstrInfo::foldImmediate(), and useRealTrue16Insts().
|
inline |
Definition at line 223 of file AMDGPUSubtarget.h.
References HasVOP3PInsts.
Referenced by llvm::AMDGPULegalizerInfo::AMDGPULegalizerInfo(), llvm::GCNTTIImpl::getArithmeticReductionCost(), llvm::GCNTTIImpl::getMinMaxReductionCost(), llvm::GCNTTIImpl::getShuffleCost(), llvm::AMDGPUTargetLowering::isNarrowingProfitable(), and llvm::SITargetLowering::SITargetLowering().
Definition at line 160 of file AMDGPUSubtarget.h.
References F, isAmdHsaOS(), and isMesaKernel().
|
inline |
Definition at line 146 of file AMDGPUSubtarget.h.
References llvm::Triple::AMDHSA, and llvm::Triple::getOS().
Referenced by getAlignmentForImplicitArgPtr(), llvm::SIInstrInfo::getDefaultRsrcDataFormat(), llvm::GCNSubtarget::getTrapHandlerAbi(), llvm::GCNSubtarget::initializeSubtargetDependencies(), isAmdHsaOrMesa(), llvm::SITargetLowering::isOffsetFoldingLegal(), llvm::SITargetLowering::LowerFormalArguments(), and llvm::AMDGPUAsmPrinter::runOnMachineFunction().
|
inline |
Definition at line 150 of file AMDGPUSubtarget.h.
References llvm::Triple::AMDPAL, and llvm::Triple::getOS().
Referenced by llvm::AMDGPULegalizerInfo::legalizeGlobalValue(), llvm::SITargetLowering::LowerFormalArguments(), llvm::AMDGPUAsmPrinter::runOnMachineFunction(), and llvm::SITargetLowering::shouldEmitGOTReloc().
|
inline |
Definition at line 164 of file AMDGPUSubtarget.h.
References llvm::Triple::amdgcn, and llvm::Triple::getArch().
Referenced by hasMadMacF32Insts(), llvm::AMDGPUTargetLowering::LowerDIVREM24(), and llvm::AMDGPUTargetLowering::LowerINT_TO_FP32().
|
inline |
Definition at line 168 of file AMDGPUSubtarget.h.
References GCN3Encoding.
|
inline |
Definition at line 154 of file AMDGPUSubtarget.h.
References llvm::Triple::getOS(), and llvm::Triple::Mesa3D.
Referenced by llvm::GCNSubtarget::isMesaGfxShader(), isMesaKernel(), llvm::AMDGPULegalizerInfo::legalizeGlobalValue(), and llvm::SITargetLowering::shouldEmitGOTReloc().
Definition at line 231 of file AMDGPUSubtarget.cpp.
References F, isMesa3DOS(), and llvm::AMDGPU::isShader().
Referenced by getImplicitArgNumBytes(), and isAmdHsaOrMesa().
|
inline |
Definition at line 255 of file AMDGPUSubtarget.h.
References EnablePromoteAlloca.
Return true if only a single workitem can be active in a wave.
Definition at line 243 of file AMDGPUSubtarget.cpp.
References getMaxWorkitemID(), and I.
Referenced by llvm::GCNTTIImpl::hasBranchDivergence().
bool AMDGPUSubtarget::makeLIDRangeMetadata | ( | Instruction * | I | ) | const |
Creates value range metadata on an workitemid.* intrinsic call or load.
Definition at line 252 of file AMDGPUSubtarget.cpp.
References llvm::MDBuilder::createRange(), F, getFlatWorkGroupSizes(), getReqdWorkGroupSize(), I, if(), llvm::Lower, Range, and llvm::Upper.
bool AMDGPUSubtarget::useRealTrue16Insts | ( | ) | const |
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-generated.
Fake True16 instructions are identical to non-fake ones except that they take 32-bit registers as operands and always use their low halves.
Definition at line 37 of file AMDGPUSubtarget.cpp.
References EnableRealTrue16Insts, and hasTrue16BitInsts().
Referenced by llvm::SIRegisterInfo::getRegClassForSizeOnBank(), llvm::SIInstrInfo::getVALUOp(), llvm::SIInstrInfo::moveToVALUImpl(), and llvm::SITargetLowering::SITargetLowering().
|
protected |
Definition at line 78 of file AMDGPUSubtarget.h.
Referenced by getAddressableLocalMemorySize(), llvm::GCNSubtarget::initializeSubtargetDependencies(), and llvm::R600Subtarget::R600Subtarget().
|
protected |
Definition at line 72 of file AMDGPUSubtarget.h.
Referenced by isPromoteAllocaEnabled().
|
protected |
Definition at line 60 of file AMDGPUSubtarget.h.
Referenced by useRealTrue16Insts().
|
protected |
Definition at line 75 of file AMDGPUSubtarget.h.
Referenced by llvm::GCNSubtarget::GCNSubtarget(), and getEUsPerCU().
|
protected |
Definition at line 74 of file AMDGPUSubtarget.h.
Referenced by hasFastFMAF32().
|
protected |
Definition at line 50 of file AMDGPUSubtarget.h.
Referenced by isGCN3Encoding().
|
protected |
Definition at line 51 of file AMDGPUSubtarget.h.
Referenced by has16BitInsts().
|
protected |
Definition at line 61 of file AMDGPUSubtarget.h.
Referenced by hasBF16ConversionInsts().
|
protected |
Definition at line 54 of file AMDGPUSubtarget.h.
Referenced by hasBF8ConversionScaleInsts().
|
protected |
Definition at line 58 of file AMDGPUSubtarget.h.
Referenced by hasCvtPkF16F32Inst().
|
protected |
Definition at line 64 of file AMDGPUSubtarget.h.
Referenced by hasDsSrc2Insts().
|
protected |
Definition at line 57 of file AMDGPUSubtarget.h.
Referenced by hasF16BF16ToFP6BF6ConversionScaleInsts().
|
protected |
Definition at line 59 of file AMDGPUSubtarget.h.
Referenced by hasF32ToF16BF16ConversionSRInsts().
Definition at line 71 of file AMDGPUSubtarget.h.
Referenced by hasFminFmaxLegacy(), and llvm::GCNSubtarget::initializeSubtargetDependencies().
|
protected |
Definition at line 55 of file AMDGPUSubtarget.h.
Referenced by hasFP4ConversionScaleInsts().
|
protected |
Definition at line 56 of file AMDGPUSubtarget.h.
Referenced by hasFP6BF6ConversionScaleInsts().
|
protected |
Definition at line 53 of file AMDGPUSubtarget.h.
Referenced by hasFP8ConversionScaleInsts().
|
protected |
Definition at line 70 of file AMDGPUSubtarget.h.
Referenced by hasInv2PiInlineImm().
|
protected |
Definition at line 63 of file AMDGPUSubtarget.h.
Referenced by hasMadMacF32Insts().
|
protected |
Definition at line 62 of file AMDGPUSubtarget.h.
Referenced by hasMadMixInsts().
Definition at line 67 of file AMDGPUSubtarget.h.
Referenced by hasMulI24(), and llvm::R600Subtarget::initializeSubtargetDependencies().
Definition at line 68 of file AMDGPUSubtarget.h.
Referenced by hasMulU24(), and llvm::R600Subtarget::initializeSubtargetDependencies().
|
protected |
Definition at line 65 of file AMDGPUSubtarget.h.
Referenced by hasSDWA().
|
protected |
Definition at line 69 of file AMDGPUSubtarget.h.
Referenced by hasSMulHi(), and llvm::GCNSubtarget::initializeSubtargetDependencies().
|
protected |
Definition at line 73 of file AMDGPUSubtarget.h.
Referenced by hasTrigReducedRange().
|
protected |
Definition at line 52 of file AMDGPUSubtarget.h.
Referenced by hasTrue16BitInsts().
|
protected |
Definition at line 66 of file AMDGPUSubtarget.h.
Referenced by hasVOP3PInsts().
|
protected |
Definition at line 77 of file AMDGPUSubtarget.h.
Referenced by getLocalMemorySize(), llvm::GCNSubtarget::initializeSubtargetDependencies(), and llvm::R600Subtarget::R600Subtarget().
|
protected |
Definition at line 76 of file AMDGPUSubtarget.h.
Referenced by llvm::GCNSubtarget::GCNSubtarget(), and getMaxWavesPerEU().
|
protected |
Definition at line 79 of file AMDGPUSubtarget.h.
Referenced by getWavefrontSize(), getWavefrontSizeLog2(), and llvm::GCNSubtarget::initializeSubtargetDependencies().