Tag Archives: MGPU

naiveScan

Segmented Warp Scan with Packed Head Flags

Here is a simple test implementation of a segmented scan within a warp, using packed head flags. The idea comes from the segmented warp scan function.

Here is the implementation in F#.

[sourcecode language="fsharp"]
open NUnit.Framework
open Alea.Interop.LLVM
open Alea.CUDA
open Alea.CUDA.Extension

// Short aliases for two Alea.CUDA modules. IRB is used by the bfi builder
// in this snippet; DF is not referenced in the visible code.
module IRB = Alea.CUDA.IRBuilderUtil
module DF = Alea.CUDA.DeviceFunction

// Bind the worker returned by getDefaultWorker().
// NOTE(review): presumably the default CUDA device worker supplied by
// Alea.CUDA.Extension; it is not used in the visible part of the snippet.
let worker = getDefaultWorker()

/// Host-side stub for a device-only bit-field-insert intrinsic.
///
/// Calling it on the host always raises via `failwith "Device Only!"`.
/// The code emitted for the device comes from the companion builder
/// ``bfi [BUILDER]`` defined right after it.
/// NOTE(review): the stub/builder pairing is presumably resolved by
/// Alea.CUDA through the "<name> [BUILDER]" naming convention; confirm
/// against the Alea.CUDA documentation.
[<IRB.LLVMFunctionBuilder>]
let bfi(x:int, y:int, bit:int, numBits:int):int = failwith "Device Only!"

/// LLVM builder for `bfi`: builds an inline-assembly value with the signature
/// (i32, i32, i32, i32) -> i32 and emits a call to it with the LLVM argument
/// values taken from `ctx`.
// The name contains a space and brackets, so it has to be written as a
// double-backtick identifier to be valid F#.
let ``bfi [BUILDER]``(ctx:IRB.LLVMFunctionBuilderContext) =
    let args = ctx.LLVMValueArgs             // LLVM values of the call arguments
    let i32t = ctx.LLVMHelper.i32_t          // LLVM type used for int
    let rett = i32t                          // return type
    let argst = [| i32t; i32t; i32t; i32t |] // argument type list
    let funct = LLVMFunctionTypeEx(rett, argst, 0)
    // $0 is the output register; $1..$4 are the inputs. The template lists
    // $2 before $1, i.e. the second argument is passed to the instruction
    // ahead of the first (assuming args follow the stub's parameter order
    // x, y, bit, numBits, this is y then x; confirm).
    let funcp = LLVMConstInlineAsm(funct, "bfi.b32 \t$0, $2, $1, $3, $4;", "=r,r,r,r,r", 0, 0)
    IRB.Value(LLVMBuildCallEx(ctx.Builder, funcp, args, ""))

// Device-side function taking three int device pointers: inputs, outputs
// and distances. It carries ReflectedDefinition, so its quotation is
// available (presumably for Alea.CUDA to compile it for the device; confirm).
// NOTE(review): the body shown here ends on a `let` binding with no result
// expression and never uses `outputs` or `distances`; the remainder of the
// function appears to have been lost and must be restored before this compiles.
[<ReflectedDefinition>]
let segscanWarp (inputs:DevicePtr<int>) (outputs:DevicePtr<int>) (distances:DevicePtr<int>) =
// x index of the current thread, used to pick this thread's element.
let tid = threadIdx.x
// Input element read for this thread. NOTE(review): presumably a value with
// the head flag packed into bit 31, matching how the test data is built; confirm.
let packed = inputs.[tid]

// NUnit test case that prepares host input data for the segmented scan.
// NOTE(review): the body stops after the input array; `numWarps` and
// `hInputs` are never used here, so the rest of the test (launch and
// verification) appears to be missing from this snippet.
[<Test>]
let test() =
// Block size from which the warp count is derived.
let blockSize = 256
// Number of warps that fit in one block of `blockSize` threads.
let numWarps = blockSize / Util.WARP_SIZE
// Sets bit 31 of x. NOTE(review): presumably this marks the element as a
// segment head (the "packed head flag"); confirm.
let s x = x ||| (1 <<< 31)
// 32 host input values; the ones wrapped in `s` (indices 0, 5, 21 and 31)
// have bit 31 set.
let hInputs =
[|
s 3; 0; 3; 3; 0; s 1; 2; 0; 3; 3; 3; 2; 3; 0; 3; 1;
0; 0; 2; 3; 2; s 3; 1; 0; 2; 1; 2; 1; 1; 0; 1; s 3;
|]

[/sourcecode]