C# - Использование вычислительных шейдеров

Я пытаюсь реализовать с помощью SharpDX 11 проверку пересечения луча с сеткой (ray/mesh intersection) на графическом процессоре. Из более старого поста я узнал, что это можно сделать с помощью вычислительного шейдера (Compute Shader), но мне нужна помощь в создании и определении буферов вне кода .hlsl.

Мой код HLSL следующий:

// Result record for a single ray/triangle hit.
// Holds one float3 (12 bytes), so any structured buffer of rayHit elements
// created on the host side needs a 12-byte element stride.
struct rayHit
{
    float3 intersection; // Point where the ray meets the triangle.
};

// Per-dispatch ray parameters, bound to constant-buffer slot b0.
//
// HLSL packs constant-buffer members into 16-byte registers and never lets a
// member straddle a register boundary, so the host-side layout is:
//   offset  0: rayFrom       (12 bytes, then 4 bytes of padding)
//   offset 16: rayDir        (12 bytes)
//   offset 28: TriangleCount (4 bytes)
// for a total size of 32 bytes.
cbuffer cbRaySettings : register(b0)
{
    float3 rayFrom;       // Ray origin.
    float3 rayDir;        // Ray direction.
    uint TriangleCount;   // Number of triangles (entries of the index buffer) to test.
};

// Vertex positions, bound as a read-only structured buffer at SRV slot t0.
StructuredBuffer<float3> positionBuffer : register(t0);
// One uint3 per triangle holding three indices into positionBuffer, at SRV slot t1.
StructuredBuffer<uint3> indexBuffer : register(t1);

// Output list of hits at UAV slot u0.
// NOTE(review): an AppendStructuredBuffer relies on the UAV's hidden counter, so the
// host must create the view with the append flag and reset the counter when binding.
AppendStructuredBuffer<rayHit> appendRayHitBuffer : register(u0);

// Tests the ray (rayFrom, rayDir) from cbRaySettings against the triangle
// (p1, p2, p3) using the determinant-based (Moller-Trumbore style) method.
//
// Parameters:
//   p1, p2, p3    - triangle vertices.
//   hit           - set to true only when the ray crosses the triangle at a
//                   non-negative distance from rayFrom; false otherwise.
//   intersection  - the hit point when hit is true, (0, 0, 0) otherwise.
//
// No back-face culling is applied: the ray may hit either side of the triangle.
void TestTriangle(float3 p1, float3 p2, float3 p3, out bool hit, out float3 intersection)
{
    // Start from "no hit" so every early return leaves both out parameters assigned.
    hit = false;
    intersection = float3(0, 0, 0);

    // Vectors along the two triangle edges that share p1.
    const float3 edge1 = p2 - p1;
    const float3 edge2 = p3 - p1;

    // Cross product of ray direction and edge2 - first part of the determinant.
    const float3 directionCrossEdge2 = cross(rayDir, edge2);

    // Determinant: dot product of edge1 and the cross product above.
    const float determinant = dot(edge1, directionCrossEdge2);

    // A zero determinant means the ray is parallel to the triangle plane.
    // Returning here also keeps the division below from dividing by zero.
    if (determinant == 0.0f)
    {
        return;
    }

    const float inverseDeterminant = 1.0f / determinant;

    // U parameter of the intersection point.
    const float3 distanceVector = rayFrom - p1;
    const float triangleU = dot(distanceVector, directionCrossEdge2) * inverseDeterminant;

    // Outside the triangle along U.
    if (triangleU < 0.0f || triangleU > 1.0f)
    {
        return;
    }

    // V parameter of the intersection point.
    const float3 distanceCrossEdge1 = cross(distanceVector, edge1);
    const float triangleV = dot(rayDir, distanceCrossEdge1) * inverseDeterminant;

    // Outside the triangle along V or beyond the U + V = 1 edge.
    if (triangleV < 0.0f || triangleU + triangleV > 1.0f)
    {
        return;
    }

    // Distance along the ray to the triangle plane.
    const float rayDistance = dot(edge2, distanceCrossEdge1) * inverseDeterminant;

    // The triangle lies behind the ray origin.
    if (rayDistance < 0.0f)
    {
        return;
    }

    // Use the computed ray distance (not an unrelated local) to place the hit point.
    intersection = rayFrom + (rayDir * rayDistance);
    hit = true;
}

// Compute-shader entry point: each thread tests one triangle against the ray
// and appends the hit point to appendRayHitBuffer when the ray intersects it.
//
// Thread groups are 64 threads wide, so the host should dispatch
// ceil(TriangleCount / 64) groups along X.
[numthreads(64, 1, 1)]
void CS_RayAppend(uint3 tid : SV_DispatchThreadID)
{
    // The last group may be only partially used; surplus threads must not read
    // past the end of indexBuffer.
    if (tid.x >= TriangleCount)
    {
        return;
    }

    // Fetch the triangle's three vertex positions through the index buffer.
    const uint3 indices = indexBuffer[tid.x];
    const float3 p1 = positionBuffer[indices.x];
    const float3 p2 = positionBuffer[indices.y];
    const float3 p3 = positionBuffer[indices.z];

    bool hit;
    float3 p;
    TestTriangle(p1, p2, p3, hit, p);

    if (hit)
    {
        rayHit hitData;
        hitData.intersection = p;
        // Publish the result; without this call the hit would be discarded.
        appendRayHitBuffer.Append(hitData);
    }
}

Ниже приведена часть моей реализации на C#, но я не могу понять, как загрузить буферы для вычислительного шейдера.

// Worst case is one hit per triangle, so the output buffer holds `count` elements.
int count = obj.Mesh.Triangles.Count;
// The stride must equal the size of the shader's rayHit struct: one float3 = 12 bytes.
int size = Utilities.SizeOf<Vector3>();
BufferDescription bufferDesc = new BufferDescription() {
    BindFlags = BindFlags.UnorderedAccess | BindFlags.ShaderResource,
    Usage = ResourceUsage.Default,
    CpuAccessFlags = CpuAccessFlags.None,
    OptionFlags = ResourceOptionFlags.BufferStructured,
    StructureByteStride = size,
    SizeInBytes = size * count
};
Buffer buffer = new Buffer(device, bufferDesc);
UnorderedAccessViewDescription uavDescription = new UnorderedAccessViewDescription() {
    // The shader declares an AppendStructuredBuffer, which needs the Append flag
    // so the view carries the hidden element counter.
    Buffer = new UnorderedAccessViewDescription.BufferResource() { FirstElement = 0, Flags = UnorderedAccessViewBufferFlags.Append, ElementCount = count },
    // Structured buffers take their element layout from the stride, not a DXGI format.
    Format = SharpDX.DXGI.Format.Unknown,
    Dimension = UnorderedAccessViewDimension.Buffer
};
UnorderedAccessView uav = new UnorderedAccessView(device, buffer, uavDescription);
// Bind to u0 and reset the append counter to 0 so results from earlier dispatches are dropped.
context.ComputeShader.SetUnorderedAccessView(0, uav, 0);

var code = HLSLCompiler.CompileFromFile(@"Shaders\TestTriangle.hlsl", "CS_RayAppend", "cs_5_0");
ComputeShader _shader = new ComputeShader(device, code);
// NOTE(review): this buffer is sized for a single Vector3 and has no bind flags, so it
// cannot back the shader's StructuredBuffer<float3> at t0. It presumably needs
// BindFlags.ShaderResource, ResourceOptionFlags.BufferStructured, a 12-byte stride and
// room for every vertex position, plus a ShaderResourceView bound to slot 0 - confirm
// against the type of `data`, which is not visible here.
Buffer positionsBuffer = new Buffer(device, Utilities.SizeOf<Vector3>(), ResourceUsage.Default, BindFlags.None, CpuAccessFlags.None, ResourceOptionFlags.None, 0);
context.UpdateSubresource(ref data, positionsBuffer);
// NOTE(review): still missing before the shader can run: the index buffer SRV at t1,
// the cbRaySettings constant buffer at b0, context.ComputeShader.Set(_shader) and a
// Dispatch call covering all triangles.

В моей реализации на C# я рассматриваю только один луч (с его началом и направлением) и хотел бы использовать шейдер, чтобы проверить его пересечение со всеми треугольниками сетки. Я уже умею делать это на центральном процессоре, но для 20k+ треугольников вычисление занимает слишком много времени, даже несмотря на то что я уже использую распараллеливание.

