[toc]

# ComputeShader 驱动的粒子效果

# DrawProceduralNow 接口

可以使用 DrawProceduralNow 接口来绘制粒子效果 (但是只有 ES4.5 以上才可能支持), 但是这个其实完全可以用 instance 来代替。它的主要作用是: 不需要输入顶点, 顶点数量是在调用 DrawProceduralNow 时指定的, 其他的数据都是从 SSBO 中获取的; 而牵扯到 SSBO, 估计就要 ES5.0 以后才可以完全支持。
DrawProcedural 触发的时候,可以不用每帧提交

# 基于 point 的粒子

通常粒子的数据只需要一个简单的结构体

1
2
3
4
5
// Per-particle state stored in the GPU buffer.
struct Particle{
float3 position; // current position of the particle
float3 velocity; // current velocity, integrated into position by the compute shader
float life; // remaining life time; the compute shader decrements it every frame
};

即位置,速度,生命时间 基于点的渲染可以直接在顶点着色器中控制点的大小

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// Vertex stage for point rendering: one instance per particle, fetched from
// particleBuffer by instance id (no vertex input stream is used).
v2f vert(uint vertex_id : SV_VertexID, uint instance_id : SV_InstanceID)
{
v2f o = (v2f)0;

// Color
// life * 0.25 maps the respawn life of 4 to 1.0, so colour and alpha fade as life runs out.
float life = particleBuffer[instance_id].life;
float lerpVal = life * 0.25f;
o.color = fixed4(1.0f - lerpVal+0.1, lerpVal+0.1, 1.0f, lerpVal);

// Position
// The particle position is transformed to clip space; PSIZE carries the point size.
o.position = UnityObjectToClipPos(float4(particleBuffer[instance_id].position, 1.0f));
o.size = _PointSize;

return o;
}

其中, o.size 就是用来设置点的大小的, o.position 是输出的点的位置

粒子的提交渲染接口如下

1
// Arguments: topology (points), vertex count per instance (1), instance count (one per particle).
Graphics.DrawProceduralNow(MeshTopology.Points, 1, particleCount);

当使用点渲染时,只需要提供
1. 渲染模式.
2. 顶点个数
3. 实例数量 (程序化绘制的实例个数)

但是由上我们可以看到, DrawProceduralNow 没有传入材质相关的数据, 所以在调用这个接口之前, 需要先用 material.SetPass 激活一个材质的 Pass, DrawProceduralNow 会直接使用当前激活的 Pass 进行绘制 (? 很迷糊的提交方式)

1
2
3
4
5
// Draws the particles as points, one single-vertex instance per particle.
void OnRenderObject()
{
// DrawProceduralNow takes no material argument, so the pass is activated first.
material.SetPass(0);
Graphics.DrawProceduralNow(MeshTopology.Points, 1, particleCount);
}

借助上述基础,我们只需要每帧计算一下 ComputeShader 即可

  • 设置材质球
  • 开始材质球的粒子渲染 DrawProceduralNow
  • 将材质球和 ComputeShader 的 SSBO 连到一起
  • 每帧分派计算 ComputeShader

# 完整代码

Csharp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
using System.Collections;
using System.Collections.Generic;
using UnityEngine;

/// <summary>
/// Simulates a cloud of point particles in a compute shader and draws them with
/// <c>Graphics.DrawProceduralNow</c>. The particles are attracted towards the
/// world-space position under the mouse cursor.
/// </summary>
public class ParticleFun : MonoBehaviour
{
    // World-space XY position derived from the mouse cursor in OnGUI.
    private Vector2 cursorPos;

    // CPU-side mirror of the Particle struct declared in the compute shader
    // and in the render shader; the field order must match.
    struct Particle
    {
        public Vector3 position;
        public Vector3 velocity;
        public float life;
    }

    // Buffer stride in bytes: position (3 floats) + velocity (3 floats) + life (1 float).
    const int SIZE_PARTICLE = 7 * sizeof(float);

    public int particleCount = 1000000;
    public Material material;
    public ComputeShader shader;
    [Range(1, 10)]
    public int pointSize = 2;

    int kernelID;
    ComputeBuffer particleBuffer;

    // Number of thread groups to dispatch along X.
    int groupSizeX;

    // Reused every frame so Update does not allocate a new array per frame.
    readonly float[] mousePosition2D = new float[2];

    // Use this for initialization
    void Start()
    {
        Init();
    }

    /// <summary>
    /// Fills the initial particle data, uploads it to a compute buffer and binds
    /// that buffer to both the compute shader and the render material.
    /// </summary>
    void Init()
    {
        // Without these references (or with an empty particle set) every later
        // call would throw, so report the problem once and switch the component off.
        if (shader == null || material == null || particleCount <= 0)
        {
            Debug.LogError("ParticleFun: a compute shader, a material and a positive particleCount are required.", this);
            enabled = false;
            return;
        }

        // initialize the particles
        Particle[] particleArray = new Particle[particleCount];

        for (int i = 0; i < particleCount; i++)
        {
            // Random direction scaled by a random length of at most 0.5.
            float x = Random.value * 2 - 1.0f;
            float y = Random.value * 2 - 1.0f;
            float z = Random.value * 2 - 1.0f;
            Vector3 xyz = new Vector3(x, y, z);
            xyz.Normalize();
            xyz *= Random.value;
            xyz *= 0.5f;

            // The cloud is centred at z = 3, the same depth the compute shader respawns at.
            particleArray[i].position.x = xyz.x;
            particleArray[i].position.y = xyz.y;
            particleArray[i].position.z = xyz.z + 3;

            particleArray[i].velocity.x = 0;
            particleArray[i].velocity.y = 0;
            particleArray[i].velocity.z = 0;

            // Initial life value
            particleArray[i].life = Random.value * 5.0f + 1.0f;
        }

        // create compute buffer (release a previous one if Init is ever run again)
        particleBuffer?.Release();
        particleBuffer = new ComputeBuffer(particleCount, SIZE_PARTICLE);
        particleBuffer.SetData(particleArray);

        // find the id of the kernel
        kernelID = shader.FindKernel("CSParticle");

        // Round the group count up so every particle is covered by a thread.
        uint threadsX;
        shader.GetKernelThreadGroupSizes(kernelID, out threadsX, out _, out _);
        groupSizeX = Mathf.CeilToInt((float)particleCount / (float)threadsX);

        // bind the compute buffer to the shader and the compute shader
        shader.SetBuffer(kernelID, "particleBuffer", particleBuffer);
        material.SetBuffer("particleBuffer", particleBuffer);

        // _PointSize is declared as a Float property in the shader.
        material.SetFloat("_PointSize", pointSize);
    }

    /// <summary>
    /// Draws one single-vertex point instance per particle with the material's first pass.
    /// </summary>
    void OnRenderObject()
    {
        if (particleBuffer == null)
        {
            return;
        }

        // DrawProceduralNow takes no material argument, so the pass is activated first.
        material.SetPass(0);
        Graphics.DrawProceduralNow(MeshTopology.Points, 1, particleCount);
    }

    void OnDestroy()
    {
        if (particleBuffer != null)
        {
            particleBuffer.Release();
            particleBuffer = null;
        }
    }

    // Update is called once per frame
    void Update()
    {
        if (particleBuffer == null)
        {
            return;
        }

        mousePosition2D[0] = cursorPos.x;
        mousePosition2D[1] = cursorPos.y;

        // Send datas to the compute shader
        shader.SetFloat("deltaTime", Time.deltaTime);
        shader.SetFloats("mousePosition", mousePosition2D);

        // Update the Particles
        shader.Dispatch(kernelID, groupSizeX, 1, 1);
    }

    /// <summary>
    /// Converts the GUI mouse position into the world-space XY stored in cursorPos.
    /// </summary>
    void OnGUI()
    {
        Camera c = Camera.main;
        Event e = Event.current;

        // Camera.main is null when no enabled camera is tagged "MainCamera".
        if (c == null || e == null)
        {
            return;
        }

        // Get the mouse position from Event.
        // Note that the y position from Event is inverted.
        Vector2 mousePos = new Vector2(e.mousePosition.x, c.pixelHeight - e.mousePosition.y);

        // NOTE(review): the original comment here said "z = 3", but the depth passed
        // is nearClipPlane + 14 — confirm the intended depth for the camera setup.
        Vector3 p = c.ScreenToWorldPoint(new Vector3(mousePos.x, mousePos.y, c.nearClipPlane + 14));

        cursorPos.x = p.x;
        cursorPos.y = p.y;
    }
}

ComputeShader

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
// Each #kernel tells which function to compile; you can have many kernels
#pragma kernel CSParticle

// Per-particle state; one element of particleBuffer.
struct Particle
{
float3 position; // current position
float3 velocity; // current velocity, integrated into position in CSParticle
float life; // remaining life time; the particle is respawned once it drops below 0
};

// Read-write particle storage, updated in place by the kernel.
RWStructuredBuffer<Particle> particleBuffer;

// Time step used to age and move the particles.
float deltaTime;
// XY position the particles are pulled towards and respawned around.
float2 mousePosition;

// State of the xorshift generator; re-seeded in respawn().
uint rng_state;

// Advances the xorshift generator held in rng_state and returns the new state.
uint rand_xorshift()
{
// Zero is a fixed point of xorshift (shifting and xor-ing 0 always gives 0),
// so a zero seed would return 0 forever. Substitute a non-zero constant.
if (rng_state == 0u)
{
rng_state = 0x9E3779B9u;
}
//Xorshift algorithm from George Marsaglia's paper
rng_state ^= (rng_state << 13);
rng_state ^= (rng_state >> 17);
rng_state ^= (rng_state << 5);
return rng_state;
}

// Resets particle `id`: zero velocity, life of 4, and a position randomly offset
// from (mousePosition.x, mousePosition.y, 3).
void respawn(uint id)
{
// Seed from the particle index, so a given particle draws the same sequence on every respawn.
rng_state = id;

// 1 / 2^32: scales a 32-bit random value down to roughly the unit interval.
float toUnit = (1.0 / 4294967296.0);

// Three draws shifted to be centred on zero give a random direction.
float rx = float(rand_xorshift()) * toUnit - 0.5;
float ry = float(rand_xorshift()) * toUnit - 0.5;
float rz = float(rand_xorshift()) * toUnit - 0.5;

// Direction of length 0.8, then shrunk by a fourth random factor.
float3 offset = normalize(float3(rx, ry, rz)) * 0.8f;
float radiusScale = float(rand_xorshift()) * toUnit;
offset *= radiusScale;

particleBuffer[id].velocity = float3(0,0,0);
particleBuffer[id].life = 4;
particleBuffer[id].position = float3(offset.x + mousePosition.x,
offset.y + mousePosition.y, offset.z + 3.0);
}

// Kernel: ages one particle, accelerates it towards (mousePosition, 3), integrates
// its position and respawns it once its life has run out.
// NOTE(review): the C# side rounds the dispatched group count up, so when the particle
// count is not a multiple of 256 the trailing threads index past the end of the buffer.
// Consider passing the count as a constant and returning early — confirm target platforms.
[numthreads(256,1,1)]
void CSParticle (uint3 id : SV_DispatchThreadID)
{
Particle particle = particleBuffer[id.x];

particle.life -= deltaTime;

float3 delta = float3(mousePosition.xy,3) - particle.position;
// normalize() of a zero vector is undefined and would poison velocity and position
// with NaN, so a particle sitting exactly on the target gets no pull this step.
float3 dir = dot(delta, delta) > 0.0 ? normalize(delta) : float3(0,0,0);

particle.velocity += dir;
particle.position += particle.velocity * deltaTime;

particleBuffer[id.x] = particle;
// respawn() writes straight into the buffer, overriding the value stored above.
if(particle.life < 0) respawn(id.x);
}

Shader

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
// Draws every element of particleBuffer as a single point whose colour and
// alpha depend on the particle's remaining life.
Shader "Custom/Particle" {
Properties
{
_PointSize("Point size", Float) = 5.0
}

SubShader {
Pass {
Tags{ "RenderType" = "Opaque" }
LOD 200
// Additive blending: source colour weighted by its alpha is added to the destination.
Blend SrcAlpha one

CGPROGRAM
// Plain vertex/fragment program; no surface shader or lighting is involved.
#pragma vertex vert
#pragma fragment frag

uniform float _PointSize;

#include "UnityCG.cginc"

// Compile for shader model 5.0.
// NOTE(review): presumably required for reading a StructuredBuffer in the vertex stage — confirm.
#pragma target 5.0

// Per-particle state; layout matches the struct used by the compute shader.
struct Particle{
float3 position;
float3 velocity;
float life;
};

// Vertex-to-fragment data. `life` is declared but only ever zero-initialised by vert.
struct v2f{
float4 position : SV_POSITION;
float4 color : COLOR;
float life : LIFE;
float size: PSIZE;
};
// particles' data
StructuredBuffer<Particle> particleBuffer;


// Vertex stage for point rendering: one instance per particle, fetched from
// particleBuffer by instance id (no vertex input stream is used).
v2f vert(uint vertex_id : SV_VertexID, uint instance_id : SV_InstanceID)
{
v2f o = (v2f)0;

// Color
// life * 0.25 maps the respawn life of 4 to 1.0, so colour and alpha fade as life runs out.
float life = particleBuffer[instance_id].life;
float lerpVal = life * 0.25f;
o.color = fixed4(1.0f - lerpVal+0.1, lerpVal+0.1, 1.0f, lerpVal);

// Position
// The particle position is transformed to clip space; PSIZE carries the point size.
o.position = UnityObjectToClipPos(float4(particleBuffer[instance_id].position, 1.0f));
o.size = _PointSize;

return o;
}

// Fragment stage: outputs the interpolated particle colour unchanged.
float4 frag(v2f i) : COLOR
{
return i.color;
}


ENDCG
}
}
FallBack Off
}

# 将 Point 改成 Quad

大部分还和上述一样,但是将顶点的数据放到 Procedure 上

1
2
3
4
5
6
7
8
9
10
11
12
// Create the compute buffers: the particle data plus a separate buffer for the quad vertices.
// NOTE(review): numVertices, SIZE_VERTEX and vertexArray are not defined in this excerpt —
// presumably 6 vertices per particle, matching the draw call; confirm.
particleBuffer = new ComputeBuffer(numParticles, SIZE_PARTICLE);
particleBuffer.SetData(particleArray);
vertexBuffer = new ComputeBuffer(numVertices, SIZE_VERTEX);
vertexBuffer.SetData(vertexArray);

// Bind both buffers to the compute kernel, and pass it half of the quad size.
shader.SetBuffer(kernelID, "particleBuffer", particleBuffer);
shader.SetBuffer(kernelID, "vertexBuffer", vertexBuffer);
shader.SetFloat("halfSize", quadSize*0.5f);

// The material is bound to the same vertex buffer, so the render shader can read from it.
material.SetBuffer("vertexBuffer", vertexBuffer);

然后依然调用 DrawProceduralNow , 但是使用 Triangles 模式

1
2
3
4
5
// Draws the particles as triangles: 6 vertices per instance, one instance per particle.
void OnRenderObject()
{
// DrawProceduralNow takes no material argument, so the pass is activated first.
material.SetPass(0);
Graphics.DrawProceduralNow(MeshTopology.Triangles, 6, numParticles);
}
    1. 先生成顶点缓冲区,将 ComputeBuffer 和 Shader 的顶点 SSBO 联立在一起
    1. 设置 Triangle 模式调用 DrawProceduralNow
    1. 每帧 Dispatch ComputeShader, 在 CS 中计算每个粒子的顶点
    1. Shader 自行读取缓冲区并且渲染