forked from ispc/ispc.github.com
-
Notifications
You must be signed in to change notification settings - Fork 0
/
mandelbrot.txt
197 lines (195 loc) · 7.12 KB
/
mandelbrot.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
## Read-only vector constants used by _mandelbrot_ispc.  Each table is
## eight 32-bit elements (32 bytes) and is read there with 32-byte aligned
## ymm loads (vmovdqa / vmovaps / a vmulps memory operand), so the pool
## starts on a 32-byte boundary and every table size is a multiple of 32.
.section __TEXT,__text,regular,pure_instructions
.section __TEXT,__const
.align 5
## Lane indices 0..7 (int32); added to the broadcast column index.
LCPI1_0:
.long 0, 1, 2, 3, 4, 5, 6, 7
## int32 1 in every lane; the per-lane iteration-counter increment.
LCPI1_1:
.rept 8
.long 0x1
.endr
## float 4.0 in every lane (bit pattern 0x40800000 = 1082130432).
LCPI1_2:
.rept 8
.long 0x40800000
.endr
## float 2.0 in every lane (bit pattern 0x40000000 = 1073741824).
LCPI1_3:
.rept 8
.long 0x40000000
.endr
##-----------------------------------------------------------------------
## _mandelbrot_ispc
##
## Appears to be compiler-generated AVX code (AT&T syntax, Mach-O) that
## fills a 2-D int32 buffer with per-pixel iteration counts, eight
## horizontally adjacent pixels per vector iteration.
##
## Inputs as used below.  The parameter names are presumed from the usual
## ispc Mandelbrot example -- TODO confirm against the ispc source:
##   xmm0 = x0, xmm1 = y0   float origin of the sampled region
##   xmm2 = x1, xmm3 = y1   float opposite corner
##   edi  = width           columns; also the row stride in elements
##   esi  = height          rows
##   edx  = iteration limit (edx is reused as the column index later)
##   rcx  = base address of the int32 output buffer
## Output:  nothing in registers; stores to rcx[(row*width + col)*4].
## Clobbers: rax, rdx, r8, r9, r10, ymm0-ymm15, flags.  rbp is preserved.
## Stack:   frame realigned to 32 bytes; spill slots at 0/32/64/96/128(%rsp).
##
## NOTE(review): every store writes a full 8-lane vector and the column
##   loop steps by 8, so a width that is not a multiple of 8 writes past
##   the end of the row -- confirm the caller guarantees width % 8 == 0
##   or over-allocates the buffer.
##-----------------------------------------------------------------------
.section __TEXT,__text,regular,pure_instructions
.globl _mandelbrot_ispc
.align 4, 0x90
_mandelbrot_ispc: ## @mandelbrot_ispc
## BB#0: ## %allocas
## Prologue: keep rbp as the frame pointer, force rsp to a 32-byte
## boundary so the aligned ymm spills below are legal, then reserve the
## spill area.
pushq %rbp
movq %rsp, %rbp
andq $-32, %rsp
subq $192, %rsp
## kill: XMM1<def> XMM1<kill> XMM1<def> YMM1<def>
## kill: XMM0<def> XMM0<kill> XMM0<def> YMM0<def>
## Per-pixel step sizes (scalar):
##   xmm2 = (xmm2 - xmm0) / (float)edi     -> dx
##   xmm3 = (xmm3 - xmm1) / (float)esi     -> dy
vcvtsi2ss %edi, %xmm0, %xmm4
vsubss %xmm0, %xmm2, %xmm2
vdivss %xmm4, %xmm2, %xmm2
vcvtsi2ss %esi, %xmm0, %xmm4
vsubss %xmm1, %xmm3, %xmm3
vdivss %xmm4, %xmm3, %xmm3
## Nothing to do when the row count is <= 0.
testl %esi, %esi
jle LBB1_10
## BB#1: ## %for_test49.preheader.lr.ph
## Broadcast the loop-invariant scalars to all 8 lanes and spill them:
##    32(%rsp) = dy          128(%rsp) = dx
##    96(%rsp) = lane mask (iteration limit > 0)
##      (%rsp) = y0           64(%rsp) = x0
## Kept live in registers for the whole function:
##    ymm4 / xmm5 = iteration limit (all lanes / copy of the upper half)
##    ymm6        = all-zero vector
vinsertf128 $1, %xmm3, %ymm3, %ymm3
vpermilps $0, %ymm3, %ymm3 ## ymm3 = ymm3[0,0,0,0,4,4,4,4]
vmovaps %ymm3, 32(%rsp) ## 32-byte Spill
vinsertf128 $1, %xmm2, %ymm2, %ymm2
vpermilps $0, %ymm2, %ymm2 ## ymm2 = ymm2[0,0,0,0,4,4,4,4]
vmovaps %ymm2, 128(%rsp) ## 32-byte Spill
## Iteration limit: edx -> every lane of ymm4, upper half copied to xmm5.
vmovd %edx, %xmm2
vinsertf128 $1, %xmm2, %ymm2, %ymm2
vpermilps $0, %ymm2, %ymm4 ## ymm4 = ymm2[0,0,0,0,4,4,4,4]
vextractf128 $1, %ymm4, %xmm5
vpxor %xmm6, %xmm6, %xmm6
vextractf128 $1, %ymm6, %xmm3
## ymm2 starts the y0 broadcast here because xmm1 is overwritten next.
vinsertf128 $1, %xmm1, %ymm1, %ymm2
## Integer compare (limit > 0) is done per 128-bit half and re-joined
## (presumably because the target has AVX but no 256-bit integer ops).
vpcmpgtd %xmm3, %xmm5, %xmm1
vpcmpgtd %xmm6, %xmm4, %xmm3
vinsertf128 $1, %xmm1, %ymm3, %ymm1
vmovdqa %ymm1, 96(%rsp) ## 32-byte Spill
vpermilps $0, %ymm2, %ymm2 ## ymm2 = ymm2[0,0,0,0,4,4,4,4]
vmovaps %ymm2, (%rsp) ## 32-byte Spill
vinsertf128 $1, %xmm0, %ymm0, %ymm0
vpermilps $0, %ymm0, %ymm0 ## ymm0 = ymm0[0,0,0,0,4,4,4,4]
vmovaps %ymm0, 64(%rsp) ## 32-byte Spill
## r9d = row index, starting at 0.
xorl %r9d, %r9d
## r8d = sign bits of the (limit > 0) mask; zero means no lane may iterate.
vmovmskps %ymm1, %r8d
.align 4, 0x90
LBB1_2: ## %for_test49.preheader
## =>This Loop Header: Depth=1
## Child Loop BB1_8 Depth 2
## Child Loop BB1_5 Depth 2
## Child Loop BB1_6 Depth 3
## Row loop.  Skip the row entirely when the column count is <= 0.
testl %edi, %edi
jle LBB1_9
## BB#3: ## %for_loop51.lr.ph
## in Loop: Header=BB1_2 Depth=1
## xmm2 = (float)row, r10d = row * width (element offset of the row),
## edx = column index = 0.
vcvtsi2ss %r9d, %xmm0, %xmm2
movl %r9d, %r10d
imull %edi, %r10d
xorl %edx, %edx
## With an empty (limit > 0) mask the row is just filled with zeros.
testl %r8d, %r8d
je LBB1_8
## BB#4: ## in Loop: Header=BB1_2 Depth=1
## ymm8 = y0 + (float)row * dy in every lane (imaginary part of c).
vinsertf128 $1, %xmm2, %ymm2, %ymm0
vpermilps $0, %ymm0, %ymm0 ## ymm0 = ymm0[0,0,0,0,4,4,4,4]
vmovaps 32(%rsp), %ymm1 ## 32-byte Reload
vmulps %ymm1, %ymm0, %ymm0
vmovaps (%rsp), %ymm1 ## 32-byte Reload
vaddps %ymm0, %ymm1, %ymm8
.align 4, 0x90
LBB1_5: ## %for_loop.i.lr.ph
## Parent Loop BB1_2 Depth=1
## => This Loop Header: Depth=2
## Child Loop BB1_6 Depth 3
## Column loop, 8 pixels per pass.
## ymm10 = x0 + (float)(col + {0..7}) * dx (real part of c per lane).
vmovd %edx, %xmm0
vinsertf128 $1, %xmm0, %ymm0, %ymm0
vpermilps $0, %ymm0, %ymm2 ## ymm2 = ymm0[0,0,0,0,4,4,4,4]
vextractf128 $1, %ymm2, %xmm0
vmovdqa LCPI1_0(%rip), %ymm1
vextractf128 $1, %ymm1, %xmm3
vpaddd %xmm3, %xmm0, %xmm0
vpaddd %xmm1, %xmm2, %xmm1
vinsertf128 $1, %xmm0, %ymm1, %ymm0
vcvtdq2ps %ymm0, %ymm0
vmovaps 128(%rsp), %ymm1 ## 32-byte Reload
vmulps %ymm1, %ymm0, %ymm0
vmovaps 64(%rsp), %ymm1 ## 32-byte Reload
vaddps %ymm0, %ymm1, %ymm10
## Initial state of the per-pixel iteration:
##   ymm14 = active-lane mask (limit > 0)
##   ymm11 = lanes that have escaped so far (none)
##   ymm9  = iteration counter (0)
##   ymm12 = z_re = c_re,  ymm13 = z_im = c_im
vmovaps 96(%rsp), %ymm14 ## 32-byte Reload
vmovdqa %ymm6, %ymm11
vmovdqa %ymm6, %ymm9
vmovaps %ymm10, %ymm12
vmovaps %ymm8, %ymm13
.align 4, 0x90
LBB1_6: ## %for_loop.i
## Parent Loop BB1_2 Depth=1
## Parent Loop BB1_5 Depth=2
## => This Inner Loop Header: Depth=3
## One masked iteration of z = z*z + c for 8 lanes:
##   ymm15 = z_im^2, ymm2 = z_re^2, ymm3 = z_re^2 + z_im^2
##   ymm0  = counter + 1 (added per 128-bit half, then re-joined)
##   ymm11 |= active & (4.0 < ymm3)       lanes that escape this pass
##   ymm7  = active & ~ymm11              lanes that take this update
##   ymm9  = ymm7 ? counter + 1 : counter
##   ymm14 = ymm7 & (counter < limit)     active mask for the next pass
##   ymm13 = ymm7 ? 2*z_re*z_im + c_im : z_im
##   ymm12 = ymm7 ? z_re^2 - z_im^2 + c_re : z_re
## The loop repeats while any lane of ymm14 is still set.
vmovdqa LCPI1_1(%rip), %ymm7
vextractf128 $1, %ymm7, %xmm0
vextractf128 $1, %ymm9, %xmm1
vmulps %ymm13, %ymm13, %ymm15
vmulps %ymm12, %ymm12, %ymm2
vaddps %ymm15, %ymm2, %ymm3
vpaddd %xmm0, %xmm1, %xmm1
vpaddd %xmm7, %xmm9, %xmm0
vmovaps LCPI1_2(%rip), %ymm7
vinsertf128 $1, %xmm1, %ymm0, %ymm0
vcmpltps %ymm3, %ymm7, %ymm1
vandps %ymm1, %ymm14, %ymm1
vorps %ymm11, %ymm1, %ymm11
vandnps %ymm14, %ymm11, %ymm7
vblendvps %ymm7, %ymm0, %ymm9, %ymm9
vextractf128 $1, %ymm9, %xmm0
vpcmpgtd %xmm0, %xmm5, %xmm3
vpcmpgtd %xmm9, %xmm4, %xmm1
vmulps LCPI1_3(%rip), %ymm12, %ymm0
vmulps %ymm13, %ymm0, %ymm0
vaddps %ymm8, %ymm0, %ymm0
vinsertf128 $1, %xmm3, %ymm1, %ymm1
vandps %ymm1, %ymm7, %ymm14
vmovmskps %ymm14, %eax
vblendvps %ymm7, %ymm0, %ymm13, %ymm13
vsubps %ymm15, %ymm2, %ymm0
vaddps %ymm10, %ymm0, %ymm0
vblendvps %ymm7, %ymm0, %ymm12, %ymm12
testl %eax, %eax
jne LBB1_6
## BB#7: ## %mandel___ffi.exit
## in Loop: Header=BB1_5 Depth=2
## Store the 8 counters at element index (col + row*width); unaligned
## store because nothing here establishes the buffer's alignment.
leal (%rdx,%r10), %eax
movslq %eax, %rax
vmovups %ymm9, (%rcx,%rax,4)
## Next group of 8 columns; signed compare against the column count.
addl $8, %edx
cmpl %edi, %edx
jl LBB1_5
jmp LBB1_9
.align 4, 0x90
LBB1_8: ## %mandel___ffi.exit.us
## Parent Loop BB1_2 Depth=1
## => This Inner Loop Header: Depth=2
## Column loop taken when no lane may iterate: write 8 zero counters
## (ymm6) per pass using the same indexing as the store above.
leal (%rdx,%r10), %eax
movslq %eax, %rax
vmovdqu %ymm6, (%rcx,%rax,4)
addl $8, %edx
cmpl %edi, %edx
jl LBB1_8
LBB1_9: ## %for_exit52
## in Loop: Header=BB1_2 Depth=1
## Advance to the next row.  Ending on inequality is safe because the
## row count was checked to be > 0 on entry.
incl %r9d
cmpl %esi, %r9d
jne LBB1_2
LBB1_10: ## %for_exit
## Epilogue: drop the realigned frame, restore rbp, and clear the upper
## ymm halves before returning.
movq %rbp, %rsp
popq %rbp
vzeroupper
ret
.subsections_via_symbols