C言語からアセンブリ言語に書き出した時の計算量について
前提と質問内容
(前提)3x3の正方行列の掛け算を行うプログラムをC言語で書き、
compiler explorerというC言語のアセンブラ出力を確認できるページで
x86-64 icc 19.0.1とMIPS gcc 5.4のアセンブリ言語に出力しました。
(質問)内側のループにおける代入文とフロー制御の命令回数と、プログラム全体の計算複雑度はC言語からアセンブリ言語に変換すると変わるのでしょうか。
現在の見解
C言語のプログラムでは、内側のループにおける代入文とフロー制御(C言語はプログラム上でプログラマーが行う必要がないのかもしれませんが)の命令数は
C[i][j] += A[i][k]*B[k][j];
の1回だと考えています。
またプログラム全体の計算複雑度はO(N^3)だと思います。
(現在の見解)内側のループにおける代入文とフロー制御は、アセンブリ言語の方が明確にアドレスなどを指定しないといけないので、命令数が増えると考えています。一方、プログラム全体の計算複雑度はどのように考えればいいのかアセンブリ言語を見てもわからず困っています。
内側のループにおける代入文とフロー制御について、
例えばMIPSの場合、compiler explorerで
C[i][j] += A[i][k]*B[k][j];
と対応する以下の部分の命令数を数えれば「内側のループにおける代入文とフロー制御の命令回数」と言えるのでしょうか。
lw $3,32($fp)
nop
move $2,$3
sll $2,$2,1
addu $2,$2,$3
lw $3,36($fp)
nop
addu $2,$2,$3
sll $2,$2,3
addiu $3,$fp,32
addu $2,$3,$2
lwc1 $f2,164($2)
nop
lwc1 $f3,160($2)
lw $3,32($fp)
nop
move $2,$3
sll $2,$2,1
addu $2,$2,$3
lw $3,40($fp)
nop
addu $2,$2,$3
sll $2,$2,3
addiu $3,$fp,32
addu $2,$3,$2
lwc1 $f4,20($2)
nop
lwc1 $f5,16($2)
lw $3,40($fp)
nop
move $2,$3
sll $2,$2,1
addu $2,$2,$3
lw $3,36($fp)
nop
addu $2,$2,$3
sll $2,$2,3
addiu $3,$fp,32
addu $2,$3,$2
lwc1 $f0,92($2)
nop
lwc1 $f1,88($2)
nop
mul.d $f0,$f4,$f0
add.d $f0,$f2,$f0
lw $3,32($fp)
nop
move $2,$3
sll $2,$2,1
addu $2,$2,$3
lw $3,36($fp)
nop
addu $2,$2,$3
sll $2,$2,3
addiu $3,$fp,32
addu $2,$3,$2
swc1 $f0,164($2)
swc1 $f1,160($2)
実行中のプログラム
x86-64 icc 19.0.1とMIPS gcc 5.4のアセンブリ言語に出力したプログラムも掲載しましたが、C言語と対応する部分を確認するには、以下のC言語のプログラムをcompiler explorerに貼り付けて変換いただく方がわかりやすいと思います。
C言語のプログラム
#include<stdio.h>
#define N 3
int
main(int argc, char *argv[])
{
double A[N][N] = {
{1.0, 3.0, 1.0},
{3.0, 1.0, 3.0},
{1.0, 1.0, 1.0}
};
double B[N][N] = {
{6.0, 1.0, 0.0},
{0.0, 1.0, 6.0},
{0.0, 1.0, 1.0}
};
double C[N][N] = {
{0.0, 0.0, 0.0},
{0.0, 0.0, 0.0},
{0.0, 0.0, 0.0}
};
int i, j, k;
for(i=0; i<N; i++)
for(j=0; j<N; j++)
for(k=0; k<N; k++)
C[i][j] += A[i][k]*B[k][j];
for(i=0; i<N; i++)
for(j=0; j<N; j++)
printf("C[%d][%d] = %f\n", i, j, C[i][j]);
}
MIPSのプログラム
$LC2:
.ascii "C[%d][%d] = %f\012\000"
$LC0:
.word 1072693248
.word 0
.word 1074266112
.word 0
.word 1072693248
.word 0
.word 1074266112
.word 0
.word 1072693248
.word 0
.word 1074266112
.word 0
.word 1072693248
.word 0
.word 1072693248
.word 0
.word 1072693248
.word 0
$LC1:
.word 1075314688
.word 0
.word 1072693248
.word 0
.word 0
.word 0
.word 0
.word 0
.word 1072693248
.word 0
.word 1075314688
.word 0
.word 0
.word 0
.word 1072693248
.word 0
.word 1072693248
.word 0
main:
addiu $sp,$sp,-272
sw $31,268($sp)
sw $fp,264($sp)
move $fp,$sp
sw $4,272($fp)
sw $5,276($fp)
lui $2,%hi($LC0)
addiu $3,$fp,48
addiu $2,$2,%lo($LC0)
li $4,72 # 0x48
move $6,$4
move $5,$2
move $4,$3
jal memcpy
nop
lui $2,%hi($LC1)
addiu $3,$fp,120
addiu $2,$2,%lo($LC1)
li $4,72 # 0x48
move $6,$4
move $5,$2
move $4,$3
jal memcpy
nop
sw $0,196($fp)
sw $0,192($fp)
sw $0,204($fp)
sw $0,200($fp)
sw $0,212($fp)
sw $0,208($fp)
sw $0,220($fp)
sw $0,216($fp)
sw $0,228($fp)
sw $0,224($fp)
sw $0,236($fp)
sw $0,232($fp)
sw $0,244($fp)
sw $0,240($fp)
sw $0,252($fp)
sw $0,248($fp)
sw $0,260($fp)
sw $0,256($fp)
sw $0,32($fp)
$L7:
lw $2,32($fp)
nop
slt $2,$2,3
beq $2,$0,$L2
nop
sw $0,36($fp)
$L6:
lw $2,36($fp)
nop
slt $2,$2,3
beq $2,$0,$L3
nop
sw $0,40($fp)
$L5:
lw $2,40($fp)
nop
slt $2,$2,3
beq $2,$0,$L4
nop
lw $3,32($fp)
nop
move $2,$3
sll $2,$2,1
addu $2,$2,$3
lw $3,36($fp)
nop
addu $2,$2,$3
sll $2,$2,3
addiu $3,$fp,32
addu $2,$3,$2
lwc1 $f2,164($2)
nop
lwc1 $f3,160($2)
lw $3,32($fp)
nop
move $2,$3
sll $2,$2,1
addu $2,$2,$3
lw $3,40($fp)
nop
addu $2,$2,$3
sll $2,$2,3
addiu $3,$fp,32
addu $2,$3,$2
lwc1 $f4,20($2)
nop
lwc1 $f5,16($2)
lw $3,40($fp)
nop
move $2,$3
sll $2,$2,1
addu $2,$2,$3
lw $3,36($fp)
nop
addu $2,$2,$3
sll $2,$2,3
addiu $3,$fp,32
addu $2,$3,$2
lwc1 $f0,92($2)
nop
lwc1 $f1,88($2)
nop
mul.d $f0,$f4,$f0
add.d $f0,$f2,$f0
lw $3,32($fp)
nop
move $2,$3
sll $2,$2,1
addu $2,$2,$3
lw $3,36($fp)
nop
addu $2,$2,$3
sll $2,$2,3
addiu $3,$fp,32
addu $2,$3,$2
swc1 $f0,164($2)
swc1 $f1,160($2)
lw $2,40($fp)
nop
addiu $2,$2,1
sw $2,40($fp)
b $L5
nop
$L4:
lw $2,36($fp)
nop
addiu $2,$2,1
sw $2,36($fp)
b $L6
nop
$L3:
lw $2,32($fp)
nop
addiu $2,$2,1
sw $2,32($fp)
b $L7
nop
$L2:
sw $0,32($fp)
$L11:
lw $2,32($fp)
nop
slt $2,$2,3
beq $2,$0,$L8
nop
sw $0,36($fp)
$L10:
lw $2,36($fp)
nop
slt $2,$2,3
beq $2,$0,$L9
nop
lw $3,32($fp)
nop
move $2,$3
sll $2,$2,1
addu $2,$2,$3
lw $3,36($fp)
nop
addu $2,$2,$3
sll $2,$2,3
addiu $3,$fp,32
addu $2,$3,$2
lwc1 $f0,164($2)
nop
lwc1 $f1,160($2)
nop
swc1 $f0,20($sp)
swc1 $f1,16($sp)
lw $6,36($fp)
lw $5,32($fp)
lui $2,%hi($LC2)
addiu $4,$2,%lo($LC2)
jal printf
nop
lw $2,36($fp)
nop
addiu $2,$2,1
sw $2,36($fp)
b $L10
nop
$L9:
lw $2,32($fp)
nop
addiu $2,$2,1
sw $2,32($fp)
b $L11
nop
$L8:
move $2,$0
move $sp,$fp
lw $31,268($sp)
lw $fp,264($sp)
addiu $sp,$sp,272
j $31
nop
x86-64 のプログラム
.L_2__STRING.0:
.long 1680169795
.long 1680169821
.long 540876893
.long 681509
main:
push rbp #7.1
mov rbp, rsp #7.1
sub rsp, 256 #7.1
mov QWORD PTR [-8+rbp], rbx #7.1[spill]
mov DWORD PTR [-240+rbp], edi #7.1
mov QWORD PTR [-232+rbp], rsi #7.1
lea rax, QWORD PTR [-224+rbp] #8.17
mov edx, offset flat: A.136.0 #8.17
mov ecx, 72 #8.17
mov rdi, rax #8.17
mov rsi, rdx #8.17
mov eax, ecx #8.17
shr rcx, 2 #8.17
rep movsd #8.17
mov ecx, eax #8.17
and ecx, 3 #8.17
rep movsb #8.17
lea rax, QWORD PTR [-152+rbp] #14.17
mov edx, offset flat: B.136.0 #14.17
mov ecx, 72 #14.17
mov rdi, rax #14.17
mov rsi, rdx #14.17
mov eax, ecx #14.17
shr rcx, 2 #14.17
rep movsd #14.17
mov ecx, eax #14.17
and ecx, 3 #14.17
rep movsb #14.17
lea rax, QWORD PTR [-80+rbp] #20.17
mov edx, 0 #20.17
mov ecx, 72 #20.17
mov rdi, rax #20.17
mov eax, edx #20.17
and eax, 65535 #20.17
mov ah, al #20.17
mov edx, eax #20.17
shl eax, 16 #20.17
or eax, edx #20.17
mov esi, ecx #20.17
shr rcx, 2 #20.17
rep stosd #20.17
mov ecx, esi #20.17
and ecx, 3 #20.17
rep stosb #20.17
mov DWORD PTR [-256+rbp], 0 #27.6
..B1.5: # Preds ..B1.6 ..B1.4
mov eax, DWORD PTR [-256+rbp] #27.11
cmp eax, 3 #27.13
jl ..B1.7 # Prob 50% #27.13
jmp ..B1.13 # Prob 100% #27.13
..B1.6: # Preds ..B1.8
mov eax, 1 #27.16
add eax, DWORD PTR [-256+rbp] #27.16
mov DWORD PTR [-256+rbp], eax #27.16
jmp ..B1.5 # Prob 100% #27.16
..B1.7: # Preds ..B1.5
mov DWORD PTR [-252+rbp], 0 #28.7
..B1.8: # Preds ..B1.9 ..B1.7
mov eax, DWORD PTR [-252+rbp] #28.12
cmp eax, 3 #28.14
jl ..B1.10 # Prob 50% #28.14
jmp ..B1.6 # Prob 100% #28.14
..B1.9: # Preds ..B1.11
mov eax, 1 #28.17
add eax, DWORD PTR [-252+rbp] #28.17
mov DWORD PTR [-252+rbp], eax #28.17
jmp ..B1.8 # Prob 100% #28.17
..B1.10: # Preds ..B1.8
mov DWORD PTR [-248+rbp], 0 #29.8
..B1.11: # Preds ..B1.12 ..B1.10
mov eax, DWORD PTR [-248+rbp] #29.13
cmp eax, 3 #29.15
jge ..B1.9 # Prob 50% #29.15
lea rax, QWORD PTR [-80+rbp] #30.5
mov edx, DWORD PTR [-256+rbp] #30.7
movsxd rdx, edx #30.5
imul rdx, rdx, 24 #30.5
add rax, rdx #30.5
mov edx, DWORD PTR [-252+rbp] #30.10
movsxd rdx, edx #30.5
imul rdx, rdx, 8 #30.5
add rax, rdx #30.5
lea rdx, QWORD PTR [-224+rbp] #30.16
mov ecx, DWORD PTR [-256+rbp] #30.18
movsxd rcx, ecx #30.16
imul rcx, rcx, 24 #30.16
add rdx, rcx #30.16
mov ecx, DWORD PTR [-248+rbp] #30.21
movsxd rcx, ecx #30.16
imul rcx, rcx, 8 #30.16
add rdx, rcx #30.16
movsd xmm0, QWORD PTR [rdx] #30.16
lea rdx, QWORD PTR [-152+rbp] #30.24
mov ecx, DWORD PTR [-248+rbp] #30.26
movsxd rcx, ecx #30.24
imul rcx, rcx, 24 #30.24
add rdx, rcx #30.24
mov ecx, DWORD PTR [-252+rbp] #30.29
movsxd rcx, ecx #30.24
imul rcx, rcx, 8 #30.24
add rdx, rcx #30.24
movsd xmm1, QWORD PTR [rdx] #30.24
mulsd xmm0, xmm1 #30.24
movsd xmm1, QWORD PTR [rax] #30.5
addsd xmm1, xmm0 #30.5
lea rax, QWORD PTR [-80+rbp] #30.5
mov edx, DWORD PTR [-256+rbp] #30.7
movsxd rdx, edx #30.5
imul rdx, rdx, 24 #30.5
add rax, rdx #30.5
mov edx, DWORD PTR [-252+rbp] #30.10
movsxd rdx, edx #30.5
imul rdx, rdx, 8 #30.5
add rax, rdx #30.5
movsd QWORD PTR [rax], xmm1 #30.5
mov eax, 1 #29.18
add eax, DWORD PTR [-248+rbp] #29.18
mov DWORD PTR [-248+rbp], eax #29.18
jmp ..B1.11 # Prob 100% #29.18
..B1.13: # Preds ..B1.5
mov DWORD PTR [-256+rbp], 0 #32.6
..B1.14: # Preds ..B1.15 ..B1.13
mov eax, DWORD PTR [-256+rbp] #32.11
cmp eax, 3 #32.13
jl ..B1.16 # Prob 50% #32.13
jmp ..B1.20 # Prob 100% #32.13
..B1.15: # Preds ..B1.17
mov eax, 1 #32.16
add eax, DWORD PTR [-256+rbp] #32.16
mov DWORD PTR [-256+rbp], eax #32.16
jmp ..B1.14 # Prob 100% #32.16
..B1.16: # Preds ..B1.14
mov DWORD PTR [-252+rbp], 0 #33.7
..B1.17: # Preds ..B1.18 ..B1.16
mov eax, DWORD PTR [-252+rbp] #33.12
cmp eax, 3 #33.14
jl ..B1.19 # Prob 50% #33.14
jmp ..B1.15 # Prob 100% #33.14
..B1.18: # Preds ..B1.23
mov eax, 1 #33.17
add eax, DWORD PTR [-252+rbp] #33.17
mov DWORD PTR [-252+rbp], eax #33.17
jmp ..B1.17 # Prob 100% #33.17
..B1.19: # Preds ..B1.17
mov eax, offset flat: .L_2__STRING.0 #34.4
mov edx, DWORD PTR [-256+rbp] #34.4
mov ecx, DWORD PTR [-252+rbp] #34.4
lea rbx, QWORD PTR [-80+rbp] #34.4
mov esi, DWORD PTR [-256+rbp] #34.4
movsxd rsi, esi #34.4
imul rsi, rsi, 24 #34.4
add rbx, rsi #34.4
mov esi, DWORD PTR [-252+rbp] #34.4
movsxd rsi, esi #34.4
imul rsi, rsi, 8 #34.4
add rbx, rsi #34.4
movsd xmm0, QWORD PTR [rbx] #34.4
mov rdi, rax #34.4
mov esi, edx #34.4
mov edx, ecx #34.4
mov eax, 1 #34.4
call printf #34.4
mov DWORD PTR [-244+rbp], eax #34.4
jmp ..B1.18 # Prob 100% #34.4
..B1.20: # Preds ..B1.14
mov eax, 0 #35.1
mov rbx, QWORD PTR [-8+rbp] #35.1[spill]
leave #35.1
ret #35.1
A.136.0:
.long 0x00000000,0x3ff00000
.long 0x00000000,0x40080000
.long 0x00000000,0x3ff00000
.long 0x00000000,0x40080000
.long 0x00000000,0x3ff00000
.long 0x00000000,0x40080000
.long 0x00000000,0x3ff00000
.long 0x00000000,0x3ff00000
.long 0x00000000,0x3ff00000
B.136.0:
.long 0x00000000,0x40180000
.long 0x00000000,0x3ff00000
.long 0x00000000,0x00000000
.long 0x00000000,0x00000000
.long 0x00000000,0x3ff00000
.long 0x00000000,0x40180000
.long 0x00000000,0x00000000
.long 0x00000000,0x3ff00000
.long 0x00000000,0x3ff00000
.long 0x00000000,0x00000000
.long 0x00000000,0x00000000
.long 0x00000000,0x00000000
.long 0x00000000,0x00000000
.long 0x00000000,0x00000000
.long 0x00000000,0x00000000
.long 0x00000000,0x00000000
.long 0x00000000,0x00000000
.long 0x00000000,0x00000000