V로 인한 페널티 총합은 일정하므로, D가 작은 순서대로 푸는 것이 최적이 됩니다.

이 문제에서는 어셈블리로 정렬을 구현하는 부분이 가장 어려웠습니다. N이 11밖에 안 돼서 $O(N^2)$ 정렬만 구현해도 충분했던 게 정말 다행이네요.

정답 코드:

section .rodata
    ; NUL-terminated format strings used by the scanf/printf helper macros.
    ; The leading space in each input format makes scanf skip any whitespace
    ; before the value.
    d_in_: db " %d",0           ; scanf: one int
    q_in_: db " %lld",0         ; scanf: one long long
    s_in_: db " %s",0           ; scanf: one whitespace-delimited word (no width limit)
    d_out_ln: db "%d",10,0      ; printf: int followed by '\n' (byte 10)
    q_out_ln: db "%lld",10,0    ; printf: long long followed by '\n'
    s_out_: db "%s",0           ; printf: string, no newline
    c_out_: db "%c",0           ; printf: single character

section .data
    ; Landing slots for the zero-argument dget/qget macros: scanf writes the
    ; parsed value here and the macro then loads it into eax/rax.
    d_in_val_: dd 0             ; 32-bit slot read back by `dget`
    q_in_val_: dq 0             ; 64-bit slot read back by `qget`

; dget address
;   Reads one decimal int from stdin via scanf(" %d") into the 4 bytes at %1
;   (%1 is a label or a register holding the destination pointer).
;   Requires rsp to be 16-byte aligned at the point of use (libc call).
;   Clobbers all caller-saved registers; rax = scanf's return value.
%macro dget 1
    mov rsi, %1                 ; read the argument first, in case it names rdi
    lea rdi, [rel d_in_]        ; RIP-relative instead of an absolute immediate
    xor eax, eax                ; al = 0: no vector-register args to the variadic call
    call scanf
%endmacro

; dget  (no argument)  == dget &eax
;   Reads one int through the d_in_val_ scratch cell and leaves it in eax.
;   scanf's return value is discarded: if nothing was parsed, eax is whatever
;   d_in_val_ already held.
;   Clobbers all caller-saved registers.
%macro dget 0
    lea rdi, [rel d_in_]
    lea rsi, [rel d_in_val_]
    xor eax, eax                ; al = 0: no vector-register args
    call scanf
    mov eax, [rel d_in_val_]    ; RIP-relative load of the parsed value
%endmacro

; sget address
;   Reads one whitespace-delimited word from stdin via scanf(" %s") into the
;   buffer at %1 (label or pointer register); scanf NUL-terminates it.
;   NOTE(review): "%s" carries no field width, so an input word longer than
;   the buffer overruns it. The caller must size the buffer for the longest
;   possible word.
;   Requires rsp to be 16-byte aligned at the point of use (libc call).
;   Clobbers all caller-saved registers; rax = scanf's return value.
%macro sget 1
    mov rsi, %1                 ; read the argument first, in case it names rdi
    lea rdi, [rel s_in_]        ; RIP-relative instead of an absolute immediate
    xor eax, eax                ; al = 0: no vector-register args to the variadic call
    call scanf
%endmacro

; qget address
;   Reads one decimal long long from stdin via scanf(" %lld") into the
;   8 bytes at %1 (%1 is a label or a register holding the destination).
;   Requires rsp to be 16-byte aligned at the point of use (libc call).
;   Clobbers all caller-saved registers; rax = scanf's return value.
%macro qget 1
    mov rsi, %1                 ; read the argument first, in case it names rdi
    lea rdi, [rel q_in_]        ; RIP-relative instead of an absolute immediate
    xor eax, eax                ; al = 0: no vector-register args to the variadic call
    call scanf
%endmacro

; qget  (no argument)  == qget &rax
;   Reads one long long through the q_in_val_ scratch cell and leaves it in
;   rax. scanf's return value is discarded: if nothing was parsed, rax is
;   whatever q_in_val_ already held.
;   Clobbers all caller-saved registers.
%macro qget 0
    lea rdi, [rel q_in_]
    lea rsi, [rel q_in_val_]
    xor eax, eax                ; al = 0: no vector-register args
    call scanf
    mov rax, [rel q_in_val_]    ; RIP-relative load of the parsed value
%endmacro

; putc value
;   Prints the single byte %1 (8-bit register, byte memory operand or
;   immediate) via printf("%c").
;   Only sil is written; the upper bits of rsi keep their old contents, which
;   is harmless because %c uses just the low byte of its argument.
;   Requires rsp to be 16-byte aligned at the point of use (libc call).
;   Clobbers all caller-saved registers.
%macro putc 1
    mov sil, %1                 ; read the argument first, in case it names dil
    lea rdi, [rel c_out_]       ; RIP-relative instead of an absolute immediate
    xor eax, eax                ; al = 0: no vector-register args to the variadic call
    call printf
%endmacro

; sprint address
;   Prints the NUL-terminated string at %1 (label or pointer register) via
;   printf("%s"); no newline is appended.
;   Requires rsp to be 16-byte aligned at the point of use (libc call).
;   Clobbers all caller-saved registers.
%macro sprint 1
    mov rsi, %1                 ; read the argument first, in case it names rdi
    lea rdi, [rel s_out_]       ; RIP-relative instead of an absolute immediate
    xor eax, eax                ; al = 0: no vector-register args to the variadic call
    call printf
%endmacro

; dprintln value
;   Prints the 32-bit value %1 (register, dword memory operand or immediate)
;   as a signed decimal followed by a newline, via printf("%d\n").
;   Requires rsp to be 16-byte aligned at the point of use (libc call).
;   Clobbers all caller-saved registers.
%macro dprintln 1
    mov esi, %1                 ; read the argument first, in case it names edi/eax
    lea rdi, [rel d_out_ln]     ; RIP-relative instead of an absolute immediate
    xor eax, eax                ; al = 0: no vector-register args to the variadic call
    call printf
%endmacro

; dprintln  (no argument)  == dprintln eax
%macro dprintln 0
    dprintln eax
%endmacro

; qprintln value
;   Prints the 64-bit value %1 (register, qword memory operand or immediate)
;   as a signed decimal followed by a newline, via printf("%lld\n").
;   Requires rsp to be 16-byte aligned at the point of use (libc call).
;   Clobbers all caller-saved registers.
%macro qprintln 1
    mov rsi, %1                 ; read the argument first, in case it names rdi/rax
    lea rdi, [rel q_out_ln]     ; RIP-relative instead of an absolute immediate
    xor eax, eax                ; al = 0: no vector-register args to the variadic call
    call printf
%endmacro

; qprintln  (no argument)  == qprintln rax
%macro qprintln 0
    qprintln rax
%endmacro

; addd dst, src
;   dst += src, staged through r11 so that both operands may be memory
;   operands (x86 has no memory-to-memory add).
;   Side effects: r11 ends up holding the sum, and the flags reflect the add.
;   The operand size is taken from r11, i.e. the operands are handled as
;   64-bit values.
%macro addd 2
    mov r11, %1
    add r11, %2
    mov %1, r11
%endmacro

; open
;   Frame-pointer prologue: saves the caller's rbp and anchors rbp at the
;   current rsp. Undone by the `leave` in `close` / `terminate`.
%macro open 0
    push rbp
    mov rbp, rsp
%endmacro

; open bytes
;   Same prologue, then reserves %1 bytes of stack below rbp for locals.
;   NOTE(review): %1 is subtracted exactly as given; presumably callers pass a
;   multiple of 16 so rsp stays aligned for later calls. Confirm at call sites.
%macro open 1
    push rbp
    mov rbp, rsp
    sub rsp, %1
%endmacro

; close
;   Epilogue for a frame built with `open`: `leave` resets rsp to rbp and
;   pops the saved rbp, then `ret` returns. rax is left untouched, so any
;   return value must already be in place.
%macro close 0
    leave
    ret
%endmacro

; terminate
;   Returns 0 from a function whose frame was set up with `open`.
%macro terminate 0
    xor eax, eax                ; a 32-bit write zeroes the whole of rax
    close
%endmacro

; terminate value
;   Returns %1 (anything `mov rax, ...` accepts) from a function whose frame
;   was set up with `open`.
%macro terminate 1
    mov rax, %1
    close
%endmacro

section .text
    global main
    extern scanf
    extern printf
    extern strcmp

;-----------------------------------------------------------------------
; void q_naive_sort(int64_t *start, int64_t *end)
;   Sorts the signed 64-bit integers in the half-open range [start, end)
;   into ascending order, in place, with an O(n^2) exchange sort: for each
;   slot, every later element is compared against it and swapped in if it
;   is smaller.
; ABI:   System V AMD64
; In:    rdi = start (address of the first element)
;        rsi = end   (address one past the last element)
; Out:   none
; Clobb: rax, rcx, r8, r9, flags
; Ranges with fewer than two elements are left untouched.
;-----------------------------------------------------------------------
q_naive_sort:
    open

    mov r8, rdi                 ; r8 = slot being settled
    lea r9, [r8 + 8]            ; r9 = first candidate after the slot
    cmp r9, rsi
    jae .done                   ; 0 or 1 element: nothing to compare, and
                                ; [r9] would lie outside the range

    ; cmp: rax, rcx
    ; l: r8, r: r9
    .loop:
    mov rax, [r8]
    mov rcx, [r9]
    cmp rax, rcx
    jle .after_swap             ; signed compare: the elements are int64

    mov [r8], rcx               ; smaller value moves into the slot
    mov [r9], rax

    .after_swap:
    add r9, 8
    cmp r9, rsi
    jb .loop                    ; unsigned compare: these are addresses

    add r8, 8                   ; slot settled; move on to the next one
    lea r9, [r8 + 8]
    cmp r9, rsi
    jb .loop                    ; stop once the slot is the last element

    .done:
    close

section .data
    ; General-purpose scratch variables. Of everything declared here, only
    ; `arr` is referenced by the code visible in this file.

    ; 64-bit scratch cells
    a: dq 0
    b: dq 0
    x: dq 0
    y: dq 0

    ; 32-bit scratch cells
    i: dd 0
    j: dd 0
    k: dd 0
    l: dd 0

    ; single-byte scratch cell
    c: db 0

    ; 100-byte zero-filled buffers
    s: times 100 db 0
    s1: times 100 db 0
    s2: times 100 db 0

    ; 11 qwords: main stores the first number of each input pair here and sorts them
    arr: times 11 dq 0

section .text

PROBLEM_COUNT equ 11            ; number of "D V" pairs read from stdin (= slots in arr)
WRONG_PENALTY equ 20            ; penalty added per unit of V

;-----------------------------------------------------------------------
; int main(void)
;   Reads PROBLEM_COUNT pairs "D V" from stdin and prints one number:
;       WRONG_PENALTY * sum(V)  +  sum over i of (D_0 + ... + D_i),
;   where D_0 <= D_1 <= ... is the ascending order of the D values. Each D
;   is counted once for itself and once for every value that follows it, so
;   taking the D values in ascending order gives the smallest total.
; ABI:   System V AMD64
; Out:   rax = 0
; Regs:  r12 = running answer, r13 = loop index. Both are callee-saved, so
;        they are saved on entry and restored before returning.
;-----------------------------------------------------------------------
main:
    open
    push r12                    ; callee-saved registers used below;
    push r13                    ; two pushes keep rsp 16-byte aligned for the calls

    xor r12d, r12d              ; answer
    xor r13d, r13d              ; index

    .loop0:
    qget                        ; rax = D
    lea rcx, [rel arr]          ; reloaded each pass: scanf clobbers volatile registers
    mov [rcx + r13 * 8], rax    ; RIP-relative base cannot take an index directly

    qget                        ; rax = V
    imul rax, WRONG_PENALTY
    add r12, rax

    add r13, 1
    cmp r13, PROBLEM_COUNT
    jl .loop0

    lea rdi, [rel arr]
    lea rsi, [rel arr + PROBLEM_COUNT * 8]
    call q_naive_sort

    lea rcx, [rel arr]          ; no calls inside the loop below, so rcx/rdx survive
    xor r13d, r13d              ; index
    xor edx, edx                ; rdx = D_0 + ... + D_i over the sorted values
    .loop1:
    add rdx, [rcx + r13 * 8]
    add r12, rdx

    add r13, 1
    cmp r13, PROBLEM_COUNT
    jl .loop1

    qprintln r12

    pop r13                     ; restore in reverse order; rsp is back at rbp
    pop r12
    terminate