¿Por qué la optimización de clang está rompiendo mi código ensamblador en línea?
En un intento de aprender algo sobre el ensamblador de ARM, he escrito un proyecto de prueba sencillo que reduce el tamaño de una imagen mediante ensamblador en línea e instrucciones NEON. Se puede ver aquí:
https://github.com/rmaz/NEON-Image-Downscaling
Después de algún esfuerzo conseguí que funcionara, días felices. Excepto que solo funciona con niveles de optimización inferiores a -O2. He echado un vistazo al ASM generado, pero no veo ninguna razón obvia para que esto ocurra. ¿Alguien puede ofrecer alguna idea? Esta es la función que contiene el ensamblador en línea:
static void inline resizeRow(uint32_t *dst, uint32_t *src, uint32_t pixelsPerRow)
{
const uint32_t * rowB = src + pixelsPerRow;
// force the number of pixels per row to a mutliple of 8
pixelsPerRow = 8 * (pixelsPerRow/8);
__asm__ volatile("Lresizeloop: \n" // start loop
"vld1.32 {d0-d3}, [%1]! \n" // load 8 pixels from the top row
"vld1.32 {d4-d7}, [%2]! \n" // load 8 pixels from the bottom row
"vhadd.u8 q0, q0, q2 \n" // average the pixels vertically
"vhadd.u8 q1, q1, q3 \n"
"vtrn.32 q0, q2 \n" // transpose to put the horizontally adjacent pixels in different registers
"vtrn.32 q1, q3 \n"
"vhadd.u8 q0, q0, q2 \n" // average the pixels horizontally
"vhadd.u8 q1, q1, q3 \n"
"vtrn.32 d0, d1 \n" // fill the registers with pixels
"vtrn.32 d2, d3 \n"
"vswp d1, d2 \n"
"vst1.64 {d0-d1}, [%0]! \n" // store the result
"subs %3, %3, #8 \n" // subtract 8 from the pixel count
"bne Lresizeloop \n" // repeat until the row is complete
: "=r"(dst), "=r"(src), "=r"(rowB), "=r"(pixelsPerRow)
: "0"(dst), "1"(src), "2"(rowB), "3"(pixelsPerRow)
: "q0", "q1", "q2", "q3"
);
}
La salida generada con -O1 (la que funciona) para la función que la llama y para el bucle es la siguiente:
@ Compiler-generated Thumb listing (.code 16 / .thumb_func) for
@ -[BDPViewController downscaleImageNeon:], presented by the post as the
@ -O1 output. On entry r0 = self, r1 = _cmd, r2 = image (see DEBUG_VALUE).
@ In this listing resizeRow is reached with an out-of-line `bl _resizeRow`.
.align 2
.code 16 @ @"\01-[BDPViewController downscaleImageNeon:]"
.thumb_func "-[BDPViewController downscaleImageNeon:]"
"-[BDPViewController downscaleImageNeon:]":
.cfi_startproc
Lfunc_begin4:
.loc 1 86 0 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:86:0
@ BB#0:
.loc 1 86 1 prologue_end @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:86:1
@ Prologue: save r4-r7/lr, point r7 at the saved-r7 slot (sp+12), save
@ r8/r10/r11, then reserve 20 bytes of stack used below for spills.
push {r4, r5, r6, r7, lr}
add r7, sp, #12
push.w {r8, r10, r11}
sub sp, #20
@DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0
@DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0
@DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:image <- R2+0
.loc 1 88 20 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:88:20
Ltmp41:
@ r11 = selector loaded PC-relative from L_OBJC_SELECTOR_REFERENCES_2 (its
@ name is not visible in this listing); r6 = image. The same selector is
@ sent to image before each CGImageGet* call below.
movw r0, :lower16:(L_OBJC_SELECTOR_REFERENCES_2-(LPC4_0+4))
Ltmp42:
mov r6, r2
Ltmp43:
@DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:image <- R6+0
movt r0, :upper16:(L_OBJC_SELECTOR_REFERENCES_2-(LPC4_0+4))
LPC4_0:
add r0, pc
ldr.w r11, [r0]
mov r0, r6
blx _objc_retain
@ r4 = value returned by objc_retain(image); released again at objc_release below.
mov r4, r0
mov r0, r6
mov r1, r11
Ltmp44:
blx _objc_msgSend
blx _CGImageGetWidth
mov r5, r0
Ltmp45:
@DEBUG_VALUE: width <- R5+0
.loc 1 89 21 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:89:21
mov r0, r6
mov r1, r11
@ width is kept in r5 and also spilled to [sp, #16] for later reloads.
str r5, [sp, #16] @ 4-byte Spill
blx _objc_msgSend
blx _CGImageGetHeight
mov r10, r0
Ltmp46:
@DEBUG_VALUE: height <- R10+0
.loc 1 90 26 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:90:26
mov r0, r6
mov r1, r11
blx _objc_msgSend
blx _CGImageGetBytesPerRow
str r0, [sp, #12] @ 4-byte Spill
Ltmp47:
@DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0
.loc 1 91 35 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:91:35
mov r0, r6
mov r1, r11
blx _objc_msgSend
blx _CGImageGetAlphaInfo
str r0, [sp, #4] @ 4-byte Spill
Ltmp48:
@DEBUG_VALUE: imageAlpha <- [sp+#4]+#0
.loc 1 94 45 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:94:45
mov r0, r6
mov r1, r11
blx _objc_msgSend
mov r6, r0
Ltmp49:
mov r0, r4
blx _objc_release
mov r0, r6
.loc 1 98 29 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:98:29
@ r8 = height * width (r10 * r5); used below to size the calloc.
mul r8, r10, r5
Ltmp50:
@DEBUG_VALUE: width <- [sp+#16]+#0
.loc 1 94 45 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:94:45
blx _CGImageGetDataProvider
blx _CGDataProviderCopyData
Ltmp51:
@DEBUG_VALUE: data <- R0+0
str r0, [sp, #8] @ 4-byte Spill
Ltmp52:
@DEBUG_VALUE: data <- [sp+#8]+#0
.loc 1 95 29 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:95:29
blx _CFDataGetBytePtr
mov r4, r0
Ltmp53:
@DEBUG_VALUE: buffer <- R4+0
.loc 1 98 29 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:98:29
@ calloc((width * height) >> 2, 4): r0 = element count, r1 = element size.
lsr.w r0, r8, #2
movs r1, #4
blx _calloc
mov r5, r0
Ltmp54:
@DEBUG_VALUE: outputBuffer <- R5+0
mov r0, r10
Ltmp55:
@DEBUG_VALUE: height <- R0+0
.loc 1 101 29 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:29
@ Skip the row loop entirely when height == 0.
cmp r0, #0
Ltmp56:
@DEBUG_VALUE: rowIndex <- 0+0
beq LBB4_3
@ BB#1: @ %.lr.ph
Ltmp57:
@DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0
@DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0
@DEBUG_VALUE: width <- [sp+#16]+#0
@DEBUG_VALUE: height <- R0+0
@DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0
@DEBUG_VALUE: imageAlpha <- [sp+#4]+#0
@DEBUG_VALUE: data <- [sp+#8]+#0
@DEBUG_VALUE: buffer <- R4+0
@DEBUG_VALUE: outputBuffer <- R5+0
@DEBUG_VALUE: rowIndex <- 0+0
@ Loop preheader: r8 = rowIndex = 0, r11 = bytesPerRow * 2 (per-iteration
@ advance of the source pointer r4), r10 = bytesPerRow / 2.
ldr r1, [sp, #12] @ 4-byte Reload
Ltmp58:
@DEBUG_VALUE: bytesPerRow <- R1+0
mov.w r8, #0
lsl.w r11, r1, #1
.loc 1 104 74 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:104:74
Ltmp59:
lsr.w r10, r1, #1
Ltmp60:
@DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0
LBB4_2: @ =>This Inner Loop Header: Depth=1
@DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0
@DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0
@DEBUG_VALUE: width <- [sp+#16]+#0
@DEBUG_VALUE: height <- R0+0
@DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0
@DEBUG_VALUE: imageAlpha <- [sp+#4]+#0
@DEBUG_VALUE: data <- [sp+#8]+#0
@DEBUG_VALUE: outputBuffer <- R5+0
@DEBUG_VALUE: rowIndex <- 0+0
@ Loop body: build the three resizeRow arguments.
@   r0 = outputBuffer + (rowIndex / 2) * (bytesPerRow / 2)
@   r1 = r4 (current source position), r2 = width
@ height is parked in r6 while r0 is used for the call.
lsr.w r1, r8, #1
Ltmp61:
mov r6, r0
Ltmp62:
@DEBUG_VALUE: height <- R6+0
mla r0, r1, r10, r5
Ltmp63:
@DEBUG_VALUE: destRow <- R1+0
.loc 1 105 9 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:105:9
ldr r2, [sp, #16] @ 4-byte Reload
mov r1, r4
Ltmp64:
bl _resizeRow
mov r0, r6
Ltmp65:
@DEBUG_VALUE: height <- R0+0
.loc 1 101 50 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:50
@ rowIndex += 2; source pointer += 2 * bytesPerRow; loop while
@ rowIndex < height (unsigned compare).
add.w r8, r8, #2
Ltmp66:
@DEBUG_VALUE: rowIndex <- R8+0
.loc 1 101 29 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:29
add r4, r11
cmp r8, r0
blo LBB4_2
Ltmp67:
LBB4_3: @ %._crit_edge
@DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0
@DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0
@DEBUG_VALUE: width <- [sp+#16]+#0
@DEBUG_VALUE: height <- R0+0
@DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0
@DEBUG_VALUE: imageAlpha <- [sp+#4]+#0
@DEBUG_VALUE: data <- [sp+#8]+#0
@DEBUG_VALUE: outputBuffer <- R5+0
.loc 1 109 28 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:109:28
@ Call createBitmapContext with r0 = outputBuffer, r1 = width / 2,
@ r2 = height / 2, r3 = bytesPerRow / 2 and imageAlpha stored at [sp].
@ outputBuffer is also copied to r6 so it can be freed at the end.
ldr r1, [sp, #4] @ 4-byte Reload
Ltmp68:
lsrs r2, r0, #1
str r1, [sp]
mov r6, r5
Ltmp69:
@DEBUG_VALUE: outputBuffer <- R6+0
ldr r1, [sp, #16] @ 4-byte Reload
ldr r0, [sp, #12] @ 4-byte Reload
Ltmp70:
lsrs r1, r1, #1
lsrs r3, r0, #1
mov r0, r5
bl _createBitmapContext
mov r4, r0
Ltmp71:
@DEBUG_VALUE: context <- R4+0
.loc 1 110 30 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:110:30
blx _CGBitmapContextCreateImage
.loc 1 111 66 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:111:66
movw r1, :lower16:(L_OBJC_SELECTOR_REFERENCES_4-(LPC4_1+4))
.loc 1 110 30 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:110:30
mov r5, r0
Ltmp72:
@DEBUG_VALUE: scaledImage <- R5+0
.loc 1 111 66 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:111:66
@ objc_msgSend(class from L_OBJC_CLASSLIST_REFERENCES_$_,
@              selector from L_OBJC_SELECTOR_REFERENCES_4, scaledImage).
movt r1, :upper16:(L_OBJC_SELECTOR_REFERENCES_4-(LPC4_1+4))
movw r0, :lower16:(L_OBJC_CLASSLIST_REFERENCES_$_-(LPC4_2+4))
movt r0, :upper16:(L_OBJC_CLASSLIST_REFERENCES_$_-(LPC4_2+4))
LPC4_1:
add r1, pc
LPC4_2:
add r0, pc
mov r2, r5
ldr r1, [r1]
ldr r0, [r0]
blx _objc_msgSend
Ltmp73:
@DEBUG_VALUE: returnImage <- R0+0
@ InlineAsm Start
mov r7, r7 @ marker for objc_retainAutoreleaseReturnValue
@ InlineAsm End
blx _objc_retainAutoreleasedReturnValue
Ltmp74:
@ r8 = retained returnImage, held across the cleanup calls below.
mov r8, r0
.loc 1 112 5 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:112:5
mov r0, r5
blx _CGImageRelease
.loc 1 113 5 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:113:5
mov r0, r4
blx _CGContextRelease
.loc 1 114 5 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:114:5
ldr r0, [sp, #8] @ 4-byte Reload
blx _CFRelease
.loc 1 115 5 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:115:5
mov r0, r6
blx _free
Ltmp75:
.loc 1 118 1 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:118:1
@ Epilogue: r0 = returnImage, unwind the frame, restore the saved
@ registers (including lr) and tail-branch to objc_autoreleaseReturnValue.
mov r0, r8
add sp, #20
pop.w {r8, r10, r11}
pop.w {r4, r5, r6, r7, lr}
Ltmp76:
b.w _objc_autoreleaseReturnValue
Ltmp77:
Lfunc_end4:
.cfi_endproc
@ Compiler-generated Thumb listing for resizeRow, presented by the post as
@ the -O1 output. On entry r0 = dst, r1 = src, r2 = pixelsPerRow (see
@ DEBUG_VALUE). Uses r3 and NEON registers d0-d7 (q0-q3); `subs` also
@ updates the condition flags.
.align 2
.code 16 @ @resizeRow
.thumb_func _resizeRow
_resizeRow:
.cfi_startproc
Lfunc_begin5:
.loc 1 26 0 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:26:0
@ BB#0:
@DEBUG_VALUE: resizeRow:dst <- R0+0
@DEBUG_VALUE: resizeRow:src <- R1+0
@DEBUG_VALUE: resizeRow:pixelsPerRow <- R2+0
.loc 1 27 47 prologue_end @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:27:47
@ r3 = rowB = src + pixelsPerRow * 4 bytes.
add.w r3, r1, r2, lsl #2
Ltmp78:
@DEBUG_VALUE: rowB <- R3+0
.loc 1 30 5 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:30:5
@ Clear the low three bits: pixelsPerRow rounded down to a multiple of 8.
bic r2, r2, #7
Ltmp79:
.loc 1 32 5 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:32:5
@ Inline asm with operands substituted: dst = r0, src = r1, rowB = r3,
@ pixel count = r2. Each iteration loads 32 bytes from [r1] and from [r3],
@ stores 16 bytes to [r0] (all post-incremented) and subtracts 8 from r2.
@ NOTE(review): the loop only exits when r2 reaches exactly zero; if r2 is
@ already 0 here, the first `subs` leaves it non-zero and `bne` is taken.
@ InlineAsm Start
Lresizeloop:
vld1.32 {d0-d3}, [r1]!
vld1.32 {d4-d7}, [r3]!
vhadd.u8 q0, q0, q2
vhadd.u8 q1, q1, q3
vtrn.32 q0, q2
vtrn.32 q1, q3
vhadd.u8 q0, q0, q2
vhadd.u8 q1, q1, q3
vtrn.32 d0, d1
vtrn.32 d2, d3
vswp d1, d2
vst1.64 {d0-d1}, [r0]!
subs r2, r2, #8
bne Lresizeloop
@ InlineAsm End
Ltmp80:
.loc 1 51 1 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:51:1
bx lr
Ltmp81:
Lfunc_end5:
.cfi_endproc
Y la salida generada con -O2 (la que no funciona) es la siguiente:
@ Compiler-generated Thumb listing (.code 16 / .thumb_func) for
@ -[BDPViewController downscaleImageNeon:], presented by the post as the
@ -O2 output. On entry r0 = self, r1 = _cmd, r2 = image (see DEBUG_VALUE).
@ NOTE(review): this text appears identical to the listing the post gives
@ for -O1, and resizeRow is still reached with an out-of-line
@ `bl _resizeRow` - confirm against a disassembly of the actual -O2 binary.
.align 2
.code 16 @ @"\01-[BDPViewController downscaleImageNeon:]"
.thumb_func "-[BDPViewController downscaleImageNeon:]"
"-[BDPViewController downscaleImageNeon:]":
.cfi_startproc
Lfunc_begin4:
.loc 1 86 0 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:86:0
@ BB#0:
.loc 1 86 1 prologue_end @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:86:1
@ Prologue: save r4-r7/lr, point r7 at the saved-r7 slot (sp+12), save
@ r8/r10/r11, then reserve 20 bytes of stack used below for spills.
push {r4, r5, r6, r7, lr}
add r7, sp, #12
push.w {r8, r10, r11}
sub sp, #20
@DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0
@DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0
@DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:image <- R2+0
.loc 1 88 20 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:88:20
Ltmp41:
@ r11 = selector loaded PC-relative from L_OBJC_SELECTOR_REFERENCES_2 (its
@ name is not visible in this listing); r6 = image. The same selector is
@ sent to image before each CGImageGet* call below.
movw r0, :lower16:(L_OBJC_SELECTOR_REFERENCES_2-(LPC4_0+4))
Ltmp42:
mov r6, r2
Ltmp43:
@DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:image <- R6+0
movt r0, :upper16:(L_OBJC_SELECTOR_REFERENCES_2-(LPC4_0+4))
LPC4_0:
add r0, pc
ldr.w r11, [r0]
mov r0, r6
blx _objc_retain
@ r4 = value returned by objc_retain(image); released again at objc_release below.
mov r4, r0
mov r0, r6
mov r1, r11
Ltmp44:
blx _objc_msgSend
blx _CGImageGetWidth
mov r5, r0
Ltmp45:
@DEBUG_VALUE: width <- R5+0
.loc 1 89 21 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:89:21
mov r0, r6
mov r1, r11
@ width is kept in r5 and also spilled to [sp, #16] for later reloads.
str r5, [sp, #16] @ 4-byte Spill
blx _objc_msgSend
blx _CGImageGetHeight
mov r10, r0
Ltmp46:
@DEBUG_VALUE: height <- R10+0
.loc 1 90 26 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:90:26
mov r0, r6
mov r1, r11
blx _objc_msgSend
blx _CGImageGetBytesPerRow
str r0, [sp, #12] @ 4-byte Spill
Ltmp47:
@DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0
.loc 1 91 35 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:91:35
mov r0, r6
mov r1, r11
blx _objc_msgSend
blx _CGImageGetAlphaInfo
str r0, [sp, #4] @ 4-byte Spill
Ltmp48:
@DEBUG_VALUE: imageAlpha <- [sp+#4]+#0
.loc 1 94 45 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:94:45
mov r0, r6
mov r1, r11
blx _objc_msgSend
mov r6, r0
Ltmp49:
mov r0, r4
blx _objc_release
mov r0, r6
.loc 1 98 29 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:98:29
@ r8 = height * width (r10 * r5); used below to size the calloc.
mul r8, r10, r5
Ltmp50:
@DEBUG_VALUE: width <- [sp+#16]+#0
.loc 1 94 45 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:94:45
blx _CGImageGetDataProvider
blx _CGDataProviderCopyData
Ltmp51:
@DEBUG_VALUE: data <- R0+0
str r0, [sp, #8] @ 4-byte Spill
Ltmp52:
@DEBUG_VALUE: data <- [sp+#8]+#0
.loc 1 95 29 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:95:29
blx _CFDataGetBytePtr
mov r4, r0
Ltmp53:
@DEBUG_VALUE: buffer <- R4+0
.loc 1 98 29 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:98:29
@ calloc((width * height) >> 2, 4): r0 = element count, r1 = element size.
lsr.w r0, r8, #2
movs r1, #4
blx _calloc
mov r5, r0
Ltmp54:
@DEBUG_VALUE: outputBuffer <- R5+0
mov r0, r10
Ltmp55:
@DEBUG_VALUE: height <- R0+0
.loc 1 101 29 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:29
@ Skip the row loop entirely when height == 0.
cmp r0, #0
Ltmp56:
@DEBUG_VALUE: rowIndex <- 0+0
beq LBB4_3
@ BB#1: @ %.lr.ph
Ltmp57:
@DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0
@DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0
@DEBUG_VALUE: width <- [sp+#16]+#0
@DEBUG_VALUE: height <- R0+0
@DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0
@DEBUG_VALUE: imageAlpha <- [sp+#4]+#0
@DEBUG_VALUE: data <- [sp+#8]+#0
@DEBUG_VALUE: buffer <- R4+0
@DEBUG_VALUE: outputBuffer <- R5+0
@DEBUG_VALUE: rowIndex <- 0+0
@ Loop preheader: r8 = rowIndex = 0, r11 = bytesPerRow * 2 (per-iteration
@ advance of the source pointer r4), r10 = bytesPerRow / 2.
ldr r1, [sp, #12] @ 4-byte Reload
Ltmp58:
@DEBUG_VALUE: bytesPerRow <- R1+0
mov.w r8, #0
lsl.w r11, r1, #1
.loc 1 104 74 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:104:74
Ltmp59:
lsr.w r10, r1, #1
Ltmp60:
@DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0
LBB4_2: @ =>This Inner Loop Header: Depth=1
@DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0
@DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0
@DEBUG_VALUE: width <- [sp+#16]+#0
@DEBUG_VALUE: height <- R0+0
@DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0
@DEBUG_VALUE: imageAlpha <- [sp+#4]+#0
@DEBUG_VALUE: data <- [sp+#8]+#0
@DEBUG_VALUE: outputBuffer <- R5+0
@DEBUG_VALUE: rowIndex <- 0+0
@ Loop body: build the three resizeRow arguments.
@   r0 = outputBuffer + (rowIndex / 2) * (bytesPerRow / 2)
@   r1 = r4 (current source position), r2 = width
@ height is parked in r6 while r0 is used for the call.
lsr.w r1, r8, #1
Ltmp61:
mov r6, r0
Ltmp62:
@DEBUG_VALUE: height <- R6+0
mla r0, r1, r10, r5
Ltmp63:
@DEBUG_VALUE: destRow <- R1+0
.loc 1 105 9 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:105:9
ldr r2, [sp, #16] @ 4-byte Reload
mov r1, r4
Ltmp64:
bl _resizeRow
mov r0, r6
Ltmp65:
@DEBUG_VALUE: height <- R0+0
.loc 1 101 50 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:50
@ rowIndex += 2; source pointer += 2 * bytesPerRow; loop while
@ rowIndex < height (unsigned compare).
add.w r8, r8, #2
Ltmp66:
@DEBUG_VALUE: rowIndex <- R8+0
.loc 1 101 29 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:29
add r4, r11
cmp r8, r0
blo LBB4_2
Ltmp67:
LBB4_3: @ %._crit_edge
@DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0
@DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0
@DEBUG_VALUE: width <- [sp+#16]+#0
@DEBUG_VALUE: height <- R0+0
@DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0
@DEBUG_VALUE: imageAlpha <- [sp+#4]+#0
@DEBUG_VALUE: data <- [sp+#8]+#0
@DEBUG_VALUE: outputBuffer <- R5+0
.loc 1 109 28 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:109:28
@ Call createBitmapContext with r0 = outputBuffer, r1 = width / 2,
@ r2 = height / 2, r3 = bytesPerRow / 2 and imageAlpha stored at [sp].
@ outputBuffer is also copied to r6 so it can be freed at the end.
ldr r1, [sp, #4] @ 4-byte Reload
Ltmp68:
lsrs r2, r0, #1
str r1, [sp]
mov r6, r5
Ltmp69:
@DEBUG_VALUE: outputBuffer <- R6+0
ldr r1, [sp, #16] @ 4-byte Reload
ldr r0, [sp, #12] @ 4-byte Reload
Ltmp70:
lsrs r1, r1, #1
lsrs r3, r0, #1
mov r0, r5
bl _createBitmapContext
mov r4, r0
Ltmp71:
@DEBUG_VALUE: context <- R4+0
.loc 1 110 30 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:110:30
blx _CGBitmapContextCreateImage
.loc 1 111 66 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:111:66
movw r1, :lower16:(L_OBJC_SELECTOR_REFERENCES_4-(LPC4_1+4))
.loc 1 110 30 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:110:30
mov r5, r0
Ltmp72:
@DEBUG_VALUE: scaledImage <- R5+0
.loc 1 111 66 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:111:66
@ objc_msgSend(class from L_OBJC_CLASSLIST_REFERENCES_$_,
@              selector from L_OBJC_SELECTOR_REFERENCES_4, scaledImage).
movt r1, :upper16:(L_OBJC_SELECTOR_REFERENCES_4-(LPC4_1+4))
movw r0, :lower16:(L_OBJC_CLASSLIST_REFERENCES_$_-(LPC4_2+4))
movt r0, :upper16:(L_OBJC_CLASSLIST_REFERENCES_$_-(LPC4_2+4))
LPC4_1:
add r1, pc
LPC4_2:
add r0, pc
mov r2, r5
ldr r1, [r1]
ldr r0, [r0]
blx _objc_msgSend
Ltmp73:
@DEBUG_VALUE: returnImage <- R0+0
@ InlineAsm Start
mov r7, r7 @ marker for objc_retainAutoreleaseReturnValue
@ InlineAsm End
blx _objc_retainAutoreleasedReturnValue
Ltmp74:
@ r8 = retained returnImage, held across the cleanup calls below.
mov r8, r0
.loc 1 112 5 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:112:5
mov r0, r5
blx _CGImageRelease
.loc 1 113 5 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:113:5
mov r0, r4
blx _CGContextRelease
.loc 1 114 5 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:114:5
ldr r0, [sp, #8] @ 4-byte Reload
blx _CFRelease
.loc 1 115 5 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:115:5
mov r0, r6
blx _free
Ltmp75:
.loc 1 118 1 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:118:1
@ Epilogue: r0 = returnImage, unwind the frame, restore the saved
@ registers (including lr) and tail-branch to objc_autoreleaseReturnValue.
mov r0, r8
add sp, #20
pop.w {r8, r10, r11}
pop.w {r4, r5, r6, r7, lr}
Ltmp76:
b.w _objc_autoreleaseReturnValue
Ltmp77:
Lfunc_end4:
.cfi_endproc
@ Compiler-generated Thumb listing for resizeRow, presented by the post as
@ the -O2 output. On entry r0 = dst, r1 = src, r2 = pixelsPerRow (see
@ DEBUG_VALUE). Uses r3 and NEON registers d0-d7 (q0-q3); `subs` also
@ updates the condition flags.
.align 2
.code 16 @ @resizeRow
.thumb_func _resizeRow
_resizeRow:
.cfi_startproc
Lfunc_begin5:
.loc 1 26 0 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:26:0
@ BB#0:
@DEBUG_VALUE: resizeRow:dst <- R0+0
@DEBUG_VALUE: resizeRow:src <- R1+0
@DEBUG_VALUE: resizeRow:pixelsPerRow <- R2+0
.loc 1 27 47 prologue_end @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:27:47
@ r3 = rowB = src + pixelsPerRow * 4 bytes.
add.w r3, r1, r2, lsl #2
Ltmp78:
@DEBUG_VALUE: rowB <- R3+0
.loc 1 30 5 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:30:5
@ Clear the low three bits: pixelsPerRow rounded down to a multiple of 8.
bic r2, r2, #7
Ltmp79:
.loc 1 32 5 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:32:5
@ Inline asm with operands substituted: dst = r0, src = r1, rowB = r3,
@ pixel count = r2. Each iteration loads 32 bytes from [r1] and from [r3],
@ stores 16 bytes to [r0] (all post-incremented) and subtracts 8 from r2.
@ NOTE(review): the loop only exits when r2 reaches exactly zero; if r2 is
@ already 0 here, the first `subs` leaves it non-zero and `bne` is taken.
@ InlineAsm Start
Lresizeloop:
vld1.32 {d0-d3}, [r1]!
vld1.32 {d4-d7}, [r3]!
vhadd.u8 q0, q0, q2
vhadd.u8 q1, q1, q3
vtrn.32 q0, q2
vtrn.32 q1, q3
vhadd.u8 q0, q0, q2
vhadd.u8 q1, q1, q3
vtrn.32 d0, d1
vtrn.32 d2, d3
vswp d1, d2
vst1.64 {d0-d1}, [r0]!
subs r2, r2, #8
bne Lresizeloop
@ InlineAsm End
Ltmp80:
.loc 1 51 1 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:51:1
bx lr
Ltmp81:
Lfunc_end5:
.cfi_endproc
¿Por qué no publicas el código generado? –
Esos dos parecen idénticos. ¿Es este el resultado de ensamblaje del compilador? Intenta usar objdump para obtener el ensamblaje de dos binarios compilados de forma diferente. – auselen