BATCH 3 | Project Stage 2, Part 3 - Cloned Functions Comparison and Reflection
Project Stage 2 - Part 3
Cloned Functions Comparison and Reflection
Let's Dive In!
scale_samples.default:
00000000004008e0 <scale_samples.default>:
4008e0: 7100005f cmp w2, #0x0
4008e4: 5400090d b.le 400a04 <scale_samples.default+0x124>
4008e8: 53114064 lsl w4, w3, #15
4008ec: 5290a3e5 mov w5, #0x851f // #34079
4008f0: 4b030084 sub w4, w4, w3
4008f4: 72aa3d65 movk w5, #0x51eb, lsl #16
4008f8: 51000447 sub w7, w2, #0x1
4008fc: 2a0203e6 mov w6, w2
400900: 9b257c85 smull x5, w4, w5
400904: 9365fca5 asr x5, x5, #37
400908: 4b847ca4 sub w4, w5, w4, asr #31
40090c: 531f7884 lsl w4, w4, #1
400910: 710008ff cmp w7, #0x2
400914: 540007a9 b.ls 400a08 <scale_samples.default+0x128> // b.plast
400918: 91000803 add x3, x0, #0x2
40091c: cb030023 sub x3, x1, x3
400920: f100307f cmp x3, #0xc
400924: 54000729 b.ls 400a08 <scale_samples.default+0x128> // b.plast
400928: 710018ff cmp w7, #0x6
40092c: 54000829 b.ls 400a30 <scale_samples.default+0x150> // b.plast
400930: 53037c45 lsr w5, w2, #3
400934: 4e040c9f dup v31.4s, w4
400938: d2800003 mov x3, #0x0 // #0
40093c: d37ceca5 lsl x5, x5, #4
400940: 3ce36800 ldr q0, [x0, x3]
400944: 0f10a41d sxtl v29.4s, v0.4h
400948: 4f10a400 sxtl2 v0.4s, v0.8h
40094c: 4ebf9fbd mul v29.4s, v29.4s, v31.4s
400950: 4ebf9c00 mul v0.4s, v0.4s, v31.4s
400954: 4e405ba0 uzp2 v0.8h, v29.8h, v0.8h
400958: 3ca36820 str q0, [x1, x3]
40095c: 91004063 add x3, x3, #0x10
400960: eb0300bf cmp x5, x3
400964: 54fffee1 b.ne 400940 <scale_samples.default+0x60> // b.any
400968: 121d7043 and w3, w2, #0xfffffff8
40096c: 2a0303e5 mov w5, w3
400970: 6b03005f cmp w2, w3
400974: 54000480 b.eq 400a04 <scale_samples.default+0x124> // b.none
400978: 4b030046 sub w6, w2, w3
40097c: 510004c7 sub w7, w6, #0x1
400980: 710008ff cmp w7, #0x2
400984: 54000169 b.ls 4009b0 <scale_samples.default+0xd0> // b.plast
400988: d37f7ca5 ubfiz x5, x5, #1, #32
40098c: fc65681e ldr d30, [x0, x5]
400990: 0f10a7de sxtl v30.4s, v30.4h
400994: 4ebf9fde mul v30.4s, v30.4s, v31.4s
400998: 0f1087de shrn v30.4h, v30.4s, #16
40099c: fc25683e str d30, [x1, x5]
4009a0: f24004df tst x6, #0x3
4009a4: 54000300 b.eq 400a04 <scale_samples.default+0x124> // b.none
4009a8: 121e74c6 and w6, w6, #0xfffffffc
scale_samples.Mrng:
0000000000400a40 <scale_samples._Mrng>:
400a40: 7100005f cmp w2, #0x0
400a44: 5400090d b.le 400b64 <scale_samples._Mrng+0x124>
400a48: 53114064 lsl w4, w3, #15
400a4c: 5290a3e5 mov w5, #0x851f // #34079
400a50: 4b030084 sub w4, w4, w3
400a54: 72aa3d65 movk w5, #0x51eb, lsl #16
400a58: 51000447 sub w7, w2, #0x1
400a5c: 2a0203e6 mov w6, w2
400a60: 9b257c85 smull x5, w4, w5
400a64: 9365fca5 asr x5, x5, #37
400a68: 4b847ca4 sub w4, w5, w4, asr #31
400a6c: 531f7884 lsl w4, w4, #1
400a70: 710008ff cmp w7, #0x2
400a74: 540007a9 b.ls 400b68 <scale_samples._Mrng+0x128> // b.plast
400a78: 91000803 add x3, x0, #0x2
400a7c: cb030023 sub x3, x1, x3
400a80: f100307f cmp x3, #0xc
400a84: 54000729 b.ls 400b68 <scale_samples._Mrng+0x128> // b.plast
400a88: 710018ff cmp w7, #0x6
400a8c: 54000829 b.ls 400b90 <scale_samples._Mrng+0x150> // b.plast
400a90: 53037c45 lsr w5, w2, #3
400a94: 4e040c9f dup v31.4s, w4
400a98: d2800003 mov x3, #0x0 // #0
400a9c: d37ceca5 lsl x5, x5, #4
400aa0: 3ce36800 ldr q0, [x0, x3]
400aa4: 0f10a41d sxtl v29.4s, v0.4h
400aa8: 4f10a400 sxtl2 v0.4s, v0.8h
400aac: 4ebf9fbd mul v29.4s, v29.4s, v31.4s
400ab0: 4ebf9c00 mul v0.4s, v0.4s, v31.4s
400ab4: 4e405ba0 uzp2 v0.8h, v29.8h, v0.8h
400ab8: 3ca36820 str q0, [x1, x3]
400abc: 91004063 add x3, x3, #0x10
400ac0: eb0300bf cmp x5, x3
400ac4: 54fffee1 b.ne 400aa0 <scale_samples._Mrng+0x60> // b.any
400ac8: 121d7043 and w3, w2, #0xfffffff8
400acc: 2a0303e5 mov w5, w3
400ad0: 6b03005f cmp w2, w3
400ad4: 54000480 b.eq 400b64 <scale_samples._Mrng+0x124> // b.none
400ad8: 4b030046 sub w6, w2, w3
400adc: 510004c7 sub w7, w6, #0x1
400ae0: 710008ff cmp w7, #0x2
400ae4: 54000169 b.ls 400b10 <scale_samples._Mrng+0xd0> // b.plast
400ae8: d37f7ca5 ubfiz x5, x5, #1, #32
400aec: fc65681e ldr d30, [x0, x5]
400af0: 0f10a7de sxtl v30.4s, v30.4h
400af4: 4ebf9fde mul v30.4s, v30.4s, v31.4s
400af8: 0f1087de shrn v30.4h, v30.4s, #16
400afc: fc25683e str d30, [x1, x5]
400b00: f24004df tst x6, #0x3
scale_samples.default:
00000000004008e0 <scale_samples.default>:
4008e0: 7100005f cmp w2, #0x0
4008e4: 5400090d b.le 400a04 <scale_samples.default+0x124>
4008e8: 53114064 lsl w4, w3, #15
4008ec: 5290a3e5 mov w5, #0x851f // #34079
4008f0: 4b030084 sub w4, w4, w3
4008f4: 72aa3d65 movk w5, #0x51eb, lsl #16
4008f8: 51000447 sub w7, w2, #0x1
4008fc: 2a0203e6 mov w6, w2
400900: 9b257c85 smull x5, w4, w5
400904: 9365fca5 asr x5, x5, #37
400908: 4b847ca4 sub w4, w5, w4, asr #31
40090c: 531f7884 lsl w4, w4, #1
400910: 710008ff cmp w7, #0x2
400914: 540007a9 b.ls 400a08 <scale_samples.default+0x128> // b.plast
400918: 91000803 add x3, x0, #0x2
40091c: cb030023 sub x3, x1, x3
400920: f100307f cmp x3, #0xc
400924: 54000729 b.ls 400a08 <scale_samples.default+0x128> // b.plast
400928: 710018ff cmp w7, #0x6
40092c: 54000829 b.ls 400a30 <scale_samples.default+0x150> // b.plast
400930: 53037c45 lsr w5, w2, #3
400934: 4e040c9f dup v31.4s, w4
400938: d2800003 mov x3, #0x0 // #0
40093c: d37ceca5 lsl x5, x5, #4
400940: 3ce36800 ldr q0, [x0, x3]
400944: 0f10a41d sxtl v29.4s, v0.4h
400948: 4f10a400 sxtl2 v0.4s, v0.8h
40094c: 4ebf9fbd mul v29.4s, v29.4s, v31.4s
400950: 4ebf9c00 mul v0.4s, v0.4s, v31.4s
400954: 4e405ba0 uzp2 v0.8h, v29.8h, v0.8h
400958: 3ca36820 str q0, [x1, x3]
40095c: 91004063 add x3, x3, #0x10
400960: eb0300bf cmp x5, x3
400964: 54fffee1 b.ne 400940 <scale_samples.default+0x60> // b.any
400968: 121d7043 and w3, w2, #0xfffffff8
40096c: 2a0303e5 mov w5, w3
400970: 6b03005f cmp w2, w3
400974: 54000480 b.eq 400a04 <scale_samples.default+0x124> // b.none
400978: 4b030046 sub w6, w2, w3
40097c: 510004c7 sub w7, w6, #0x1
400980: 710008ff cmp w7, #0x2
400984: 54000169 b.ls 4009b0 <scale_samples.default+0xd0> // b.plast
400988: d37f7ca5 ubfiz x5, x5, #1, #32
40098c: fc65681e ldr d30, [x0, x5]
400990: 0f10a7de sxtl v30.4s, v30.4h
400994: 4ebf9fde mul v30.4s, v30.4s, v31.4s
400998: 0f1087de shrn v30.4h, v30.4s, #16
40099c: fc25683e str d30, [x1, x5]
4009a0: f24004df tst x6, #0x3
4009a4: 54000300 b.eq 400a04 <scale_samples.default+0x124> // b.none
4009a8: 121e74c6 and w6, w6, #0xfffffffc
scale_samples.Msve2:
0000000000400a40 <scale_samples._Msve2>:
400a40: 7100005f cmp w2, #0x0
400a44: 5400034d b.le 400aac <scale_samples._Msve2+0x6c>
400a48: 53114064 lsl w4, w3, #15
400a4c: 5290a3e5 mov w5, #0x851f // #34079
400a50: 4b030084 sub w4, w4, w3
400a54: 72aa3d65 movk w5, #0x51eb, lsl #16
400a58: 91000806 add x6, x0, #0x2
400a5c: 0460e3e3 cnth x3
400a60: cb060026 sub x6, x1, x6
400a64: d1001063 sub x3, x3, #0x4
400a68: 9b257c85 smull x5, w4, w5
400a6c: 9365fca5 asr x5, x5, #37
400a70: 4b847ca4 sub w4, w5, w4, asr #31
400a74: 531f7884 lsl w4, w4, #1
400a78: eb0300df cmp x6, x3
400a7c: 540001a9 b.ls 400ab0 <scale_samples._Msve2+0x70> // b.plast
400a80: d2800003 mov x3, #0x0 // #0
400a84: 04a0e3e5 cntw x5
400a88: 05a0389f mov z31.s, w4
400a8c: 25a20fe7 whilelo p7.s, wzr, w2
400a90: a5235c1e ld1sh {z30.s}, p7/z, [x0, x3, lsl #1]
400a94: 04bf63de mul z30.s, z30.s, z31.s
400a98: 047093de asr z30.s, z30.s, #16
400a9c: e4c35c3e st1h {z30.s}, p7, [x1, x3, lsl #1]
400aa0: 8b050063 add x3, x3, x5
400aa4: 25a20c67 whilelo p7.s, w3, w2
400aa8: 54ffff41 b.ne 400a90 <scale_samples._Msve2+0x50> // b.any
400aac: d65f03c0 ret
400ab0: d37f7c42 ubfiz x2, x2, #1, #32
400ab4: d2800005 mov x5, #0x0 // #0
400ab8: d503201f nop
400abc: d503201f nop
400ac0: 78e56803 ldrsh w3, [x0, x5]
400ac4: 1b047c63 mul w3, w3, w4
400ac8: 13107c63 asr w3, w3, #16
400acc: 78256823 strh w3, [x1, x5]
400ad0: 910008a5 add x5, x5, #0x2
400ad4: eb05005f cmp x2, x5
400ad8: 54ffff41 b.ne 400ac0 <scale_samples._Msve2+0x80> // b.any
400adc: d65f03c0 ret
Example of scale_samples.resolver:
0000000000400b20 <scale_samples.resolver>:
400b20: a9bf7bfd stp x29, x30, [sp, #-16]!
400b24: 910003fd mov x29, sp
400b28: 94000120 bl 400fa8 <__init_cpu_features_resolver>
400b2c: 90000100 adrp x0, 420000 <__libc_start_main@GLIBC_2.34>
400b30: f9402c00 ldr x0, [x0, #88]
400b34: b62000a0 tbz x0, #36, 400b48 <scale_samples.resolver+0x28>
400b38: 90000000 adrp x0, 400000 <__abi_tag-0x254>
400b3c: 91290000 add x0, x0, #0xa40
400b40: a8c17bfd ldp x29, x30, [sp], #16
400b44: d65f03c0 ret
400b48: 90000000 adrp x0, 400000 <__abi_tag-0x254>
400b4c: 91238000 add x0, x0, #0x8e0
400b50: 17fffffc b 400b40 <scale_samples.resolver+0x20>
Final Clone-Prune Analysis Pass Code
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "basic-block.h"
#include "function.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "tree-pass.h"
#include "context.h"
#include <string.h>
namespace {
const pass_data count_pass_data = {
GIMPLE_PASS,
"count",
OPTGROUP_NONE,
TV_NONE,
PROP_gimple_any,
0, 0, 0, 0
};
static bool seen_clone = false;
static char stored_base[256] = "";
static int stored_bb_count = 0;
static int stored_gimple_count = 0;
class count_pass : public gimple_opt_pass {
public:
count_pass(gcc::context *ctxt)
: gimple_opt_pass(count_pass_data, ctxt) {}
bool gate(function *fun) override {
(void) fun;
return true;
}
unsigned int execute(function *fun) override {
const char *full_name = function_name(fun);
char base_name[256] = "";
bool is_resolver = (strstr(full_name, "resolver") != nullptr);
const char *dot = strchr(full_name, '.');
if (dot != nullptr) {
size_t len = dot - full_name;
if (len >= sizeof(base_name))
len = sizeof(base_name) - 1;
strncpy(base_name, full_name, len);
base_name[len] = '\0';
fprintf(stderr, "Detected clone for function: %s (full name: %s)\n",
base_name, full_name);
} else {
strncpy(base_name, full_name, sizeof(base_name) - 1);
base_name[sizeof(base_name) - 1] = '\0';
fprintf(stderr, "Processing original function (for clone comparison): %s\n", full_name);
}
// Count basic blocks and GIMPLE statements.
int bb_count = 0, gimple_count = 0;
basic_block bb;
FOR_EACH_BB_FN(bb, fun) {
for (gimple_stmt_iterator gsi = gsi_start_bb(bb);
!gsi_end_p(gsi);
gsi_next(&gsi)) {
gimple_count++;
}
bb_count++;
}
// Only perform clone comparison for non-resolver functions.
if (!is_resolver) {
if (!seen_clone) {
seen_clone = true;
strncpy(stored_base, base_name, sizeof(stored_base) - 1);
stored_base[sizeof(stored_base) - 1] = '\0';
stored_bb_count = bb_count;
stored_gimple_count = gimple_count;
} else {
if (strcmp(stored_base, base_name) == 0 &&
stored_bb_count == bb_count &&
stored_gimple_count == gimple_count) {
// They are substantially the same.
if (dump_file)
fprintf(dump_file, "PRUNE: %s\n", stored_base);
else
fprintf(stderr, "PRUNE: %s\n", stored_base);
} else {
if (dump_file)
fprintf(dump_file, "NOPRUNE: %s\n", stored_base);
else
fprintf(stderr, "NOPRUNE: %s\n", stored_base);
}
seen_clone = false;
}
} else {
fprintf(stderr, "Skipping resolver function: %s\n", full_name);
}
if (dump_file)
fprintf(dump_file, "Function %s: %d BBs, %d GIMPLE stmts\n",
full_name, bb_count, gimple_count);
else
fprintf(stderr, "Function %s: %d BBs, %d GIMPLE stmts\n",
full_name, bb_count, gimple_count);
return 0;
}
};
}
opt_pass *make_count_pass(gcc::context *ctxt) {
return new count_pass(ctxt);
}
Comments
Post a Comment