From ad73897a6c3d4a805bf922c4c8ad9729b59ef9d4 Mon Sep 17 00:00:00 2001
From: York Jasper Niebuhr
Date: Thu, 23 Oct 2025 00:56:16 +0200
Subject: [PATCH] spslr_pinpoint labels

---
 playground/spslr_pinpoint.cpp | 358 +++++++++++++++++++++++++++++++---
 1 file changed, 329 insertions(+), 29 deletions(-)

diff --git a/playground/spslr_pinpoint.cpp b/playground/spslr_pinpoint.cpp
index 14c49f4..b285bbf 100644
--- a/playground/spslr_pinpoint.cpp
+++ b/playground/spslr_pinpoint.cpp
@@ -11,7 +11,7 @@
 #include
 #include
 #include
-
+#include
 #include
 #include
 #include
@@ -19,10 +19,20 @@
 #include
 #include
 #include
+#include
+#include
+#include
+#include
 
 int plugin_is_GPL_compatible;
 
-#define SPSLR_OFFSETOF "__spslr_offsetof"
+#ifndef SPSLR_OFFSETOF
+#define SPSLR_OFFSETOF "__spslr_offsetof_" /* with suffix */
+#endif
+
+#ifndef UNSPEC_SPSLR_OFFSETOF
+#define UNSPEC_SPSLR_OFFSETOF 1042
+#endif
 
 // Recognize __attribute__((spslr)) on structures and track their layout
 
@@ -215,31 +225,39 @@ struct SPSLROffsetofCallData {
     Member::OFF member;
 };
 
-static tree spslr_offsetof_decl = NULL_TREE;
 static SPSLROffsetofCallData::UID spslr_offsetof_next_uid = 0;
 static std::unordered_map spslr_offsetof_calls;
 
-static bool require_spslr_offsetof_decl() {
-    if (spslr_offsetof_decl)
-        return true;
+// Build the declaration of the per-access marker function "<SPSLR_OFFSETOF><uid>".
+// Calls to it are placeholders only: the RTL passes added further down replace them
+// with labeled constants. Returns NULL_TREE on failure.
+static tree make_spslr_offsetof_decl(SPSLROffsetofCallData::UID uid) {
+    // Prototyped "size_t f(void)": handing a NULL argument list to build_function_type
+    // would declare an unprototyped function instead
+    tree fntype = build_function_type_list(sizetype, NULL_TREE);
 
-    tree param_types = tree_cons(NULL_TREE, long_unsigned_type_node, NULL_TREE);
-    tree fntype = build_function_type(sizetype, param_types);
+    // Cast so the argument always matches %lu, whatever integer type UID is
+    char fnname[128];
+    snprintf(fnname, sizeof(fnname), "%s%lu", SPSLR_OFFSETOF, (unsigned long) uid);
 
-    spslr_offsetof_decl = build_fn_decl(SPSLR_OFFSETOF, fntype);
-    if (!spslr_offsetof_decl)
-        return false;
+    tree fnname_tree = get_identifier(fnname);
+    if (!fnname_tree)
+        return NULL_TREE;
 
-    DECL_EXTERNAL (spslr_offsetof_decl) = 1;
-    TREE_PUBLIC (spslr_offsetof_decl) = 1;
-    DECL_ARTIFICIAL (spslr_offsetof_decl) = 1;
+    tree fndecl = build_fn_decl(IDENTIFIER_POINTER(fnname_tree), fntype);
+    if (!fndecl)
+        return NULL_TREE;
 
-	// Prevent VOP problems later when removing calls
+    DECL_EXTERNAL(fndecl) = 1;
+    TREE_PUBLIC(fndecl) = 1;
+    DECL_ARTIFICIAL(fndecl) = 1;
+
+    // Prevent VOP problems later when removing calls
     // Explanation -> VOPs mark memory side-effects, which these calls have none of anyways
-    DECL_PURE_P(spslr_offsetof_decl) = 1;
-    DECL_IS_NOVOPS(spslr_offsetof_decl) = 1;
+    DECL_PURE_P(fndecl) = 1;
+    DECL_IS_NOVOPS(fndecl) = 1;
 
-    return true;
+    return fndecl;
 }
 
 static bool is_relevant_offsetof(tree ref, Target::UID& tuid, Member::OFF& moff) {
@@ -291,17 +309,16 @@ static tree instrument_offsetof_maybe(tree ref) {
     location_t loc = EXPR_LOCATION(ref);
     tree result_type = TREE_TYPE(ref);
 
-    if (!require_spslr_offsetof_decl()) {
-	std::cerr << "spslr_pinpoint -> failed to instrument COMPONENT_REF (" << SPSLR_OFFSETOF << " unavailable)" << std::endl;
+    call_data.uid = spslr_offsetof_next_uid++;
+
+    tree spslr_offsetof_decl = NULL_TREE;
+    if (!(spslr_offsetof_decl = make_spslr_offsetof_decl(call_data.uid))) {
+        std::cerr << "spslr_pinpoint -> failed to instrument COMPONENT_REF (" << SPSLR_OFFSETOF << " unavailable)" << std::endl;
         return NULL_TREE;
     }
 
-    call_data.uid = spslr_offsetof_next_uid++;
-
     spslr_offsetof_calls.emplace(call_data.uid, call_data);
-
-    tree call_uid_tree = build_int_cst(long_unsigned_type_node, call_data.uid);
-    tree call_tree = build_call_expr_loc(loc, spslr_offsetof_decl, 1, call_uid_tree);
+    tree call_tree = build_call_expr_loc(loc, spslr_offsetof_decl, 0);
 
     // Get base as char*
     tree base = TREE_OPERAND(ref, 0);
@@ -494,17 +511,17 @@ static tree gimple_instrument_offsetof_maybe(tree ref, gimple_stmt_iterator* gsi
         // NOTE -> Could fold into single call here (needs to track what offsets contribute, +irrelevant combined)
         if (cr.relevant) {
             // Add offsetof call
-            if (!require_spslr_offsetof_decl()) {
+            tree spslr_offsetof_decl = NULL_TREE;
+            if (!(spslr_offsetof_decl = make_spslr_offsetof_decl(cr.call_data.uid))) {
                 std::cerr << "spslr_pinpoint -> failed to instrument COMPONENT_REF ("
-			<< SPSLR_OFFSETOF << " unavailable)" << std::endl;
+                    << SPSLR_OFFSETOF << " unavailable)" << std::endl;
                 return NULL_TREE;
             }
 
             spslr_offsetof_calls.emplace(cr.call_data.uid, cr.call_data);
 
             // Call to __spslr_offsetof is a separate statement
-            tree call_uid_arg = build_int_cst(long_unsigned_type_node, cr.call_data.uid);
-            gimple* call_stmt = gimple_build_call(spslr_offsetof_decl, 1, call_uid_arg);
+            gimple* call_stmt = gimple_build_call(spslr_offsetof_decl, 0);
             tree offset_tmp = create_tmp_var(size_type_node, NULL);
             gimple_call_set_lhs(call_stmt, offset_tmp);
             gsi_insert_before(gsi, call_stmt, GSI_SAME_STMT);
@@ -684,6 +701,275 @@ unsigned int print_pass::execute(function* fun) {
     return 0;
 }
 
+// At early RTL, replace __spslr_offsetof_ with UNSPECs to avoid ABI and clobbering "problems"
+
+// Fetch the callee name of a direct call, i.e. (call (mem (symbol_ref "name")) ...).
+// Returns false for anything else, such as a call through a register.
+static bool extract_callee_symbol (rtx call_rtx, const char** out_name) {
+    if (!call_rtx || !out_name || GET_CODE(call_rtx) != CALL)
+        return false;
+
+    // Operand 0 of a CALL is the MEM wrapping the callee address
+    rtx op0 = XEXP(call_rtx, 0);
+    if (!op0 || !MEM_P(op0))
+        return false;
+
+    rtx addr = XEXP(op0, 0);
+    if (addr && GET_CODE(addr) == SYMBOL_REF) {
+        *out_name = XSTR(addr, 0);
+        return true;
+    }
+
+    return false;
+}
+
+// Parse the decimal uid out of a "<SPSLR_OFFSETOF><uid>" symbol name.
+// The whole remainder after the prefix must be digits, so a symbol that merely starts
+// with the prefix (say "<prefix>12_helper") is not mistaken for a marker.
+static bool parse_uid_from_name(const char* name, unsigned long* out_uid) {
+    if (!name || !out_uid)
+        return false;
+
+    const size_t prefix_len = strlen(SPSLR_OFFSETOF);
+    if (strncmp(name, SPSLR_OFFSETOF, prefix_len) != 0)
+        return false;
+
+    // strtoul would skip leading whitespace and accept a sign, so insist on a digit first
+    const char *p = name + prefix_len;
+    if (*p < '0' || *p > '9')
+        return false;
+
+    char *endp = nullptr;
+    unsigned long v = strtoul(p, &endp, 10);
+    if (endp == p || *endp != '\0')
+        return false;
+
+    *out_uid = v;
+    return true;
+}
+
+// Find the value-returning call inside an insn pattern: either a plain
+// (set dest (call ...)) or the first such SET inside a PARALLEL.
+static bool extract_dest_and_call (rtx pat, rtx* out_dest, rtx* out_call) {
+    if (!pat)
+        return false;
+
+    if (GET_CODE(pat) == SET) {
+        rtx src = SET_SRC(pat);
+        if (GET_CODE(src) == CALL) {
+            *out_dest = SET_DEST(pat);
+            *out_call = src;
+            return true;
+        }
+        return false;
+    }
+
+    if (GET_CODE(pat) == PARALLEL) {
+        // Look for a SET whose src is CALL
+        int n = XVECLEN(pat, 0);
+        for (int i = 0; i < n; ++i) {
+            rtx elt = XVECEXP(pat, 0, i);
+            if (GET_CODE(elt) == SET && GET_CODE(SET_SRC(elt)) == CALL) {
+                *out_dest = SET_DEST(elt);
+                *out_call = SET_SRC(elt);
+                return true;
+            }
+        }
+    }
+
+    return false;
+}
+
+// Replace every call to a "<SPSLR_OFFSETOF><uid>" marker function with
+// (set dest (unspec [(const_int uid)] UNSPEC_SPSLR_OFFSETOF)).
+static void call_to_unspec(function* fn) {
+    if (!fn)
+        return;
+
+    unsigned replaced = 0;
+
+    basic_block bb;
+    FOR_EACH_BB_FN(bb, fn) {
+        // The _SAFE walk reads the successor before the body runs, so deleting insn is fine
+        rtx_insn* insn;
+        rtx_insn* next;
+        FOR_BB_INSNS_SAFE(bb, insn, next) {
+            if (!CALL_P(insn))
+                continue;
+
+            rtx pat = PATTERN(insn);
+            rtx dest = NULL_RTX;
+            rtx call = NULL_RTX;
+            if (!extract_dest_and_call(pat, &dest, &call))
+                continue;
+
+            const char *name = nullptr;
+            if (!extract_callee_symbol(call, &name))
+                continue;
+
+            unsigned long uid = 0;
+            if (!parse_uid_from_name(name, &uid))
+                continue;
+
+            // We expect a returning call (assigned to dest)
+            if (!dest || !REG_P(dest) || GET_MODE(dest) == VOIDmode)
+                continue;
+
+            machine_mode mode = GET_MODE(dest);
+
+            // Build: (set dest (unspec:mode [(const_int uid)] UNSPEC_SPSLR_OFFSETOF))
+            rtvec vec = gen_rtvec(1, GEN_INT((HOST_WIDE_INT) uid));
+            rtx uns = gen_rtx_UNSPEC(mode, vec, UNSPEC_SPSLR_OFFSETOF); // Note -> maybe volatile
+            rtx set = gen_rtx_SET(dest, uns);
+
+            emit_insn_before(set, insn);
+            delete_insn(insn);
+            replaced++;
+        }
+    }
+
+    if (replaced) {
+        df_set_bb_dirty(ENTRY_BLOCK_PTR_FOR_FN(fn));
+        df_set_bb_dirty(EXIT_BLOCK_PTR_FOR_FN(fn));
+    }
+}
+
+// RTL pass descriptor; the only property it requires is PROP_rtl
+const pass_data call_to_unspec_pass_data = {
+    RTL_PASS,
+    "call_to_unspec",
+    OPTGROUP_NONE,
+    TV_NONE,
+    PROP_rtl,
+    0, 0, 0, 0
+};
+
+struct call_to_unspec_pass : rtl_opt_pass {
+    call_to_unspec_pass(gcc::context* ctxt)
+        : rtl_opt_pass(call_to_unspec_pass_data, ctxt) {}
+
+    unsigned int execute(function* fn) override {
+        call_to_unspec(fn);
+        return 0;
+    }
+};
+
+// Late RTL pass replaces UNSPECs with labeled constants (must happen before vregs, no optimizations afterwards)
+// NOTE(review): "vregs" appears to run right after "expand", ahead of the RTL optimizers -- confirm the label stays next to its constant
+
+// Look up the member offset recorded for access `uid` when it was instrumented.
+// Returns false, leaving ioff untouched, if the uid is unknown.
+static bool lookup_initial_member_offset(SPSLROffsetofCallData::UID uid, Member::OFF& ioff) {
+    auto call_data_it = spslr_offsetof_calls.find(uid);
+    if (call_data_it == spslr_offsetof_calls.end())
+        return false;
+
+    const SPSLROffsetofCallData& call_data = call_data_it->second;
+
+    // call_data.target, call_data.member
+
+    ioff = call_data.member;
+    return true;
+}
+
+// Emit an asm statement consisting only of the label "<SPSLR_OFFSETOF><uid>:"
+// immediately in front of `before`.
+static void emit_named_asm_label_before(SPSLROffsetofCallData::UID uid, rtx_insn* before) {
+    char name[128];
+    snprintf(name, sizeof(name), "%s%lu:\n", SPSLR_OFFSETOF, (unsigned long) uid);
+
+    /* ASM_OPERANDS takes input, input-constraint and label vectors; all are empty here. */
+    rtvec no_inputs = rtvec_alloc(0);
+    rtvec no_input_constraints = rtvec_alloc(0);
+    rtvec no_labels = rtvec_alloc(0);
+    const char* no_output_constraint = "";
+
+    /* Location: use current insn's location if available. */
+    location_t loc = INSN_LOCATION(before);
+
+    /* Zero-operand asm; with no outputs the output operand index is 0. */
+    rtx asmops = gen_rtx_ASM_OPERANDS (VOIDmode, ggc_strdup(name), no_output_constraint, 0, no_inputs, no_input_constraints, no_labels, loc);
+
+    /* An ASM_OPERANDS is only volatile once its flag is set; an output-less,
+       non-volatile asm is fair game for dead code removal. */
+    MEM_VOLATILE_P (asmops) = 1;
+
+    /* Emit it right before the target insn. This does NOT affect the CFG. */
+    emit_insn_before (asmops, before);
+}
+
+// Replace each (set dest (unspec [(const_int uid)] UNSPEC_SPSLR_OFFSETOF)) with
+// (set dest (const_int <recorded member offset>)), preceded by the uid's asm label.
+static void unspec_to_labeled_const(function* fn) {
+    basic_block bb;
+    FOR_EACH_BB_FN(bb, fn) {
+        for (rtx_insn *insn = BB_HEAD(bb); insn != NEXT_INSN(BB_END(bb)); insn = NEXT_INSN(insn)) {
+            if (!INSN_P(insn))
+                continue;
+
+            rtx pat = PATTERN(insn);
+            if (GET_CODE(pat) != SET)
+                continue;
+
+            rtx src = SET_SRC(pat);
+            if (GET_CODE(src) != UNSPEC)
+                continue;
+
+            if (XINT(src, 1) != UNSPEC_SPSLR_OFFSETOF)
+                continue;
+
+            /* Extract UID from UNSPEC operands. */
+            if (XVEC(src, 0) == NULL || XVECLEN(src, 0) < 1)
+                continue;
+
+            rtx arg = XVECEXP(src, 0, 0);
+            if (!CONST_INT_P(arg))
+                continue;
+
+            SPSLROffsetofCallData::UID uid = (SPSLROffsetofCallData::UID)INTVAL(arg);
+            rtx dest = SET_DEST(pat);
+
+            Member::OFF initial_offset = uid; // TODO -> Use value that forces 32 bit (currently uses 32 bit anyways)?
+            if (!lookup_initial_member_offset(uid, initial_offset)) {
+                std::cerr << "Failed to query initial member offset for access uid " << uid << "!" << std::endl;
+                return;
+            }
+
+            // Generate asm label
+            emit_named_asm_label_before(uid, insn);
+
+            /* 2. Replace UNSPEC with constant. */
+            rtx new_set = gen_rtx_SET(dest, GEN_INT(initial_offset));
+            PATTERN(insn) = new_set;
+            INSN_CODE(insn) = -1; /* force re-recognition */
+
+            std::cout << "Inserted labeled initial member offset " << initial_offset
+                << " for access uid " << uid << "!" << std::endl;
+        }
+    }
+}
+
+// RTL pass descriptor; the only property it requires is PROP_rtl
+const pass_data unspec_to_lconst_pass_data = {
+    RTL_PASS,
+    "unspec_to_lconst",
+    OPTGROUP_NONE,
+    TV_NONE,
+    PROP_rtl,
+    0, 0, 0, 0
+};
+
+struct unspec_to_lconst_pass : rtl_opt_pass {
+    unspec_to_lconst_pass (gcc::context *ctxt)
+        : rtl_opt_pass(unspec_to_lconst_pass_data, ctxt) {}
+
+    unsigned int execute(function* fn) override {
+        unspec_to_labeled_const(fn);
+        return 0;
+    }
+};
+
+
 // Hook everything up in plugin_init
 
 int plugin_init (struct plugin_name_args *plugin_info, struct plugin_gcc_version *version) {
@@ -711,5 +997,19 @@ int plugin_init (struct plugin_name_args *plugin_info, struct plugin_gcc_version
     print_pass_info.pos_op = PASS_POS_INSERT_AFTER;
     register_callback(plugin_info->base_name, PLUGIN_PASS_MANAGER_SETUP, nullptr, &print_pass_info);
 
+    struct register_pass_info call_to_unspec_pass_info;
+    call_to_unspec_pass_info.pass = new call_to_unspec_pass(nullptr);
+    call_to_unspec_pass_info.reference_pass_name = "expand";
+    call_to_unspec_pass_info.ref_pass_instance_number = 1;
+    call_to_unspec_pass_info.pos_op = PASS_POS_INSERT_AFTER;
+    register_callback(plugin_info->base_name, PLUGIN_PASS_MANAGER_SETUP, nullptr, &call_to_unspec_pass_info);
+
+    struct register_pass_info unspec_to_lconst_pass_info;
+    unspec_to_lconst_pass_info.pass = new unspec_to_lconst_pass(nullptr);
+    unspec_to_lconst_pass_info.reference_pass_name = "vregs";
+    unspec_to_lconst_pass_info.ref_pass_instance_number = 1;
+    unspec_to_lconst_pass_info.pos_op = PASS_POS_INSERT_BEFORE;
+    register_callback(plugin_info->base_name, PLUGIN_PASS_MANAGER_SETUP, nullptr, &unspec_to_lconst_pass_info);
+
     return 0;
 }