selfpatch-slr/playground/spslr_pinpoint.cpp

656 lines
18 KiB
C++

#include <cerrno>
#include <iostream>
#include <list>
#include <map>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <gcc-plugin.h>
#include <plugin-version.h>
#include <tree.h>
#include <langhooks.h>
#include <gimple.h>
#include <gimplify.h>
#include <stringpool.h>
#include <tree-pass.h>
#include <gimple-iterator.h>
#include <gimple-pretty-print.h>
#include <tree-pretty-print.h>
#include <tree-dump.h>
#include <print-tree.h>
#include <tree-iterator.h>
#include <rtl.h>
#include <memmodel.h>
#include <emit-rtl.h>
#include <df.h>
// Marker symbol that GCC's plugin loader looks up to confirm a GPL-compatible license.
int plugin_is_GPL_compatible;
// Name prefix of the placeholder functions; the numeric access uid is appended to it
// (parse_uid_from_name strips this prefix again when scanning call targets).
#ifndef SPSLR_OFFSETOF
#define SPSLR_OFFSETOF "__spslr_offsetof_" /* with suffix <uid> */
#endif
// UNSPEC index that tags the placeholder RTL built in call_to_unspec and matched again in
// unspec_to_labeled_const.
// NOTE(review): 1042 is a hand-picked number; presumably chosen to stay clear of the
// target's own unspec values -- confirm against the target machine description.
#ifndef UNSPEC_SPSLR_OFFSETOF
#define UNSPEC_SPSLR_OFFSETOF 1042
#endif
// Identify any missing COMPONENT_REFs (e.g. from CONSTRUCTOR trees)
// Static descriptor passed to the gimple_opt_pass base class of log_component_refs_pass.
// The initializer order follows GCC's struct pass_data (tree-pass.h).
static const pass_data log_component_refs_pass_data = {
GIMPLE_PASS, // pass type
"log_component_refs", // pass name (also usable as a reference pass name)
OPTGROUP_NONE, // optinfo group
TV_NONE, // timevar id
0,0,0,0, // properties required / provided / destroyed, todo flags at start
TODO_update_ssa // todo flags at finish
};
// GIMPLE pass that rewrites relevant COMPONENT_REF accesses of every function body.
// The actual work happens in execute(), which is defined further down in this file.
struct log_component_refs_pass : gimple_opt_pass {
    // Binds the pass to its static descriptor; ctxt is forwarded to the base class untouched.
    log_component_refs_pass(gcc::context *ctxt)
        : gimple_opt_pass(log_component_refs_pass_data, ctxt) {}

    unsigned int execute(function* fun) override;
};
// Decomposition of a nested COMPONENT_REF expression, filled in by gimple_component_ref_chain.
struct GCRChain {
// One COMPONENT_REF node of the chain.
struct Link {
tree t = NULL_TREE; // the COMPONENT_REF tree itself
bool relevant = false; // result of is_relevant_offsetof for this node
SPSLROffsetofCallData call_data; // target/member filled by is_relevant_offsetof; uid is assigned only when relevant
};
bool relevant = false; // set as soon as at least one link is relevant
std::list<Link> links; // links are pushed at the front while descending through operand 0, so the innermost access comes first
tree base = NULL_TREE; // first non-COMPONENT_REF operand reached at the bottom of the chain
};
// walk_tree callback used by contains_component_ref.
//   tp            - slot of the node currently visited
//   walk_subtrees - walk_tree's descend flag (left untouched: subtrees are always searched)
//   data          - points to an int that is set to 1 when a COMPONENT_REF is encountered
// Returns the matching node to end the walk as soon as the answer is known (a non-NULL
// return value stops walk_tree), otherwise NULL_TREE to keep walking.
static tree walk_tree_contains_component_ref(tree* tp, int* walk_subtrees, void* data) {
    if (!tp || !*tp || !data)
        return NULL_TREE;
    if (TREE_CODE(*tp) != COMPONENT_REF)
        return NULL_TREE;
    *static_cast<int*>(data) = 1;
    // One hit is all the caller needs; no point in visiting the rest of the tree.
    return *tp;
}
// Reports whether ref, or any tree reachable from it via walk_tree, is a COMPONENT_REF.
static bool contains_component_ref(tree ref) {
    int seen = 0;
    tree* root = &ref;
    walk_tree(root, walk_tree_contains_component_ref, &seen, NULL);
    return seen ? true : false;
}
// Splits a nested COMPONENT_REF expression into its individual member accesses.
// Starting at ref, operand 0 is followed for as long as the node is a COMPONENT_REF; every
// such node is prepended to chain.links, so the list ends up ordered from the access
// closest to the base object to the outermost one. Links accepted by is_relevant_offsetof
// receive a fresh uid from spslr_offsetof_next_uid and mark the whole chain as relevant.
// Returns true when at least one COMPONENT_REF was collected and the remaining base
// expression (stored in chain.base) contains no further COMPONENT_REF; false otherwise,
// including when a null operand is reached.
static bool gimple_component_ref_chain(tree ref, GCRChain& chain) {
    for (tree node = ref; node; node = TREE_OPERAND(node, 0)) {
        if (TREE_CODE(node) != COMPONENT_REF) {
            // Bottom of the chain: only usable as base if something was collected and no
            // member access is hidden deeper inside the base expression.
            if (chain.links.empty() || contains_component_ref(node))
                return false;
            chain.base = node;
            return true;
        }

        GCRChain::Link entry;
        entry.t = node;
        entry.relevant = is_relevant_offsetof(node, entry.call_data.target, entry.call_data.member);
        if (entry.relevant) {
            entry.call_data.uid = spslr_offsetof_next_uid++;
            chain.relevant = true;
        }
        chain.links.push_front(entry);
    }
    return false;
}
// Returns the position of a FIELD_DECL in bits: DECL_FIELD_BIT_OFFSET plus, when it is an
// INTEGER_CST, DECL_FIELD_OFFSET scaled by BITS_PER_UNIT. Asserts that field is a FIELD_DECL.
// NOTE(review): a missing or non-constant DECL_FIELD_OFFSET is silently left out of the
// result -- confirm callers can live with that.
static HOST_WIDE_INT get_field_offset_bits(tree field)
{
    gcc_assert(TREE_CODE(field) == FIELD_DECL);

    // The byte part may not be a constant early in compilation.
    tree byte_part = DECL_FIELD_OFFSET(field);
    HOST_WIDE_INT total_bits = tree_to_uhwi(DECL_FIELD_BIT_OFFSET(field));

    const bool byte_part_is_constant = byte_part && TREE_CODE(byte_part) == INTEGER_CST;
    if (!byte_part_is_constant)
        return total_bits;

    // Scale the byte part to bits and add it on top of the in-unit bit position.
    return total_bits + tree_to_shwi(byte_part) * BITS_PER_UNIT;
}
// Computes the constant byte offset of a non-bit-field FIELD_DECL.
//   field - candidate field declaration (may be NULL_TREE)
//   off   - receives DECL_FIELD_OFFSET + DECL_FIELD_BIT_OFFSET / BITS_PER_UNIT on success
// Returns false, leaving off untouched, when field is not a FIELD_DECL, is a bit-field,
// either offset is missing, not an INTEGER_CST or does not fit an unsigned HOST_WIDE_INT,
// or the bit offset is not a whole number of units.
static bool get_field_offset(tree field, Member::OFF& off) {
    if (!field || TREE_CODE(field) != FIELD_DECL)
        return false;
    if (DECL_BIT_FIELD(field))
        return false;

    tree byte_offset_tree = DECL_FIELD_OFFSET(field);
    tree bit_offset_tree = DECL_FIELD_BIT_OFFSET(field);
    // tree_fits_uhwi_p covers the NULL and INTEGER_CST checks and additionally guarantees
    // that the tree_to_uhwi conversions below cannot trip their internal assertion.
    if (!tree_fits_uhwi_p(byte_offset_tree) || !tree_fits_uhwi_p(bit_offset_tree))
        return false;

    const unsigned HOST_WIDE_INT byte_offset = tree_to_uhwi(byte_offset_tree);
    const unsigned HOST_WIDE_INT bit_offset = tree_to_uhwi(bit_offset_tree);
    // Only fields that start on a unit boundary have a byte offset.
    if (bit_offset % BITS_PER_UNIT != 0)
        return false;

    off = (Member::OFF)byte_offset + (Member::OFF)bit_offset / BITS_PER_UNIT;
    return true;
}
// Rewrites a COMPONENT_REF chain that contains at least one relevant member access into
// explicit pointer arithmetic.
//   ref - the outermost COMPONENT_REF of the access
//   gsi - iterator at the statement using ref; helper statements are inserted before it
// The address of the chain's base is advanced once per link: relevant links add the result
// of a call to the matching __spslr_offsetof_<uid> placeholder (recorded in
// spslr_offsetof_calls), all other links add their constant byte offset.
// Returns a MEM_REF (offset 0) through the final field pointer, typed like ref, or
// NULL_TREE when the chain cannot be parsed, is not relevant, or instrumentation fails.
// NOTE(review): statements inserted for earlier links stay in place when a later link
// fails -- confirm that this is acceptable.
static tree gimple_instrument_offsetof_maybe(tree ref, gimple_stmt_iterator* gsi) {
    GCRChain chain;
    if (!gimple_component_ref_chain(ref, chain)) {
        std::cerr << "spslr_pinpoint -> failed to parse COMPONENT_REF chain" << std::endl;
        return NULL_TREE;
    }
    if (!chain.relevant)
        return NULL_TREE;

    // Starting point: address of the object the chain is rooted in.
    if (TREE_CODE(chain.base) == ADDR_EXPR) {
        std::cerr << "spslr_pinpoint -> unexpected ADDR_EXPR as COMPONENT_REF base!" << std::endl;
        return NULL_TREE;
    }
    tree cursor = build_fold_addr_expr(chain.base);

    // NOTE -> Could fold into single call here (needs to track what offsets contribute, +irrelevant combined)
    for (const GCRChain::Link& link : chain.links) {
        // Offset operand for this step: placeholder call result or plain constant.
        tree step = NULL_TREE;

        if (link.relevant) {
            tree offsetof_decl = make_spslr_offsetof_decl(link.call_data.uid);
            if (!offsetof_decl) {
                std::cerr << "spslr_pinpoint -> failed to instrument COMPONENT_REF ("
                          << SPSLR_OFFSETOF << "<uid> unavailable)" << std::endl;
                return NULL_TREE;
            }
            spslr_offsetof_calls.emplace(link.call_data.uid, link.call_data);

            // The placeholder call is a statement of its own, assigning to a fresh temporary.
            gimple* call_stmt = gimple_build_call(offsetof_decl, 0);
            step = create_tmp_var(size_type_node, NULL);
            gimple_call_set_lhs(call_stmt, step);
            gsi_insert_before(gsi, call_stmt, GSI_SAME_STMT);
        } else {
            Member::OFF constant_offset = 0;
            if (!get_field_offset(TREE_OPERAND(link.t, 1), constant_offset)) {
                std::cerr << "spslr_pinpoint -> failed to get offset of an irrelevant member "
                          << "in a relevant COMPONENT_REF chain" << std::endl;
                return NULL_TREE;
            }
            step = build_int_cst(sizetype, constant_offset);
        }

        // Advance the pointer; the result is a pointer to the field named by this link.
        tree field_ptr_type = build_pointer_type(TREE_TYPE(link.t));
        tree advanced = build2(POINTER_PLUS_EXPR, field_ptr_type, cursor, step);
        cursor = create_tmp_var(field_ptr_type, NULL);
        gsi_insert_before(gsi, gimple_build_assign(cursor, advanced), GSI_SAME_STMT);
    }

    // The cursor now points at the accessed field -> dereference it.
    tree zero_offset = fold_convert(TREE_TYPE(cursor), build_int_cst(sizetype, 0));
    return build2(MEM_REF, TREE_TYPE(ref), cursor, zero_offset);
}
// Tries to instrument the node at the end of path (the slot currently visited by the walk).
//   path          - tree slots from the operand root down to the current node
//   gsi           - statement the tree belongs to
//   cancel_levels - set to 1 when the node is a COMPONENT_REF (its subtree must not be
//                   walked further) and to 2 when the parent ADDR_EXPR was replaced as well;
//                   left untouched for every other node
static void instrument_tree(const std::list<tree*>& path, gimple_stmt_iterator* gsi, unsigned& cancel_levels) {
    if (!gsi || path.empty())
        return;

    tree* slot = path.back();
    tree node = *slot;
    if (!node || TREE_CODE(node) != COMPONENT_REF)
        return;

    cancel_levels = 1;
    tree replacement = gimple_instrument_offsetof_maybe(node, gsi);
    if (!replacement)
        return;

    gimple_set_modified(gsi_stmt(*gsi), true);
    *slot = replacement;

    // The replacement is a MEM_REF with offset 0; a wrapping ADDR_EXPR cancels it out.
    if (path.size() >= 2) {
        auto parent_it = path.rbegin();
        ++parent_it;
        tree* parent = *parent_it;
        if (TREE_CODE(*parent) == ADDR_EXPR) {
            // Note -> the base of the MEM_REF is expected to have the same type as the ADDR_EXPR
            *parent = TREE_OPERAND(replacement, 0);
            ++cancel_levels;
        }
    }
}
// State shared between the nested walk_tree_level invocations for one statement operand.
struct TreeWalkData {
    // Tree slots from the operand root down to the node currently being visited.
    std::list<tree*> path;
    // Statement the walked operand belongs to; instrumentation inserts before it.
    gimple_stmt_iterator* gsi = nullptr;
    // Number of walk levels that still have to be aborted after a replacement.
    unsigned cancel_levels = 0;
};
// walk_tree callback that handles one nesting level per walk_tree invocation and recurses
// by hand, so that twd->path always lists the tree slots from the operand root down to the
// node being visited.
//   tp            - slot holding the current node
//   walk_subtrees - walk_tree's descend flag; may be NULL (walk_gimple_stmt calls this
//                   function directly with NULL)
//   data          - a TreeWalkData*
// Returns NULL_TREE to let the enclosing walk continue, or *tp to make it stop.
static tree walk_tree_level(tree* tp, int* walk_subtrees, void* data) {
TreeWalkData* twd = (TreeWalkData*)data;
if (!twd)
return NULL_TREE;
// The nested walk_tree below first reports the node it was started on; that node is
// already on the path, so skip it and let walk_tree descend to its operands.
if (!twd->path.empty() && twd->path.back() == tp)
return NULL_TREE; // root of this level
// Keep walk_tree from descending on its own; the children are visited by the nested
// walk_tree call issued further down.
if (walk_subtrees)
*walk_subtrees = 0;
twd->cancel_levels = 0;
twd->path.push_back(tp);
// instrument_tree raises cancel_levels when it handled a COMPONENT_REF at this node.
instrument_tree(twd->path, twd->gsi, twd->cancel_levels);
// Only descend when this node was left alone.
if (twd->cancel_levels == 0)
walk_tree(tp, walk_tree_level, data, NULL);
twd->path.pop_back();
// Leaving this level consumes one pending cancel level (possibly set by a deeper level).
if (twd->cancel_levels > 0)
twd->cancel_levels--;
// Cancel current level if there are still cancel_levels due
return twd->cancel_levels == 0 ? NULL_TREE : *tp;
}
// Runs the COMPONENT_REF instrumentation over every operand of the statement at gsi.
// Returns false when gsi is null or already at the end of its sequence (nothing was done),
// true otherwise; log_component_refs_pass::execute uses this as its loop condition.
static bool walk_gimple_stmt(gimple_stmt_iterator* gsi) {
    if (!gsi || gsi_end_p(*gsi))
        return false;

    gimple* stmt = gsi_stmt(*gsi);
    // Unsigned index: matches the unsigned count returned by gimple_num_ops.
    for (unsigned i = 0; i < gimple_num_ops(stmt); ++i) {
        tree* op = gimple_op_ptr(stmt, i);
        if (!op || !*op)
            continue;

        // Fresh walk state per operand, fully initialized before the first callback runs.
        TreeWalkData twd;
        twd.gsi = gsi;
        twd.cancel_levels = 0;
        walk_tree_level(op, NULL, &twd);
    }
    return true;
}
// Pass entry point: instruments every statement of the function's GIMPLE body.
// Only the statement sequence returned by gimple_body is iterated (gsi_start / gsi_next).
// Returns 0 in all cases; functions without a declaration are skipped.
unsigned int log_component_refs_pass::execute(function* fun) {
    tree fndecl = fun->decl;
    if (!fndecl)
        return 0;

    gimple_seq body = gimple_body(fndecl);
    // walk_gimple_stmt reports false once the iterator has run off the end of the body.
    gimple_stmt_iterator gsi = gsi_start(body);
    while (walk_gimple_stmt(&gsi))
        gsi_next(&gsi);

    // print_gimple_seq(stderr, gimple_body(fun->decl), 0, TDF_NONE);
    return 0;
}
// At early RTL, replace __spslr_offsetof_<uid> with UNSPECs to avoid ABI and clobbering "problems"
// Retrieves the symbol name a CALL rtx refers to.
//   call_rtx - candidate rtx; anything that is not a CALL is rejected
//   out_name - receives the string of the SYMBOL_REF found as operand 0 of the CALL's
//              operand 0; untouched on failure
// Returns true only for calls whose target is a SYMBOL_REF.
static bool extract_callee_symbol (rtx call_rtx, const char** out_name) {
    const bool is_call = call_rtx && GET_CODE(call_rtx) == CALL;
    if (!is_call)
        return false;

    rtx callee_ref = XEXP(call_rtx, 0);
    rtx target = callee_ref ? XEXP(callee_ref, 0) : NULL_RTX;
    if (!target || GET_CODE(target) != SYMBOL_REF)
        return false;

    *out_name = XSTR(target, 0);
    return true;
}
// Extracts the access uid from a placeholder symbol name of the form
// SPSLR_OFFSETOF "<uid>", where <uid> is a non-empty string of decimal digits.
//   name    - symbol name to inspect (may be null)
//   out_uid - receives the parsed uid on success (may be null, which is a failure)
// Returns false, leaving *out_uid untouched, when the prefix does not match, the suffix is
// empty, contains anything other than decimal digits, or does not fit an unsigned long.
static bool parse_uid_from_name(const char* name, unsigned long* out_uid) {
    if (!name || !out_uid)
        return false;

    const size_t prefix_len = strlen(SPSLR_OFFSETOF);
    if (strncmp(name, SPSLR_OFFSETOF, prefix_len) != 0)
        return false;

    const char* digits = name + prefix_len;
    // strtoul would skip leading whitespace and accept a sign; a uid starts with a digit.
    if (*digits < '0' || *digits > '9')
        return false;

    errno = 0;
    char* endp = nullptr;
    const unsigned long value = strtoul(digits, &endp, 10);
    // Reject trailing characters (some other symbol sharing the prefix) and overflow.
    if (endp == digits || *endp != '\0' || errno == ERANGE)
        return false;

    *out_uid = value;
    return true;
}
// Finds the value-returning call inside an insn pattern.
//   pat      - insn pattern; either a SET or a PARALLEL is inspected, anything else fails
//   out_dest - receives the destination of the SET whose source is a CALL
//   out_call - receives that CALL rtx
// For a PARALLEL the first matching element wins. Returns false, leaving the outputs
// untouched, when no such SET exists.
static bool extract_dest_and_call (rtx pat, rtx* out_dest, rtx* out_call) {
    if (!pat)
        return false;

    // Accepts candidate when it is a SET whose source is a CALL and publishes both halves.
    auto take_call_set = [&](rtx candidate) -> bool {
        if (GET_CODE(candidate) != SET || GET_CODE(SET_SRC(candidate)) != CALL)
            return false;
        *out_dest = SET_DEST(candidate);
        *out_call = SET_SRC(candidate);
        return true;
    };

    switch (GET_CODE(pat)) {
    case SET:
        return take_call_set(pat);
    case PARALLEL:
        // Look for a SET whose src is CALL
        for (int i = 0, count = XVECLEN(pat, 0); i < count; ++i) {
            if (take_call_set(XVECEXP(pat, 0, i)))
                return true;
        }
        return false;
    default:
        return false;
    }
}
// Replaces every call insn that targets a __spslr_offsetof_<uid> placeholder by
//   (set dest (unspec:mode [(const_int uid)] UNSPEC_SPSLR_OFFSETOF))
// where dest is the register the call result was assigned to. Calls without a register
// destination, without a SYMBOL_REF target, or with a non-placeholder name are left alone.
// When at least one call was replaced, the entry and exit blocks are marked dirty for df.
static void call_to_unspec(function* fn) {
    if (!fn)
        return;

    unsigned replaced = 0;
    basic_block bb;
    FOR_EACH_BB_FN(bb, fn) {
        // Insns are deleted while iterating: fix the end marker up front and fetch the
        // successor before the current insn can be removed, so a deleted insn is never
        // dereferenced to advance the loop.
        rtx_insn* const stop = NEXT_INSN(BB_END(bb));
        rtx_insn* next = nullptr;
        for (rtx_insn* insn = BB_HEAD(bb); insn != stop; insn = next) {
            next = NEXT_INSN(insn);

            if (!CALL_P(insn))
                continue;

            rtx dest = NULL_RTX;
            rtx call = NULL_RTX;
            if (!extract_dest_and_call(PATTERN(insn), &dest, &call))
                continue;

            const char* name = nullptr;
            if (!extract_callee_symbol(call, &name))
                continue;

            unsigned long uid = 0;
            if (!parse_uid_from_name(name, &uid))
                continue;

            // We expect a returning call (assigned to dest)
            if (!dest || !REG_P(dest) || GET_MODE(dest) == VOIDmode)
                continue;
            machine_mode mode = GET_MODE(dest);

            // Build: (set dest (unspec:mode [(const_int uid)] UNSPEC_SPSLR_OFFSETOF))
            rtvec vec = gen_rtvec(1, GEN_INT((HOST_WIDE_INT) uid));
            rtx uns = gen_rtx_UNSPEC(mode, vec, UNSPEC_SPSLR_OFFSETOF); // Note -> maybe volatile
            rtx set = gen_rtx_SET(dest, uns);
            emit_insn_before(set, insn);
            delete_insn(insn);
            replaced++;
        }
    }

    if (replaced) {
        df_set_bb_dirty(ENTRY_BLOCK_PTR_FOR_FN(fn));
        df_set_bb_dirty(EXIT_BLOCK_PTR_FOR_FN(fn));
    }
}
// Static descriptor passed to the rtl_opt_pass base class of call_to_unspec_pass.
// The initializer order follows GCC's struct pass_data (tree-pass.h).
const pass_data call_to_unspec_pass_data = {
RTL_PASS, // pass type
"call_to_unspec", // pass name; plugin_init uses it as reference pass for unspec_to_lconst
OPTGROUP_NONE, // optinfo group
TV_NONE, // timevar id
PROP_rtl, // properties required
0, 0, 0, 0 // properties provided / destroyed, todo flags at start / finish
};
// RTL pass wrapper that runs call_to_unspec on the function it is executed for.
struct call_to_unspec_pass : rtl_opt_pass {
// Binds the pass to call_to_unspec_pass_data; ctxt is forwarded to the base class.
call_to_unspec_pass(gcc::context* ctxt)
: rtl_opt_pass(call_to_unspec_pass_data, ctxt) {}
// Delegates to call_to_unspec and always returns 0.
unsigned int execute(function* fn) override {
call_to_unspec(fn);
return 0;
}
};
// Late RTL pass replaces UNSPECs with labeled constants (must happen before vregs, no optimizations afterwards)
// Looks up the value recorded for an instrumented access.
//   uid  - key into spslr_offsetof_calls
//   ioff - receives the `member` value stored for uid; untouched when uid is unknown
// Returns whether uid was found.
static bool lookup_initial_member_offset(SPSLROffsetofCallData::UID uid, Member::OFF& ioff) {
    const auto entry = spslr_offsetof_calls.find(uid);
    const bool known = entry != spslr_offsetof_calls.end();
    if (known) {
        // The record also carries `target`; only `member` is needed here.
        ioff = entry->second.member;
    }
    return known;
}
// Emits an operand-less ASM_OPERANDS insn in front of `before` whose template is the label
// "<SPSLR_OFFSETOF><uid>:" followed by a newline. The insn inherits the location of `before`.
// NOTE(review): uid is printed with %lu -- assumes SPSLROffsetofCallData::UID is
// compatible with unsigned long; confirm.
static void emit_named_asm_label_before(SPSLROffsetofCallData::UID uid, rtx_insn* before) {
    char label_text[128];
    snprintf(label_text, sizeof(label_text), "%s%lu:\n", SPSLR_OFFSETOF, uid);

    const location_t where = INSN_LOCATION(before);

    // Three empty vectors, passed in the vector slots of the ASM_OPERANDS rtx.
    rtvec first_vec = rtvec_alloc(0);
    rtvec second_vec = rtvec_alloc(0);
    rtvec third_vec = rtvec_alloc(0);

    rtx label_asm = gen_rtx_ASM_OPERANDS (VOIDmode, ggc_strdup(label_text), "", 1,
                                          first_vec, second_vec, third_vec, where);
    emit_insn_before (label_asm, before);
}
// Builds the pattern that materializes an initial member offset behind a named label:
//   (parallel [(set dest (asm_operands ...)) (clobber (reg:CC 17))])
// The asm template is "<SPSLR_OFFSETOF><uid>:" on one line followed by "mov %1, %0".
//   uid  - access uid, becomes part of the label name
//   dest - output operand (constraint "=r"); its mode is used for the ASM_OPERANDS
//   ioff - value passed as the single input operand (constraint "i")
//   loc  - source location stored in the ASM_OPERANDS
// Returns the PARALLEL; the caller installs it as an insn pattern.
// NOTE(review): uid is printed with %lu -- assumes SPSLROffsetofCallData::UID is
// compatible with unsigned long; confirm.
static rtx labeled_cst_mov(SPSLROffsetofCallData::UID uid, rtx dest, Member::OFF ioff, location_t loc) {
char asm_str[128];
snprintf(asm_str, sizeof(asm_str), "%s%lu:\nmov %1, %0\n", SPSLR_OFFSETOF, uid);
// rtl.def
// -> DEF_RTL_EXPR(ASM_INPUT, "asm_input", "sL", RTX_EXTRA) -> only string+location in ASM_INPUT
// -> DEF_RTL_EXPR(ASM_OPERANDS, "asm_operands", "ssiEEEL", RTX_EXTRA)
// Input descriptor: constraint "i" for the offset operand.
// NOTE(review): the descriptor is created in SImode regardless of GET_MODE(dest) -- confirm.
rtx desc_in1 = gen_rtx_ASM_INPUT(SImode, ggc_strdup("i"));
rtvec desc_inputs = gen_rtvec(1, desc_in1);
rtvec inputs = gen_rtvec(1, GEN_INT(ioff));
const char* desc_outputs = "=r";
// NOTE(review): `outputs` is built but not passed to any call in this function.
rtvec outputs = gen_rtvec(1, dest);
rtvec labels = rtvec_alloc(0);
rtx asmops = gen_rtx_ASM_OPERANDS(GET_MODE(dest),
ggc_strdup(asm_str), /* template */
ggc_strdup(desc_outputs), /* output constraint */
0, /* output number */
inputs, /* vector of input RTXs */
desc_inputs, /* vector of input descriptors */
labels, /* labels (empty) */
loc); /* source location */
// NOTE(review): hard register number 17 in CCmode is hard-coded; presumably the flags
// register of the intended target -- confirm before building for another target.
rtx cc_clob = gen_rtx_CLOBBER(VOIDmode, gen_rtx_REG(CCmode, 17));
rtvec vec = gen_rtvec (2, gen_rtx_SET(dest, asmops), cc_clob);
rtx parallel = gen_rtx_PARALLEL(VOIDmode, vec);
return parallel;
}
// Replaces every (set dest (unspec [(const_int uid)] UNSPEC_SPSLR_OFFSETOF)) insn of fn by
// the labeled constant move produced by labeled_cst_mov, using the offset recorded for uid.
// The insn is kept; only its pattern is swapped, its insn code reset and df rescanned.
// Processing of the whole function stops at the first uid without a recorded offset.
// NOTE(review): the success message goes to std::cout while the diagnostics in this file
// use std::cerr -- confirm this is intended.
static void unspec_to_labeled_const(function* fn) {
    basic_block bb;
    FOR_EACH_BB_FN(bb, fn) {
        for (rtx_insn *insn = BB_HEAD(bb); insn != NEXT_INSN(BB_END(bb)); insn = NEXT_INSN(insn)) {
            if (!INSN_P(insn))
                continue;

            rtx pattern = PATTERN(insn);
            if (GET_CODE(pattern) != SET)
                continue;

            rtx marker = SET_SRC(pattern);
            const bool is_marker = GET_CODE(marker) == UNSPEC
                && XINT(marker, 1) == UNSPEC_SPSLR_OFFSETOF;
            if (!is_marker)
                continue;

            /* Extract UID from UNSPEC operands. */
            const bool has_operand = XVEC(marker, 0) != NULL && XVECLEN(marker, 0) >= 1;
            if (!has_operand)
                continue;
            rtx uid_rtx = XVECEXP(marker, 0, 0);
            if (!CONST_INT_P(uid_rtx))
                continue;
            SPSLROffsetofCallData::UID uid = (SPSLROffsetofCallData::UID)INTVAL(uid_rtx);

            // TODO -> Use value that forces 32 bit (currently uses 32 bit anyways)?
            Member::OFF initial_offset = uid;
            if (!lookup_initial_member_offset(uid, initial_offset)) {
                std::cerr << "Failed to query initial member offset for access uid " << uid << "!" << std::endl;
                return;
            }

            // TODO
            // Alternative that was tried: emit_named_asm_label_before(uid, insn) followed by a
            // plain (set dest (const_int initial_offset)) pattern and forced re-recognition.
            rtx target_reg = SET_DEST(pattern);
            PATTERN(insn) = labeled_cst_mov(uid, target_reg, initial_offset, INSN_LOCATION(insn));
            INSN_CODE(insn) = -1; // force re-recognition
            df_insn_rescan(insn);

            std::cout << "Inserted labeled initial member offset " << initial_offset
                      << " for access uid " << uid << "!" << std::endl;
        }
    }
}
// Static descriptor passed to the rtl_opt_pass base class of unspec_to_lconst_pass.
// The initializer order follows GCC's struct pass_data (tree-pass.h).
const pass_data unspec_to_lconst_pass_data = {
RTL_PASS, // pass type
"unspec_to_lconst", // pass name
OPTGROUP_NONE, // optinfo group
TV_NONE, // timevar id
PROP_rtl, // properties required
0, 0, 0, 0 // properties provided / destroyed, todo flags at start / finish
};
// RTL pass wrapper that runs unspec_to_labeled_const on the function it is executed for.
struct unspec_to_lconst_pass : rtl_opt_pass {
// Binds the pass to unspec_to_lconst_pass_data; ctxt is forwarded to the base class.
unspec_to_lconst_pass (gcc::context *ctxt)
: rtl_opt_pass(unspec_to_lconst_pass_data, ctxt) {}
// Delegates to unspec_to_labeled_const and always returns 0.
unsigned int execute(function* fn) override {
unspec_to_labeled_const(fn);
return 0;
}
};
// Hook everything up in plugin_init
int plugin_init (struct plugin_name_args *plugin_info, struct plugin_gcc_version *version) {
if (!plugin_default_version_check(version, &gcc_version)) {
fprintf(stderr, "GCC version mismatch!\n");
std::cerr << "spslr_pinpoint -> GCC version mismatch" << std::endl;
return 1;
}
register_callback(plugin_info->base_name, PLUGIN_BUILD_COMPONENT_REF, on_build_component_ref, NULL);
struct register_pass_info log_component_refs_pass_info;
log_component_refs_pass_info.pass = new log_component_refs_pass(nullptr);
log_component_refs_pass_info.ref_pass_instance_number = 1;
log_component_refs_pass_info.reference_pass_name = "cfg";
log_component_refs_pass_info.pos_op = PASS_POS_INSERT_BEFORE;
register_callback(plugin_info->base_name, PLUGIN_PASS_MANAGER_SETUP, nullptr, &log_component_refs_pass_info);
/*
TODO
vregs is almost immediately after expand (maybe the first one)
optimizations happen afterwards (e.g. forward propagation in rtl-fwprop1)
*/
struct register_pass_info call_to_unspec_pass_info;
call_to_unspec_pass_info.pass = new call_to_unspec_pass(nullptr);
call_to_unspec_pass_info.reference_pass_name = "vregs"; // "expand";
call_to_unspec_pass_info.ref_pass_instance_number = 1;
call_to_unspec_pass_info.pos_op = PASS_POS_INSERT_AFTER;
register_callback(plugin_info->base_name, PLUGIN_PASS_MANAGER_SETUP, nullptr, &call_to_unspec_pass_info);
struct register_pass_info unspec_to_lconst_pass_info;
unspec_to_lconst_pass_info.pass = new unspec_to_lconst_pass(nullptr);
unspec_to_lconst_pass_info.reference_pass_name = "call_to_unspec"; // "vregs";
unspec_to_lconst_pass_info.ref_pass_instance_number = 1;
unspec_to_lconst_pass_info.pos_op = PASS_POS_INSERT_AFTER; // PASS_POS_INSERT_BEFORE;
register_callback(plugin_info->base_name, PLUGIN_PASS_MANAGER_SETUP, nullptr, &unspec_to_lconst_pass_info);
return 0;
}