#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int plugin_is_GPL_compatible; #define SPSLR_OFFSETOF "__spslr_offsetof" // Recognize __attribute__((spslr)) on structures and track their layout struct Member { using OFF = unsigned long; using SIZE = unsigned long; static constexpr int FLAG_DANGERZONE = 1; OFF offset = 0; SIZE size = 0; int flags = 0; }; struct Target { using UID = unsigned long; UID uid; std::map members; void log_member(const Member& member); const Member* get_member(Member::OFF offset) const; }; struct TargetTree { tree t; Target::UID uid; }; static Target::UID next_target_uid = 0; static std::unordered_map targets; static std::list target_trees; static std::unordered_set target_markings; static bool find_target_log(tree t, Target::UID& uid) { for (const TargetTree& tt : target_trees) { if (lang_hooks.types_compatible_p(tt.t, t)) { uid = tt.uid; return true; } } return false; } void Target::log_member(const Member& member) { Member tmp_member; tmp_member.offset = member.offset; tmp_member.size = (member.size == 0 ? 1 : member.size); tmp_member.flags = (member.size == 0 ? Member::FLAG_DANGERZONE : 0) | member.flags; // Overlaps are dangerous -> remove and integrate into member auto overlap_end = members.lower_bound(tmp_member.offset + tmp_member.size); for (auto it = std::make_reverse_iterator(overlap_end); it != members.rend();) { const Member& existing_member = it->second; if (existing_member.offset + existing_member.size <= tmp_member.offset) break; Member::OFF combined_end = std::max(tmp_member.offset + tmp_member.size, existing_member.offset + existing_member.size); Member::OFF combined_offset = std::min(tmp_member.offset, existing_member.offset); Member::SIZE combined_size = combined_end - combined_offset; tmp_member.flags |= (existing_member.flags | Member::FLAG_DANGERZONE); tmp_member.offset = combined_offset; tmp_member.size = combined_size; // Erase overlapping member auto tmp_forward = std::prev(it.base()); tmp_forward = members.erase(tmp_forward); it = std::make_reverse_iterator(tmp_forward); } members.emplace(tmp_member.offset, tmp_member); } const Member* Target::get_member(Member::OFF offset) const { auto it = members.find(offset); if (it == members.end()) return nullptr; return &it->second; } static void log_target(tree node) { node = TYPE_MAIN_VARIANT(node); if (!node) return; if (TREE_CODE(node) != RECORD_TYPE) return; auto marking = target_markings.find(node); if (marking == target_markings.end()) return; Target::UID existing_target; if (find_target_log(node, existing_target)) return; Target::UID tuid = next_target_uid++; TargetTree new_tt; new_tt.t = node; new_tt.uid = tuid; target_trees.push_back(new_tt); targets.emplace(tuid, Target{}); Target& target = targets.at(tuid); target.uid = tuid; // Log all members for (tree field = TYPE_FIELDS(node); field; field = DECL_CHAIN(field)) { if (TREE_CODE(field) != FIELD_DECL) continue; HOST_WIDE_INT field_byte_offset = 0; if (TREE_CODE(DECL_FIELD_OFFSET(field)) == INTEGER_CST) field_byte_offset = tree_to_uhwi(DECL_FIELD_OFFSET(field)); HOST_WIDE_INT field_bit_offset = 0; if (TREE_CODE(DECL_FIELD_BIT_OFFSET(field)) == INTEGER_CST) field_bit_offset = tree_to_uhwi(DECL_FIELD_BIT_OFFSET(field)); HOST_WIDE_INT field_bit_offset_bytes = field_bit_offset / 8; field_byte_offset += field_bit_offset_bytes; field_bit_offset -= field_bit_offset_bytes * 8; HOST_WIDE_INT field_bit_size = 0; if (TREE_CODE(DECL_SIZE(field)) == INTEGER_CST) field_bit_size = tree_to_uhwi(DECL_SIZE(field)); bool is_bitfield = (DECL_BIT_FIELD_TYPE(field) != NULL_TREE); bool is_multibyte = (field_bit_size % 8 == 0 && field_bit_offset == 0); bool is_dangerous = (is_bitfield || !is_multibyte); HOST_WIDE_INT field_offset_bit_size = field_bit_offset + field_bit_size; HOST_WIDE_INT effective_field_size = field_offset_bit_size / 8; if (field_offset_bit_size % 8 != 0) effective_field_size += 1; Member member; member.offset = (Member::OFF)field_byte_offset; member.size = (Member::SIZE)effective_field_size; member.flags = (is_dangerous ? Member::FLAG_DANGERZONE : 0); target.log_member(member); } } static tree log_target_attribute(tree* node, tree name, tree args, int flags, bool* no_add_attrs) { if (!node) return NULL_TREE; if (TREE_CODE(*node) != RECORD_TYPE) return NULL_TREE; tree type_main_variant = TYPE_MAIN_VARIANT(*node); if (!type_main_variant) return NULL_TREE; target_markings.insert(type_main_variant); return NULL_TREE; } static struct attribute_spec spslr_attribute = { "spslr", 0, 0, false, false, false, false, log_target_attribute, NULL }; void on_register_attributes(void* event_data, void* data) { register_attribute(&spslr_attribute); } static void on_type_complete(void* event_data, void* user_data) { tree type = (tree)event_data; log_target(type); } // Early hook to make COMPONENT_REF nodes survice front end struct SPSLROffsetofCallData { using UID = unsigned long; UID uid; Target::UID target; Member::OFF member; }; static tree spslr_offsetof_decl = NULL_TREE; static SPSLROffsetofCallData::UID spslr_offsetof_next_uid = 0; static std::unordered_map spslr_offsetof_calls; static bool require_spslr_offsetof_decl() { if (spslr_offsetof_decl) return true; tree param_types = tree_cons(NULL_TREE, long_unsigned_type_node, NULL_TREE); tree fntype = build_function_type(sizetype, param_types); spslr_offsetof_decl = build_fn_decl(SPSLR_OFFSETOF, fntype); if (!spslr_offsetof_decl) return false; DECL_EXTERNAL (spslr_offsetof_decl) = 1; TREE_PUBLIC (spslr_offsetof_decl) = 1; DECL_ARTIFICIAL (spslr_offsetof_decl) = 1; // Prevent VOP problems later when removing calls // Explanation -> VOPs mark memory side-effects, which these calls have none of anyways DECL_PURE_P(spslr_offsetof_decl) = 1; DECL_IS_NOVOPS(spslr_offsetof_decl) = 1; return true; } static bool is_relevant_offsetof(tree ref, Target::UID& tuid, Member::OFF& moff) { if (!ref || TREE_CODE(ref) != COMPONENT_REF) return false; tree base = TREE_OPERAND(ref, 0); if (!base) return false; tree base_type = TREE_TYPE(base); if (!base_type) return false; if (!find_target_log(base_type, tuid)) return false; auto target_it = targets.find(tuid); if (target_it == targets.end()) return false; const Target& t = target_it->second; tree field = TREE_OPERAND(ref, 1); HOST_WIDE_INT field_byte_offset = 0; if (TREE_CODE(DECL_FIELD_OFFSET(field)) == INTEGER_CST) field_byte_offset = tree_to_uhwi(DECL_FIELD_OFFSET(field)); HOST_WIDE_INT field_bit_offset = 0; if (TREE_CODE(DECL_FIELD_BIT_OFFSET(field)) == INTEGER_CST) field_bit_offset = tree_to_uhwi(DECL_FIELD_BIT_OFFSET(field)); Member::OFF effective_field_offset = (field_byte_offset + (field_bit_offset / 8)); moff = effective_field_offset; const Member* m = t.get_member(effective_field_offset); if (!m) return false; return !(m->flags & Member::FLAG_DANGERZONE); } static tree instrument_offsetof_maybe(tree ref) { SPSLROffsetofCallData call_data; if (!is_relevant_offsetof(ref, call_data.target, call_data.member)) return NULL_TREE; location_t loc = EXPR_LOCATION(ref); tree result_type = TREE_TYPE(ref); if (!require_spslr_offsetof_decl()) { std::cerr << "spslr_pinpoint -> failed to instrument COMPONENT_REF (" << SPSLR_OFFSETOF << " unavailable)" << std::endl; return NULL_TREE; } call_data.uid = spslr_offsetof_next_uid++; spslr_offsetof_calls.emplace(call_data.uid, call_data); tree call_uid_tree = build_int_cst(long_unsigned_type_node, call_data.uid); tree call_tree = build_call_expr_loc(loc, spslr_offsetof_decl, 1, call_uid_tree); // Get base as char* tree base = TREE_OPERAND(ref, 0); tree base_addr; if (TREE_CODE(base) == ADDR_EXPR) { // base_addr = base; std::cerr << "spslr_pinpoint -> unexpected ADDR_EXPR as COMPONENT_REF base!" << std::endl; return NULL_TREE; } else { base_addr = build_fold_addr_expr(base); } tree char_ptr_type = build_pointer_type(char_type_node); tree base_char_ptr = fold_convert(char_ptr_type, base_addr); // Add __spslr_offsetof, cast back to field pointer, then dereference tree plus = build2_loc(loc, POINTER_PLUS_EXPR, char_ptr_type, base_char_ptr, call_tree); tree field_ptr_type = build_pointer_type(result_type); tree cast_back = build1_loc(loc, NOP_EXPR, field_ptr_type, plus); tree new_ref = build1_loc(loc, INDIRECT_REF, result_type, cast_back); return new_ref; } static void on_build_component_ref(void* event_data, void* user_data) { tree* component_ref_node = (tree*)event_data; if (!component_ref_node) return; tree repl = instrument_offsetof_maybe(*component_ref_node); if (repl) *component_ref_node = repl; } // Identify any missing COMPONENT_REFs (e.g. from CONSTRUCTOR trees) static const pass_data log_component_refs_pass_data = { GIMPLE_PASS, "log_component_refs", OPTGROUP_NONE, TV_NONE, 0,0,0,0, TODO_update_ssa }; struct log_component_refs_pass : gimple_opt_pass { log_component_refs_pass(gcc::context *ctxt); unsigned int execute(function* fun) override; }; log_component_refs_pass::log_component_refs_pass(gcc::context *ctxt) : gimple_opt_pass(log_component_refs_pass_data, ctxt) {} struct GCRChain { struct Link { tree t = NULL_TREE; bool relevant = false; SPSLROffsetofCallData call_data; }; bool relevant = false; std::list links; tree base = NULL_TREE; }; static tree walk_tree_contains_component_ref(tree* tp, int* walk_subtrees, void* data) { int* found_flag = (int*)data; if (!tp || !*tp) return NULL_TREE; if (TREE_CODE(*tp) == COMPONENT_REF) *found_flag = 1; return NULL_TREE; } static bool contains_component_ref(tree ref) { int found_flag = 0; walk_tree(&ref, walk_tree_contains_component_ref, &found_flag, NULL); return found_flag != 0; } static bool gimple_component_ref_chain(tree ref, GCRChain& chain) { if (!ref) return false; if (TREE_CODE(ref) != COMPONENT_REF) { if (chain.links.empty()) return false; if (contains_component_ref(ref)) return false; chain.base = ref; return true; } GCRChain::Link link; link.t = ref; link.relevant = is_relevant_offsetof(ref, link.call_data.target, link.call_data.member); if (link.relevant) { link.call_data.uid = spslr_offsetof_next_uid++; chain.relevant = true; } chain.links.push_front(link); return gimple_component_ref_chain(TREE_OPERAND(ref, 0), chain); } static HOST_WIDE_INT get_field_offset_bits(tree field) { gcc_assert(TREE_CODE(field) == FIELD_DECL); // Get the offset expression (may not be a constant early in compilation) tree offset_tree = DECL_FIELD_OFFSET(field); HOST_WIDE_INT bitpos_within_unit = tree_to_uhwi(DECL_FIELD_BIT_OFFSET(field)); // Convert the byte offset to bits HOST_WIDE_INT bit_offset = bitpos_within_unit; if (offset_tree && TREE_CODE(offset_tree) == INTEGER_CST) bit_offset += tree_to_shwi(offset_tree) * BITS_PER_UNIT; return bit_offset; } static bool get_field_offset(tree field, Member::OFF& off) { if (!field || TREE_CODE(field) != FIELD_DECL) return false; if (DECL_BIT_FIELD(field)) return false; tree byte_offset_tree = DECL_FIELD_OFFSET(field); tree bit_offset_tree = DECL_FIELD_BIT_OFFSET(field); if (!byte_offset_tree || !bit_offset_tree) return false; if (TREE_CODE(byte_offset_tree) != INTEGER_CST || TREE_CODE(bit_offset_tree) != INTEGER_CST) return false; HOST_WIDE_INT byte_offset = tree_to_uhwi(byte_offset_tree); HOST_WIDE_INT bit_offset = tree_to_uhwi(bit_offset_tree); if (bit_offset % 8 != 0) return false; off = (Member::OFF)byte_offset + (Member::OFF)bit_offset / 8; return true; } static tree gimple_instrument_offsetof_maybe(tree ref, gimple_stmt_iterator* gsi) { GCRChain gcrc; if (!gimple_component_ref_chain(ref, gcrc)) { std::cerr << "spslr_pinpoint -> failed to parse COMPONENT_REF chain" << std::endl; return NULL_TREE; } if (!gcrc.relevant) return NULL_TREE; // Store base pointer in a temporary variable (as char*) tree char_ptr_type = build_pointer_type(char_type_node); tree base_tmp = NULL_TREE; { tree base_ptr; if (TREE_CODE(gcrc.base) == ADDR_EXPR) { std::cerr << "spslr_pinpoint -> unexpected ADDR_EXPR as COMPONENT_REF base!" << std::endl; return NULL_TREE; } else { base_ptr = build_fold_addr_expr(gcrc.base); } tree base_char_ptr = fold_convert(char_ptr_type, base_ptr); base_tmp = create_tmp_var(char_ptr_type, NULL); gimple* base_tmp_assignment = gimple_build_assign(base_tmp, base_char_ptr); gsi_insert_before(gsi, base_tmp_assignment, GSI_SAME_STMT); } // For each component ref in chain, add the member offset to the pointer for (const GCRChain::Link& cr : gcrc.links) { // NOTE -> Could fold into single call here (needs to track what offsets contribute, +irrelevant combined) if (cr.relevant) { // Add offsetof call if (!require_spslr_offsetof_decl()) { std::cerr << "spslr_pinpoint -> failed to instrument COMPONENT_REF (" << SPSLR_OFFSETOF << " unavailable)" << std::endl; return NULL_TREE; } spslr_offsetof_calls.emplace(cr.call_data.uid, cr.call_data); // Call to __spslr_offsetof is a separate statement tree call_uid_arg = build_int_cst(long_unsigned_type_node, cr.call_data.uid); gimple* call_stmt = gimple_build_call(spslr_offsetof_decl, 1, call_uid_arg); tree offset_tmp = create_tmp_var(size_type_node, NULL); gimple_call_set_lhs(call_stmt, offset_tmp); gsi_insert_before(gsi, call_stmt, GSI_SAME_STMT); // Add call return value to current base pointer tree addition = build2(POINTER_PLUS_EXPR, char_ptr_type, base_tmp, offset_tmp); base_tmp = create_tmp_var(char_ptr_type, NULL); gimple* addition_assignment = gimple_build_assign(base_tmp, addition); gsi_insert_before(gsi, addition_assignment, GSI_SAME_STMT); } else { // Add offsetof contant tree field = TREE_OPERAND(cr.t, 1); Member::OFF offset = 0; if (!get_field_offset(field, offset)) { std::cerr << "spslr_pinpoint -> failed to get offset of an irrelevant member " << "in a relevant COMPONENT_REF chain" << std::endl; return NULL_TREE; } // Add constant offset to current base pointer tree addition = build2(POINTER_PLUS_EXPR, char_ptr_type, base_tmp, build_int_cst(sizetype, offset)); base_tmp = create_tmp_var(char_ptr_type, NULL); gimple* addition_assignment = gimple_build_assign(base_tmp, addition); gsi_insert_before(gsi, addition_assignment, GSI_SAME_STMT); } } // Cast char pointer back to field pointer and dereference tree field_ptr_type = build_pointer_type(TREE_TYPE(ref)); tree field_ptr = fold_convert(field_ptr_type, base_tmp); tree field_ptr_tmp = create_tmp_var(field_ptr_type, NULL); gimple* field_ptr_tmp_assignment = gimple_build_assign(field_ptr_tmp, field_ptr); gsi_insert_before(gsi, field_ptr_tmp_assignment, GSI_SAME_STMT); tree offset0 = fold_convert(field_ptr_type, build_int_cst(sizetype, 0)); tree result_ref = build2(MEM_REF, TREE_TYPE(ref), field_ptr_tmp, offset0); return result_ref; } static void instrument_tree(const std::list& path, gimple_stmt_iterator* gsi, unsigned& cancel_levels) { if (path.empty() || !gsi) return; tree ref = *path.back(); if (!ref || TREE_CODE(ref) != COMPONENT_REF) return; cancel_levels = 1; tree instrumented_ref = gimple_instrument_offsetof_maybe(ref, gsi); if (!instrumented_ref) return; gimple_set_modified(gsi_stmt(*gsi), true); *path.back() = instrumented_ref; // At this point, instrumented_ref is a MEM_REF node (off=0). A wrapping ADDR_EXPR cancels it out. if (path.size() < 2) return; tree* parent = *(++path.rbegin()); if (TREE_CODE(*parent) == ADDR_EXPR) { // Note -> the base of the MEM_REF is expected to have the same type as the ADDR_EXPR *parent = TREE_OPERAND(instrumented_ref, 0); cancel_levels++; } } struct TreeWalkData { std::list path; gimple_stmt_iterator* gsi; unsigned cancel_levels; }; static tree walk_tree_level(tree* tp, int* walk_subtrees, void* data) { TreeWalkData* twd = (TreeWalkData*)data; if (!twd) return NULL_TREE; if (!twd->path.empty() && twd->path.back() == tp) return NULL_TREE; // root of this level if (walk_subtrees) *walk_subtrees = 0; twd->cancel_levels = 0; twd->path.push_back(tp); instrument_tree(twd->path, twd->gsi, twd->cancel_levels); if (twd->cancel_levels == 0) walk_tree(tp, walk_tree_level, data, NULL); twd->path.pop_back(); if (twd->cancel_levels > 0) twd->cancel_levels--; // Cancel current level if there are still cancel_levels due return twd->cancel_levels == 0 ? NULL_TREE : *tp; } static bool walk_gimple_stmt(gimple_stmt_iterator* gsi) { if (!gsi || gsi_end_p(*gsi)) return false; gimple* stmt = gsi_stmt(*gsi); for (int i = 0; i < gimple_num_ops(stmt); i++) { tree* op = gimple_op_ptr(stmt, i); if (!op || !*op) continue; TreeWalkData twd; twd.gsi = gsi; walk_tree_level(op, NULL, &twd); } return true; } unsigned int log_component_refs_pass::execute(function* fun) { const char* funcname = fun->decl && DECL_NAME(fun->decl) ? IDENTIFIER_POINTER(DECL_NAME(fun->decl)) : ""; tree fndecl = fun->decl; if (!fndecl) return 0; gimple_seq body = gimple_body(fndecl); for (gimple_stmt_iterator gsi = gsi_start(body); walk_gimple_stmt(&gsi); gsi_next(&gsi)); // print_gimple_seq(stderr, gimple_body(fun->decl), 0, TDF_NONE); return 0; } // Gimple print pass for debugging static const pass_data print_pass_data = { GIMPLE_PASS, "gimple_print", OPTGROUP_NONE, TV_NONE, 0,0,0,0, 0 }; struct print_pass : gimple_opt_pass { print_pass(gcc::context *ctxt); unsigned int execute(function* fun) override; }; print_pass::print_pass(gcc::context *ctxt) : gimple_opt_pass(print_pass_data, ctxt) {} unsigned int print_pass::execute(function* fun) { const char* funcname = fun->decl && DECL_NAME(fun->decl) ? IDENTIFIER_POINTER(DECL_NAME(fun->decl)) : ""; fprintf(stderr, "[spslr_pinpoint] Function: %s\n", funcname); basic_block bb; FOR_EACH_BB_FN(bb, fun) { for (gimple_stmt_iterator gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)) { gimple* stmt = gsi_stmt(gsi); print_gimple_stmt(stderr, stmt, 0, TDF_SLIM); } } fprintf(stderr, "\n"); return 0; } // Hook everything up in plugin_init int plugin_init (struct plugin_name_args *plugin_info, struct plugin_gcc_version *version) { if (!plugin_default_version_check(version, &gcc_version)) { fprintf(stderr, "GCC version mismatch!\n"); std::cerr << "spslr_pinpoint -> GCC version mismatch" << std::endl; return 1; } register_callback(plugin_info->base_name, PLUGIN_ATTRIBUTES, on_register_attributes, NULL); register_callback(plugin_info->base_name, PLUGIN_FINISH_TYPE, on_type_complete, NULL); register_callback(plugin_info->base_name, PLUGIN_BUILD_COMPONENT_REF, on_build_component_ref, NULL); struct register_pass_info log_component_refs_pass_info; log_component_refs_pass_info.pass = new log_component_refs_pass(nullptr); log_component_refs_pass_info.ref_pass_instance_number = 1; log_component_refs_pass_info.reference_pass_name = "cfg"; log_component_refs_pass_info.pos_op = PASS_POS_INSERT_BEFORE; register_callback(plugin_info->base_name, PLUGIN_PASS_MANAGER_SETUP, nullptr, &log_component_refs_pass_info); struct register_pass_info print_pass_info; print_pass_info.pass = new print_pass(nullptr); print_pass_info.ref_pass_instance_number = 1; print_pass_info.reference_pass_name = "cfg"; print_pass_info.pos_op = PASS_POS_INSERT_AFTER; register_callback(plugin_info->base_name, PLUGIN_PASS_MANAGER_SETUP, nullptr, &print_pass_info); return 0; }