From de97e65e4f91d0c0de2d395567e9be87e3618a00 Mon Sep 17 00:00:00 2001
From: York Jasper Niebuhr
Date: Sun, 12 Oct 2025 00:49:58 +0200
Subject: [PATCH] Recognizing offsetof

---
Review notes (not part of the commit message):
 - Angle-bracketed text appears to have been lost from this copy of the patch:
   the author e-mail, the header names of the "#include" lines in
   plugin/main.cpp and the CMake generator expression after "-fplugin=$".
   Restore them from the repository before applying.
 - Line breaks, blank lines and indentation were reconstructed from the hunk
   counts; apply with whitespace tolerance. The index hashes are those of the
   original commit.

 plugin/main.cpp                | 158 +++++++++++++++++++++++++++++++--
 subject/CMakeLists.txt         |   3 +-
 subject/main.c                 |   2 +-
 subject/offsetof_passthrough.h |  29 +++++++
 4 files changed, 184 insertions(+), 8 deletions(-)
 create mode 100644 subject/offsetof_passthrough.h

diff --git a/plugin/main.cpp b/plugin/main.cpp
index 5f8cc11..e49eb8c 100644
--- a/plugin/main.cpp
+++ b/plugin/main.cpp
@@ -3,6 +3,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -12,12 +13,22 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
 
 int plugin_is_GPL_compatible;
 
 struct MemberAccessInfo {
-    tree field_decl; // FIELD_DECL des zugegriffenen Mitglieds
-    unsigned long label_num;
+    std::string sname; // name of the accessed record type (own copy)
+    unsigned long label_num;
     std::string funcname;
     location_t loc;
 };
@@ -83,7 +94,7 @@ static void scan_tree_for_components(tree t, const char *funcname, gimple_stmt_i
             const char* rec_name = safe_get_record_type_name_from_field(field); // struct name
             if (rec_name && is_target(rec_name)) {
                 MemberAccessInfo info;
-                info.field_decl = field;
+                info.sname = rec_name;
                 info.label_num = ++label_counter;
                 info.funcname = funcname ? funcname : "";
                 info.loc = UNKNOWN_LOCATION;
@@ -124,7 +135,10 @@ static void scan_tree_for_components(tree t, const char *funcname, gimple_stmt_i
             scan_tree_for_components(TREE_OPERAND(t,0), funcname, gsi);
             scan_tree_for_components(TREE_OPERAND(t,1), funcname, gsi);
             break;
-        case PLUS_EXPR: case MINUS_EXPR: case MULT_EXPR: case POINTER_PLUS_EXPR:
+        case PLUS_EXPR:
+        case MINUS_EXPR:
+        case MULT_EXPR:
+        case POINTER_PLUS_EXPR:
             scan_tree_for_components(TREE_OPERAND(t,0), funcname, gsi);
             scan_tree_for_components(TREE_OPERAND(t,1), funcname, gsi);
             break;
@@ -133,6 +147,107 @@ static void scan_tree_for_components(tree t, const char *funcname, gimple_stmt_i
     }
 }
 
+// Recognizes a call to the __spslr_offsetof marker function (defined in
+// subject/offsetof_passthrough.h), replaces it with a plain assignment of its
+// third argument (the offset value) and, for target structs, puts an asm label
+// in front of that assignment.
+//
+// funcname: name of the enclosing function, may be null.
+// gsi:      iterator at the statement to inspect. It still designates a valid
+//           statement on return, but that statement may have been replaced, so
+//           callers must re-read gsi_stmt() afterwards.
+void scan_stmt_for_offsetof(const char *funcname, gimple_stmt_iterator *gsi) {
+    gimple *stmt = gsi_stmt(*gsi);
+    if (!is_gimple_call(stmt))
+        return;
+
+    tree callee = gimple_call_fn(stmt);
+    if (!callee)
+        return;
+
+    if (TREE_CODE(callee) == ADDR_EXPR)
+        callee = TREE_OPERAND(callee, 0);
+
+    if (!callee || TREE_CODE(callee) != FUNCTION_DECL)
+        return;
+
+    // DECL_NAME can be null; check it before asking for the identifier text.
+    tree callee_id = DECL_NAME(callee);
+    if (!callee_id || strcmp(IDENTIFIER_POINTER(callee_id), "__spslr_offsetof") != 0)
+        return;
+
+    // The marker takes ("struct X", "member", offset); do not index arguments
+    // the call does not have.
+    if (gimple_call_num_args(stmt) != 3) {
+        std::cout << "Failed to parse __spslr_offsetof arguments!" << std::endl;
+        return;
+    }
+
+    tree arg_type = gimple_call_arg(stmt, 0); // struct X
+    tree arg_member = gimple_call_arg(stmt, 1); // member
+    tree val = gimple_call_arg(stmt, 2); // (likely folded) offsetof value
+
+    if (TREE_CODE(arg_type) == ADDR_EXPR)
+        arg_type = TREE_OPERAND(arg_type, 0);
+
+    if (TREE_CODE(arg_member) == ADDR_EXPR)
+        arg_member = TREE_OPERAND(arg_member, 0);
+
+    if (TREE_CODE(arg_type) != STRING_CST || TREE_CODE(arg_member) != STRING_CST) {
+        std::cout << "Failed to parse __spslr_offsetof arguments!" << std::endl;
+        return;
+    }
+
+    // Only the type name is used so far; the member name is just validated.
+    const char* type_str = TREE_STRING_POINTER(arg_type); // includes "struct "
+
+    // Remove internal call
+    tree lhs = gimple_call_lhs(stmt);
+    if (!lhs) {
+        // Result unused: neutralise the call with a nop instead of removing it.
+        // gsi_remove() moves *gsi on to the next statement (or to the end of
+        // the block), so the caller's gsi_next() would skip a statement or
+        // step past the end.
+        gsi_replace(gsi, gimple_build_nop(), true);
+        return;
+    }
+
+    val = fold_convert(TREE_TYPE(lhs), val);
+
+    gassign *as = gimple_build_assign(lhs, val);
+    gsi_replace(gsi, as, true);
+
+    // #T stringifies the type as written ("struct X"), whereas is_target() is
+    // given bare record names elsewhere; drop the tag keyword before the lookup.
+    static const char struct_tag[] = "struct ";
+    if (strncmp(type_str, struct_tag, sizeof(struct_tag) - 1) == 0)
+        type_str += sizeof(struct_tag) - 1;
+
+    if (!is_target(type_str))
+        return;
+
+    // Insert label
+    MemberAccessInfo info;
+    // sname holds its own copy, so the record does not depend on how long the
+    // STRING_CST argument of the replaced call stays alive.
+    info.sname = type_str;
+    info.label_num = ++label_counter;
+    info.funcname = funcname ? funcname : "";
+    info.loc = UNKNOWN_LOCATION;
+    g_member_accesses.push_back(info);
+
+    char buf[64];
+    snprintf(buf, sizeof(buf), "mylabel_%lu:", info.label_num);
+
+    // The asm carries nothing but the label: no inputs, outputs, clobbers or
+    // labels. It is marked volatile, as the C front end does for asm statements
+    // without outputs, so that it is not treated as removable.
+    gasm *asm_stmt = gimple_build_asm_vec(ggc_strdup(buf), nullptr, nullptr, nullptr, nullptr);
+    gimple_asm_set_volatile(asm_stmt, true);
+
+    gsi_insert_before(gsi, asm_stmt, GSI_SAME_STMT);
+}
+
 // ------------------------
 // GIMPLE-Pass-Ausführung
 // ------------------------
@@ -143,9 +258,34 @@ const pass_data gimplabels_pass_data = {
     "gimplabels",
     OPTGROUP_NONE,
     TV_NONE,
-    0,0,0,0,0
+    0,0,0,0,
+    TODO_update_ssa | TODO_cleanup_cfg | TODO_verify_il
 };
 
+static void
+plugin_debug_tree(tree t, int depth = 2)
+{
+    if (!t)
+        return;
+
+    for (int i = 0; i < depth; ++i)
+        std::cout << " ";
+
+    std::cout << get_tree_code_name(TREE_CODE(t));
+
+    if (TREE_CODE(t) == FIELD_DECL && DECL_NAME(t))
+        std::cout << " <" << IDENTIFIER_POINTER(DECL_NAME(t)) << ">";
+    else if (TREE_CODE(t) == SSA_NAME && SSA_NAME_VAR(t)
+             && DECL_NAME(SSA_NAME_VAR(t)))
+        std::cout << " ";
+
+    std::cout << std::endl;
+
+    /* Recurse over child operands */
+    for (int i = 0; i < TREE_CODE_LENGTH(TREE_CODE(t)); ++i)
+        plugin_debug_tree(TREE_OPERAND(t, i), depth + 2);
+}
+
 struct gimplabels_pass : gimple_opt_pass {
     gimplabels_pass(gcc::context *ctxt)
         : gimple_opt_pass(gimplabels_pass_data, ctxt)
@@ -156,20 +296,26 @@ struct gimplabels_pass : gimple_opt_pass {
             ? IDENTIFIER_POINTER(DECL_NAME(fun->decl))
             : "";
 
+        std::cout << std::endl << "Handling function \"" << funcname << "\"..." << std::endl;
         // Iteration über alle Grundblöcke und Anweisungen
         basic_block bb;
         FOR_EACH_BB_FN(bb, fun) {
             for (gimple_stmt_iterator gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)) {
                 gimple *stmt = gsi_stmt(gsi);
+                print_gimple_stmt (stdout, stmt, 0, TDF_NONE);
+                scan_stmt_for_offsetof(funcname, &gsi);
+                // The statement at gsi may have been replaced by the call above.
+                stmt = gsi_stmt(gsi);
                 for (unsigned i = 0; i < gimple_num_ops(stmt); ++i) {
                     tree op = gimple_op(stmt, i);
+                    plugin_debug_tree(op);
                     scan_tree_for_components(op, funcname, &gsi);
                 }
             }
         }
 
-        std::cout << "Number of accesses: " << g_member_accesses.size() << std::endl;
+        std::cout << "Number of accesses: " << g_member_accesses.size() << std::endl;
         return 0;
     }
 };
 
diff --git a/subject/CMakeLists.txt b/subject/CMakeLists.txt
index 5819276..342ca19 100644
--- a/subject/CMakeLists.txt
+++ b/subject/CMakeLists.txt
@@ -1,3 +1,4 @@
 add_executable(subject main.c)
 add_dependencies(subject selfpatch-slr)
-target_compile_options(subject PRIVATE "-fplugin=$")
+target_compile_options(subject PRIVATE -fplugin=$
+    -include ${CMAKE_CURRENT_SOURCE_DIR}/offsetof_passthrough.h)
diff --git a/subject/main.c b/subject/main.c
index 0b88483..094b98c 100644
--- a/subject/main.c
+++ b/subject/main.c
@@ -33,7 +33,7 @@ int main(int argc, char** argv) {
     struct B* someB = (struct B*)malloc(sizeof(struct B));
     float* someB_m4 = &someB->m4;
 
-    container_of(someB_m4, struct B, m4)->m1 = 'x';
+    container_of(someB_m4, struct B, m4)->m1 = 'x'; // the "sub 0x..." is currently not recognized via ADDR_EXPR
 
     return 0;
 }
diff --git a/subject/offsetof_passthrough.h b/subject/offsetof_passthrough.h
new file mode 100644
index 0000000..a3be078
--- /dev/null
+++ b/subject/offsetof_passthrough.h
@@ -0,0 +1,29 @@
+#ifndef PLUGIN_OFFSETOF_H
+#define PLUGIN_OFFSETOF_H
+
+#include <stddef.h> /* defines offsetof to be __builtin_offsetof */
+
+/*
+ * Marker function: hands the stringified type and member of an offsetof,
+ * together with the computed offset, through the C front end to the plugin.
+ * "unused" avoids -Wunused-function in translation units that get this header
+ * (it is passed via -include) but never use offsetof.
+ */
+__attribute__((const, noinline, unused))
+static size_t __spslr_offsetof(const char *t, const char *m, size_t v) {
+    /* never executed; replaced by plugin */
+    (void)t; (void)m; (void)v;
+    return v;
+}
+
+/*
+ * Replace __builtin_offsetof with encoding of type and member that survives C frontend.
+ * NOTE: the replacement is a function call, not an integer constant
+ * expression, so offsetof cannot be used where a constant is required
+ * (array sizes, static initialisers, _Static_assert, case labels).
+ */
+#undef __builtin_offsetof
+#define __builtin_offsetof(T,M) \
+    ((size_t (*)(const char*, const char*, size_t))__spslr_offsetof)((#T), (#M), (((size_t)&((T*)0)->M)))
+
+#endif