diff --git a/docs/writeup.txt b/docs/writeup.txt index 2346d70..8901464 100644 --- a/docs/writeup.txt +++ b/docs/writeup.txt @@ -13,3 +13,8 @@ precise per-instruction labels are required at RTL level -> attach notes to any RTL instruction that accesses relevant structs -> notes are propagated throughout RTL optimizations -> at the very end of RTL handling, discover notes and add assembly labels + +Constant-foldable expressions, e.g. "size_t myOffset = ((size_t)&((struct task_struct*)0)->tasks);", are invisible to the plugin + -> gcc folds those expressions before PLUGIN_FINISH_PARSE_FUNCTION + -> recovering the COMPONENT_REF afterwards is impossible + -> requires a gcc patch (a fairly small one, though) diff --git a/plugin/CMakeLists.txt b/plugin/CMakeLists.txt index e17c2fb..bb02e15 100644 --- a/plugin/CMakeLists.txt +++ b/plugin/CMakeLists.txt @@ -1,4 +1,4 @@ -add_library(selfpatch-slr SHARED main.cpp attrib.cpp access_discover.cpp) +add_library(selfpatch-slr SHARED main.cpp attrib.cpp access_discover.cpp pattern.cpp) set_target_properties(selfpatch-slr PROPERTIES PREFIX "") target_compile_definitions(selfpatch-slr PRIVATE _GNU_SOURCE) diff --git a/plugin/access_discover.cpp b/plugin/access_discover.cpp index 7d0d011..0829c9f 100644 --- a/plugin/access_discover.cpp +++ b/plugin/access_discover.cpp @@ -1,4 +1,5 @@ #include "access_discover.h" +#include "pattern.h" #include @@ -35,45 +36,11 @@ static void print_gimple_statement(gimple* stmt) { } static int scan_gimple_statement(const char* funcname, gimple_stmt_iterator* gsi) { - /* - gimple *stmt = gsi_stmt(gsi); - scan_stmt_for_offsetof(funcname, &gsi); - for (unsigned i = 0; i < gimple_num_ops(stmt); ++i) { - tree op = gimple_op(stmt, i); - scan_tree_for_components(op, funcname, &gsi); - } - - Build map: location->LocationPattern - Build GimpleStatementPattern tree from individual gimple statement - Add GimpleStatementPattern to LocationPattern (attached at matching variable names (potentially unnamed ssa)) - Patterns include markers 
for member offsets (type, member, value) - Later load a set of RTLInstructionPattern to match the patterns - */ - print_gimple_statement(gsi_stmt(*gsi)); - - gimple* stmt = gsi_stmt(*gsi); - enum gimple_code stmt_code = gimple_code(stmt); - - switch (stmt_code) { - case GIMPLE_CALL: - // check for offsetof, then fall through to operand scanning - case GIMPLE_ASSIGN: - case GIMPLE_COND: - case GIMPLE_LABEL: - case GIMPLE_RETURN: - return 0; - default: - return 1; - } - + //return register_gimple_statement_pattern(gsi); return 0; } -static void clean_unnecessary_locations() { - // Remove all LocationPatterns that do no do any relevant struct accesses -} - static const pass_data access_discover_pass_data = { GIMPLE_PASS, "access_discover", @@ -100,6 +67,6 @@ unsigned int access_discover_pass::execute(function* fun) { } } - clean_unnecessary_locations(); + clean_unnecessary_patterns(); return 0; } diff --git a/plugin/gcc_includes.h b/plugin/gcc_includes.h index dd1b862..2257c79 100644 --- a/plugin/gcc_includes.h +++ b/plugin/gcc_includes.h @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include #include #include diff --git a/plugin/pattern.cpp b/plugin/pattern.cpp new file mode 100644 index 0000000..a091d85 --- /dev/null +++ b/plugin/pattern.cpp @@ -0,0 +1,70 @@ +#include "pattern.h" + +#include + +class LocationPattern { + +public: + LocationPattern() {} + ~LocationPattern() {} +}; + +static std::unordered_map locations; + +int register_gimple_statement_pattern(gimple_stmt_iterator* gsi) { + gimple* stmt = gsi_stmt(*gsi); + enum gimple_code stmt_code = gimple_code(stmt); + location_t stmt_location = gimple_location(stmt); + + auto lp_it = locations.find(stmt_location); + if (lp_it == locations.end()) { + auto [new_it, success] = locations.emplace(stmt_location, LocationPattern{}); + if (!success) + return 1; + + lp_it = new_it; + } + + LocationPattern& pattern = lp_it->second; + + /* + scan_stmt_for_offsetof(funcname, &gsi); + for (unsigned i = 0; i < 
gimple_num_ops(stmt); ++i) { + tree op = gimple_op(stmt, i); + scan_tree_for_components(op, funcname, &gsi); + } + + Build GimpleStatementPattern tree from individual gimple statement + Add GimpleStatementPattern to LocationPattern (attached at matching variable names (potentially unnamed ssa)) + Patterns include markers for member offsets (type, member, value) + Later load a set of RTLInstructionPattern to match the patterns + */ + + switch (stmt_code) { + case GIMPLE_CALL: + // check for offsetof, then fall through to operand scanning + case GIMPLE_ASSIGN: + case GIMPLE_COND: + case GIMPLE_LABEL: + case GIMPLE_RETURN: + return 0; + default: + return 1; + } + + return 0; +} + +void clean_unnecessary_patterns() { + // TODO +} + +int register_rtl_instruction_pattern(rtx_insn* i) { + // TODO + return 0; +} + +int annotate_rtl() { + // TODO + return 0; +} diff --git a/plugin/pattern.h b/plugin/pattern.h new file mode 100644 index 0000000..53a027d --- /dev/null +++ b/plugin/pattern.h @@ -0,0 +1,7 @@ +#pragma once +#include "gcc_includes.h" + +int register_gimple_statement_pattern(gimple_stmt_iterator* gsi); // 1 +void clean_unnecessary_patterns(); // 2 +int register_rtl_instruction_pattern(rtx_insn* i); // 3 +int annotate_rtl(); // 4 diff --git a/subject/main.c b/subject/main.c index 7b2f6aa..620bb2a 100644 --- a/subject/main.c +++ b/subject/main.c @@ -75,6 +75,8 @@ int main(void) printf(" pid=%d, comm=%s\n", task->pid, task->comm); } + size_t myOffset = ((size_t)&((struct task_struct*)0)->tasks); // BROKEN, relevancy for kernel unknown + return 0; }