From 156da9026012c6fa1668636001c31818d3efe25e Mon Sep 17 00:00:00 2001 From: York Jasper Niebuhr Date: Thu, 30 Oct 2025 13:07:39 +0100 Subject: [PATCH] GCC patch v2 --- README.md | 2 +- gcc_component_ref_v2.patch | 255 +++++++++++++++++++++++++++++++++++++ 2 files changed, 256 insertions(+), 1 deletion(-) create mode 100644 gcc_component_ref_v2.patch diff --git a/README.md b/README.md index ab4ab9c..2f21937 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ Warning: Rough road ahead. Now to the annoying part: The GNU front-end for the C language folds offsetof-like expressions into constants. In the parser. IN. THE. PARSER. Any hooks/events available to plugins happen significantly later in the pipeline. Thus, SPSLR can not detect offsetofs, be it \_\_builtin\_offsetof or the DIY variants (`((size\_t)&((struct S\*)0)-\>m)`), using current GCC versions. -To deal with this reliably, using a custom GCC build is necessary. The required patch is provided in this repo. To use it and install the custom gcc, use these commands: +To deal with this reliably, using a custom GCC build is necessary. The required patch is provided in this repo (do NOT use the v2 yet!). 
To use it and install the custom gcc, use these commands: ```bash git clone git://gcc.gnu.org/git/gcc.git diff --git a/gcc_component_ref_v2.patch b/gcc_component_ref_v2.patch new file mode 100644 index 0000000..8467db8 --- /dev/null +++ b/gcc_component_ref_v2.patch @@ -0,0 +1,255 @@ +From 474c2208836a447bb2452a45a0e76453b1d9c56c Mon Sep 17 00:00:00 2001 +From: York Jasper Niebuhr +Date: Thu, 30 Oct 2025 13:04:18 +0100 +Subject: [PATCH] c_parse_component_ref callback + +--- + gcc/c-family/c-common.cc | 48 ++++++++++++++++++++------- + gcc/c-family/c-common.h | 3 +- + gcc/c/c-parser.cc | 70 +++++++++++++++++++++++++++++++++++++++- + 3 files changed, 108 insertions(+), 13 deletions(-) + +diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc +index 587d76461e9..d34edfaa688 100644 +--- a/gcc/c-family/c-common.cc ++++ b/gcc/c-family/c-common.cc +@@ -7076,43 +7076,48 @@ c_common_to_target_charset (HOST_WIDE_INT c) + the whole expression. Return the folded result. */ + + tree +-fold_offsetof (tree expr, tree type, enum tree_code ctx) ++fold_offsetof (tree expr, tree type, enum tree_code ctx, bool may_fail) + { + tree base, off, t; + tree_code code = TREE_CODE (expr); ++ + switch (code) + { + case ERROR_MARK: + return expr; + + case VAR_DECL: +- error ("cannot apply %<offsetof%> to static data member %qD", expr); ++ if (!may_fail) ++ error ("cannot apply %<offsetof%> to static data member %qD", expr); + return error_mark_node; + + case CALL_EXPR: + case TARGET_EXPR: +- error ("cannot apply %<offsetof%> when %<operator[]%> is overloaded"); ++ if (!may_fail) ++ error ("cannot apply %<offsetof%> when %<operator[]%> is overloaded"); + return error_mark_node; + + case NOP_EXPR: + case INDIRECT_REF: + if (!TREE_CONSTANT (TREE_OPERAND (expr, 0))) + { +- error ("cannot apply %<offsetof%> to a non constant address"); ++ if (!may_fail) ++ error ("cannot apply %<offsetof%> to a non constant address"); + return error_mark_node; + } + return convert (type, TREE_OPERAND (expr, 0)); + + case COMPONENT_REF: +- base = fold_offsetof (TREE_OPERAND (expr, 0), type, code); 
++ base = fold_offsetof (TREE_OPERAND (expr, 0), type, code, may_fail); + if (base == error_mark_node) + return base; + + t = TREE_OPERAND (expr, 1); + if (DECL_C_BIT_FIELD (t)) + { +- error ("attempt to take address of bit-field structure " +- "member %qD", t); ++ if (!may_fail) ++ error ("attempt to take address of bit-field structure " ++ "member %qD", t); + return error_mark_node; + } + off = size_binop_loc (input_location, PLUS_EXPR, DECL_FIELD_OFFSET (t), +@@ -7121,7 +7126,7 @@ fold_offsetof (tree expr, tree type, enum tree_code ctx) + break; + + case ARRAY_REF: +- base = fold_offsetof (TREE_OPERAND (expr, 0), type, code); ++ base = fold_offsetof (TREE_OPERAND (expr, 0), type, code, may_fail); + if (base == error_mark_node) + return base; + +@@ -7178,17 +7183,38 @@ fold_offsetof (tree expr, tree type, enum tree_code ctx) + case COMPOUND_EXPR: + /* Handle static members of volatile structs. */ + t = TREE_OPERAND (expr, 1); +- gcc_checking_assert (VAR_P (get_base_address (t))); +- return fold_offsetof (t, type); ++ if (!VAR_P (get_base_address (t))) ++ return error_mark_node; ++ return fold_offsetof (t, type, ERROR_MARK, may_fail); + + default: +- gcc_unreachable (); ++ return error_mark_node; + } + + if (!POINTER_TYPE_P (type)) + return size_binop (PLUS_EXPR, base, convert (type, off)); + return fold_build_pointer_plus (base, off); + } ++ ++/* Tries folding expr using fold_offsetof. On success, the folded offsetof ++ is returned. On failure, the original expr is wrapped in an ADDR_EXPR ++ and converted to the desired expression type. The resulting expression ++ may or may not be constant! */ ++ ++tree ++fold_offsetof_maybe (tree expr, tree type) ++{ ++ /* expr might not have the correct structure, thus folding may fail. 
 */ ++ tree maybe_folded = fold_offsetof (expr, type, ERROR_MARK, true); ++ if (maybe_folded != error_mark_node) ++ return maybe_folded; ++ ++ tree ptr_type = build_pointer_type (TREE_TYPE (expr)); ++ tree ptr = build1 (ADDR_EXPR, ptr_type, expr); ++ ++ return fold_convert (type, ptr); ++} ++ + + /* *PTYPE is an incomplete array. Complete it with a domain based on + INITIAL_VALUE. If INITIAL_VALUE is not present, use 1 if DO_DEFAULT +diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h +index ea6c2975056..70fcfeb6661 100644 +--- a/gcc/c-family/c-common.h ++++ b/gcc/c-family/c-common.h +@@ -1174,7 +1174,8 @@ extern bool c_dump_tree (void *, tree); + extern void verify_sequence_points (tree); + + extern tree fold_offsetof (tree, tree = size_type_node, +- tree_code ctx = ERROR_MARK); ++ tree_code ctx = ERROR_MARK, bool may_fail = false); ++extern tree fold_offsetof_maybe (tree, tree = size_type_node); + + extern int complete_array_type (tree *, tree, bool); + extern void complete_flexible_array_elts (tree); +diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc +index 22ec0f849b7..5410b9dc003 100644 +--- a/gcc/c/c-parser.cc ++++ b/gcc/c/c-parser.cc +@@ -11354,6 +11354,62 @@ get_counted_by_ref (tree array_ref) + return NULL_TREE; + } + ++/* Callback type for notifying plugins when the C parser constructs ++ a COMPONENT_REF expression. ++ ++ The callback receives the COMPONENT_REF tree that has just been parsed. ++ It may optionally return a replacement tree, which will be used instead ++ of the original if it is type-compatible. Returning NULL_TREE leaves ++ the expression unchanged. ++ ++ This callback is intended for plugins that wish to observe or transform ++ member-access expressions (such as 'a.b' or 'a->b') at parse time. */ ++using c_parse_component_ref_cb_t = tree (*)(tree ref); ++ ++/* Plugin-registered callback for COMPONENT_REF parse notifications. ++ Initialized to NULL when no plugin has registered a callback. 
 */ ++static c_parse_component_ref_cb_t c_parse_component_ref_cb = nullptr; ++ ++/* Register a plugin callback to be invoked for each parsed COMPONENT_REF. ++ ++ Only a single callback is supported; registering a new one replaces ++ any previously registered callback. */ ++void ++register_c_parse_component_ref_cb (c_parse_component_ref_cb_t cb) ++{ ++ c_parse_component_ref_cb = cb; ++} ++ ++/* Helper to notify the registered plugin callback that a COMPONENT_REF ++ has been parsed. ++ ++ If a plugin has registered a callback, this function invokes it with ++ the given COMPONENT_REF tree. If the callback returns a non-NULL ++ tree whose type is compatible with the original (as determined by ++ comptypes), the COMPONENT_REF is replaced with that tree. ++ ++ This preserves parser invariants and prevents type inconsistencies ++ in subsequent compilation stages. */ ++static void ++notify_plugin_parse_component_ref (tree* ref) ++{ ++ if (!ref || !c_parse_component_ref_cb) ++ return; ++ ++ tree repl = c_parse_component_ref_cb (*ref); ++ if (!repl) ++ return; ++ ++ if (comptypes (TREE_TYPE (*ref), TREE_TYPE (repl))) ++ { ++ *ref = repl; ++ return; ++ } ++ ++ warning (0, "plugin: tree returned from %<c_parse_component_ref_cb%> " ++ "has incompatible type; ignored"); ++} ++ + /* Parse a postfix expression (C90 6.3.1-6.3.2, C99 6.5.1-6.5.2, + C11 6.5.1-6.5.2). Compound literals aren't handled here; callers have to + call c_parser_postfix_expression_after_paren_type on encountering them. 
+@@ -11766,6 +11822,9 @@ c_parser_postfix_expression (c_parser *parser) + = build_component_ref (loc, offsetof_ref, comp_tok->value, + comp_tok->location, UNKNOWN_LOCATION, + false); ++ ++ notify_plugin_parse_component_ref (&offsetof_ref); ++ + c_parser_consume_token (parser); + while (c_parser_next_token_is (parser, CPP_DOT) + || c_parser_next_token_is (parser, +@@ -11800,6 +11859,9 @@ c_parser_postfix_expression (c_parser *parser) + comp_tok->location, + UNKNOWN_LOCATION, + false); ++ ++ notify_plugin_parse_component_ref (&offsetof_ref); ++ + c_parser_consume_token (parser); + } + else +@@ -11823,7 +11885,7 @@ c_parser_postfix_expression (c_parser *parser) + location_t end_loc = c_parser_peek_token (parser)->get_finish (); + c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, + "expected %<)%>"); +- expr.value = fold_offsetof (offsetof_ref); ++ expr.value = fold_offsetof_maybe (offsetof_ref); + set_c_expr_source_range (&expr, loc, end_loc); + } + break; +@@ -13771,6 +13833,9 @@ c_parser_postfix_expression_after_primary (c_parser *parser, + c_parser_consume_token (parser); + expr.value = build_component_ref (op_loc, expr.value, ident, + comp_loc, UNKNOWN_LOCATION); ++ ++ notify_plugin_parse_component_ref (&expr.value); ++ + set_c_expr_source_range (&expr, start, finish); + expr.original_code = ERROR_MARK; + if (TREE_CODE (expr.value) != COMPONENT_REF) +@@ -13813,6 +13878,9 @@ c_parser_postfix_expression_after_primary (c_parser *parser, + RO_ARROW), + ident, comp_loc, + expr.get_location ()); ++ ++ notify_plugin_parse_component_ref (&expr.value); ++ + set_c_expr_source_range (&expr, start, finish); + expr.original_code = ERROR_MARK; + if (TREE_CODE (expr.value) != COMPONENT_REF) +-- +2.43.0 +