summaryrefslogtreecommitdiffstats
path: root/meta/recipes-devtools/gcc/gcc/0002-CVE-2021-42574.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta/recipes-devtools/gcc/gcc/0002-CVE-2021-42574.patch')
-rw-r--r--meta/recipes-devtools/gcc/gcc/0002-CVE-2021-42574.patch1765
1 files changed, 0 insertions, 1765 deletions
diff --git a/meta/recipes-devtools/gcc/gcc/0002-CVE-2021-42574.patch b/meta/recipes-devtools/gcc/gcc/0002-CVE-2021-42574.patch
deleted file mode 100644
index 9bad81d4d0..0000000000
--- a/meta/recipes-devtools/gcc/gcc/0002-CVE-2021-42574.patch
+++ /dev/null
@@ -1,1765 +0,0 @@
-From 51c500269bf53749b107807d84271385fad35628 Mon Sep 17 00:00:00 2001
-From: Marek Polacek <polacek@redhat.com>
-Date: Wed, 6 Oct 2021 14:33:59 -0400
-Subject: [PATCH] libcpp: Implement -Wbidi-chars for CVE-2021-42574 [PR103026]
-
-From a link below:
-"An issue was discovered in the Bidirectional Algorithm in the Unicode
-Specification through 14.0. It permits the visual reordering of
-characters via control sequences, which can be used to craft source code
-that renders different logic than the logical ordering of tokens
-ingested by compilers and interpreters. Adversaries can leverage this to
-encode source code for compilers accepting Unicode such that targeted
-vulnerabilities are introduced invisibly to human reviewers."
-
-More info:
-https://nvd.nist.gov/vuln/detail/CVE-2021-42574
-https://trojansource.codes/
-
-This is not a compiler bug. However, to mitigate the problem, this patch
-implements -Wbidi-chars=[none|unpaired|any] to warn about possibly
-misleading Unicode bidirectional control characters the preprocessor may
-encounter.
-
-The default is =unpaired, which warns about improperly terminated
-bidirectional control characters; e.g. a LRE without its corresponding PDF.
-The level =any warns about any use of bidirectional control characters.
-
-This patch handles both UCNs and UTF-8 characters. UCNs designating
-bidi characters in identifiers are accepted since r204886. Then r217144
-enabled -fextended-identifiers by default. Extended characters in C/C++
-identifiers have been accepted since r275979. However, this patch still
-warns about mixing UTF-8 and UCN bidi characters; there seems to be no
-good reason to allow mixing them.
-
-We warn in different contexts: comments (both C and C++-style), string
-literals, character constants, and identifiers. Expectedly, UCNs are ignored
-in comments and raw string literals. The bidirectional control characters
-can nest so this patch handles that as well.
-
-I have not included nor tested this at all with Fortran (which also has
-string literals and line comments).
-
-Dave M. posted patches improving diagnostic involving Unicode characters.
-This patch does not make use of this new infrastructure yet.
-
- PR preprocessor/103026
-
-gcc/c-family/ChangeLog:
-
- * c.opt (Wbidi-chars, Wbidi-chars=): New option.
-
-gcc/ChangeLog:
-
- * doc/invoke.texi: Document -Wbidi-chars.
-
-libcpp/ChangeLog:
-
- * include/cpplib.h (enum cpp_bidirectional_level): New.
- (struct cpp_options): Add cpp_warn_bidirectional.
- (enum cpp_warning_reason): Add CPP_W_BIDIRECTIONAL.
- * internal.h (struct cpp_reader): Add warn_bidi_p member
- function.
- * init.c (cpp_create_reader): Set cpp_warn_bidirectional.
- * lex.c (bidi): New namespace.
- (get_bidi_utf8): New function.
- (get_bidi_ucn): Likewise.
- (maybe_warn_bidi_on_close): Likewise.
- (maybe_warn_bidi_on_char): Likewise.
- (_cpp_skip_block_comment): Implement warning about bidirectional
- control characters.
- (skip_line_comment): Likewise.
- (forms_identifier_p): Likewise.
- (lex_identifier): Likewise.
- (lex_string): Likewise.
- (lex_raw_string): Likewise.
-
-gcc/testsuite/ChangeLog:
-
- * c-c++-common/Wbidi-chars-1.c: New test.
- * c-c++-common/Wbidi-chars-2.c: New test.
- * c-c++-common/Wbidi-chars-3.c: New test.
- * c-c++-common/Wbidi-chars-4.c: New test.
- * c-c++-common/Wbidi-chars-5.c: New test.
- * c-c++-common/Wbidi-chars-6.c: New test.
- * c-c++-common/Wbidi-chars-7.c: New test.
- * c-c++-common/Wbidi-chars-8.c: New test.
- * c-c++-common/Wbidi-chars-9.c: New test.
- * c-c++-common/Wbidi-chars-10.c: New test.
- * c-c++-common/Wbidi-chars-11.c: New test.
- * c-c++-common/Wbidi-chars-12.c: New test.
- * c-c++-common/Wbidi-chars-13.c: New test.
- * c-c++-common/Wbidi-chars-14.c: New test.
- * c-c++-common/Wbidi-chars-15.c: New test.
- * c-c++-common/Wbidi-chars-16.c: New test.
- * c-c++-common/Wbidi-chars-17.c: New test.
-
-CVE: CVE-2021-42574
-Upstream-Status: Backport [https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=51c500269bf53749b107807d84271385fad35628]
-Signed-off-by: Pgowda <pgowda.cve@gmail.com>
-
----
- gcc/c-family/c.opt | 24 ++
- gcc/doc/invoke.texi | 21 +-
- gcc/testsuite/c-c++-common/Wbidi-chars-1.c | 12 +
- gcc/testsuite/c-c++-common/Wbidi-chars-10.c | 27 ++
- gcc/testsuite/c-c++-common/Wbidi-chars-11.c | 13 +
- gcc/testsuite/c-c++-common/Wbidi-chars-12.c | 19 +
- gcc/testsuite/c-c++-common/Wbidi-chars-13.c | 17 +
- gcc/testsuite/c-c++-common/Wbidi-chars-14.c | 38 ++
- gcc/testsuite/c-c++-common/Wbidi-chars-15.c | 59 +++
- gcc/testsuite/c-c++-common/Wbidi-chars-16.c | 26 ++
- gcc/testsuite/c-c++-common/Wbidi-chars-17.c | 30 ++
- gcc/testsuite/c-c++-common/Wbidi-chars-2.c | 9 +
- gcc/testsuite/c-c++-common/Wbidi-chars-3.c | 11 +
- gcc/testsuite/c-c++-common/Wbidi-chars-4.c | 188 +++++++++
- gcc/testsuite/c-c++-common/Wbidi-chars-5.c | 188 +++++++++
- gcc/testsuite/c-c++-common/Wbidi-chars-6.c | 155 ++++++++
- gcc/testsuite/c-c++-common/Wbidi-chars-7.c | 9 +
- gcc/testsuite/c-c++-common/Wbidi-chars-8.c | 13 +
- gcc/testsuite/c-c++-common/Wbidi-chars-9.c | 29 ++
- libcpp/include/cpplib.h | 18 +-
- libcpp/init.c | 1 +
- libcpp/internal.h | 7 +
- libcpp/lex.c | 408 +++++++++++++++++++-
- 23 files changed, 1315 insertions(+), 7 deletions(-)
- create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-1.c
- create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-10.c
- create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-11.c
- create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-12.c
- create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-13.c
- create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-14.c
- create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-15.c
- create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-16.c
- create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-17.c
- create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-2.c
- create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-3.c
- create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-4.c
- create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-5.c
- create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-6.c
- create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-7.c
- create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-8.c
- create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-9.c
-
-diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
-index 8a4cd634f77..3976fc368db 100644
---- a/gcc/c-family/c.opt
-+++ b/gcc/c-family/c.opt
-@@ -370,6 +370,30 @@ Wbad-function-cast
- C ObjC Var(warn_bad_function_cast) Warning
- Warn about casting functions to incompatible types.
-
-+Wbidi-chars
-+C ObjC C++ ObjC++ Warning Alias(Wbidi-chars=,any,none)
-+;
-+
-+Wbidi-chars=
-+C ObjC C++ ObjC++ RejectNegative Joined Warning CPP(cpp_warn_bidirectional) CppReason(CPP_W_BIDIRECTIONAL) Var(warn_bidirectional) Init(bidirectional_unpaired) Enum(cpp_bidirectional_level)
-+-Wbidi-chars=[none|unpaired|any] Warn about UTF-8 bidirectional control characters.
-+
-+; Required for these enum values.
-+SourceInclude
-+cpplib.h
-+
-+Enum
-+Name(cpp_bidirectional_level) Type(int) UnknownError(argument %qs to %<-Wbidi-chars%> not recognized)
-+
-+EnumValue
-+Enum(cpp_bidirectional_level) String(none) Value(bidirectional_none)
-+
-+EnumValue
-+Enum(cpp_bidirectional_level) String(unpaired) Value(bidirectional_unpaired)
-+
-+EnumValue
-+Enum(cpp_bidirectional_level) String(any) Value(bidirectional_any)
-+
- Wbool-compare
- C ObjC C++ ObjC++ Var(warn_bool_compare) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall)
- Warn about boolean expression compared with an integer value different from true/false.
-diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
-index 6070288856c..a22758d18ee 100644
---- a/gcc/doc/invoke.texi
-+++ b/gcc/doc/invoke.texi
-@@ -326,7 +326,9 @@ Objective-C and Objective-C++ Dialects}.
- -Warith-conversion @gol
- -Warray-bounds -Warray-bounds=@var{n} @gol
- -Wno-attributes -Wattribute-alias=@var{n} -Wno-attribute-alias @gol
---Wno-attribute-warning -Wbool-compare -Wbool-operation @gol
-+-Wno-attribute-warning @gol
-+-Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]} @gol
-+-Wbool-compare -Wbool-operation @gol
- -Wno-builtin-declaration-mismatch @gol
- -Wno-builtin-macro-redefined -Wc90-c99-compat -Wc99-c11-compat @gol
- -Wc11-c2x-compat @gol
-@@ -7559,6 +7561,23 @@ Attributes considered include @code{allo
- This is the default. You can disable these warnings with either
- @option{-Wno-attribute-alias} or @option{-Wattribute-alias=0}.
-
-+@item -Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]}
-+@opindex Wbidi-chars=
-+@opindex Wbidi-chars
-+@opindex Wno-bidi-chars
-+Warn about possibly misleading UTF-8 bidirectional control characters in
-+comments, string literals, character constants, and identifiers. Such
-+characters can change left-to-right writing direction into right-to-left
-+(and vice versa), which can cause confusion between the logical order and
-+visual order. This may be dangerous; for instance, it may seem that a piece
-+of code is not commented out, whereas it in fact is.
-+
-+There are three levels of warning supported by GCC@. The default is
-+@option{-Wbidi-chars=unpaired}, which warns about improperly terminated
-+bidi contexts. @option{-Wbidi-chars=none} turns the warning off.
-+@option{-Wbidi-chars=any} warns about any use of bidirectional control
-+characters.
-+
- @item -Wbool-compare
- @opindex Wno-bool-compare
- @opindex Wbool-compare
-diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-10.c b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c
-new file mode 100644
-index 00000000000..34f5ac19271
---- /dev/null
-+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c
-@@ -0,0 +1,27 @@
-+/* PR preprocessor/103026 */
-+/* { dg-do compile } */
-+/* { dg-options "-Wbidi-chars=unpaired" } */
-+/* More nesting testing. */
-+
-+/* RLEâ« LRI⦠PDF⬠PDIâ©*/
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int LRE_\u202a_PDF_\u202c;
-+int LRE_\u202a_PDF_\u202c_LRE_\u202a_PDF_\u202c;
-+int LRE_\u202a_LRI_\u2066_PDF_\u202c_PDI_\u2069;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int RLE_\u202b_RLI_\u2067_PDF_\u202c_PDI_\u2069;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int RLE_\u202b_RLI_\u2067_PDI_\u2069_PDF_\u202c;
-+int FSI_\u2068_LRO_\u202d_PDI_\u2069_PDF_\u202c;
-+int FSI_\u2068;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int FSI_\u2068_PDI_\u2069;
-+int FSI_\u2068_FSI_\u2068_PDI_\u2069;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
-+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDF_\u202c;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_FSI_\u2068_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-11.c b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c
-new file mode 100644
-index 00000000000..270ce2368a9
---- /dev/null
-+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c
-@@ -0,0 +1,13 @@
-+/* PR preprocessor/103026 */
-+/* { dg-do compile } */
-+/* { dg-options "-Wbidi-chars=unpaired" } */
-+/* Test that we warn when mixing UCN and UTF-8. */
-+
-+int LRE_âª_PDF_\u202c;
-+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
-+int LRE_\u202a_PDF_â¬_;
-+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
-+const char *s1 = "LRE_âª_PDF_\u202c";
-+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
-+const char *s2 = "LRE_\u202a_PDF_â¬";
-+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
-diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-12.c b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c
-new file mode 100644
-index 00000000000..b07eec1da91
---- /dev/null
-+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c
-@@ -0,0 +1,19 @@
-+/* PR preprocessor/103026 */
-+/* { dg-do compile { target { c || c++11 } } } */
-+/* { dg-options "-Wbidi-chars=any" } */
-+/* Test raw strings. */
-+
-+const char *s1 = R"(a b c LRE⪠1 2 3 PDF⬠x y z)";
-+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
-+const char *s2 = R"(a b c RLE⫠1 2 3 PDF⬠x y z)";
-+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
-+const char *s3 = R"(a b c LRO⭠1 2 3 PDF⬠x y z)";
-+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
-+const char *s4 = R"(a b c RLO⮠1 2 3 PDF⬠x y z)";
-+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
-+const char *s7 = R"(a b c FSI⨠1 2 3 PDI⩠x y) z";
-+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
-+const char *s8 = R"(a b c PDIâ© x y )z";
-+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
-+const char *s9 = R"(a b c PDF⬠x y z)";
-+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
-diff -uprN '-x*.orig' '-x*.rej' del/gcc-11.2.0/gcc/testsuite/c-c++-common/Wbidi-chars-13.c gcc-11.2.0/gcc/testsuite/c-c++-common/Wbidi-chars-13.c
---- del/gcc-11.2.0/gcc/testsuite/c-c++-common/Wbidi-chars-13.c 1969-12-31 16:00:00.000000000 -0800
-+++ gcc-11.2.0/gcc/testsuite/c-c++-common/Wbidi-chars-13.c 2021-12-13 23:11:22.328439287 -0800
-@@ -0,0 +1,17 @@
-+/* PR preprocessor/103026 */
-+/* { dg-do compile { target { c || c++11 } } } */
-+/* { dg-options "-Wbidi-chars=unpaired" } */
-+/* Test raw strings. */
-+
-+const char *s1 = R"(a b c LRE⪠1 2 3)";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+const char *s2 = R"(a b c RLEâ« 1 2 3)";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+const char *s3 = R"(a b c LROâ­ 1 2 3)";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+const char *s4 = R"(a b c FSI⨠1 2 3)";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+const char *s5 = R"(a b c LRI⦠1 2 3)";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+const char *s6 = R"(a b c RLI⧠1 2 3)";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-14.c b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c
-new file mode 100644
-index 00000000000..ba5f75d9553
---- /dev/null
-+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c
-@@ -0,0 +1,38 @@
-+/* PR preprocessor/103026 */
-+/* { dg-do compile } */
-+/* { dg-options "-Wbidi-chars=unpaired" } */
-+/* Test PDI handling, which also pops any subsequent LREs, RLEs, LROs,
-+ or RLOs. */
-+
-+/* LRI_â¦_LRI_â¦_RLE_â«_RLE_â«_RLE_â«_PDI_â©*/
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+// LRI_â¦_RLE_â«_RLE_â«_RLE_â«_PDI_â©
-+// LRI_â¦_RLO_â®_RLE_â«_RLE_â«_PDI_â©
-+// LRI_â¦_RLO_â®_RLE_â«_PDI_â©
-+// FSI_â¨_RLO_â®_PDI_â©
-+// FSI_â¨_FSI_â¨_RLO_â®_PDI_â©
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+
-+int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069;
-+int LRI_\u2066_LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int PDI_\u2069;
-+int LRI_\u2066_PDI_\u2069;
-+int RLI_\u2067_PDI_\u2069;
-+int LRE_\u202a_LRI_\u2066_PDI_\u2069;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int LRI_\u2066_LRE_\u202a_PDF_\u202c_PDI_\u2069;
-+int LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
-+int RLI_\u2067_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int FSI_\u2068_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int RLO_\u202e_PDI_\u2069;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int RLI_\u2067_PDI_\u2069_RLI_\u2067;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int FSI_\u2068_PDF_\u202c_PDI_\u2069;
-+int FSI_\u2068_FSI_\u2068_PDF_\u202c_PDI_\u2069;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-15.c b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c
-new file mode 100644
-index 00000000000..a0ce8ff5e2c
---- /dev/null
-+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c
-@@ -0,0 +1,59 @@
-+/* PR preprocessor/103026 */
-+/* { dg-do compile } */
-+/* { dg-options "-Wbidi-chars=unpaired" } */
-+/* Test unpaired bidi control chars in multiline comments. */
-+
-+/*
-+ * LRE⪠end
-+ */
-+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
-+/*
-+ * RLEâ« end
-+ */
-+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
-+/*
-+ * LROâ­ end
-+ */
-+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
-+/*
-+ * RLOâ® end
-+ */
-+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
-+/*
-+ * LRI⦠end
-+ */
-+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
-+/*
-+ * RLI⧠end
-+ */
-+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
-+/*
-+ * FSI⨠end
-+ */
-+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
-+/* LREâª
-+ PDF⬠*/
-+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
-+/* FSIâ¨
-+ PDIâ© */
-+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
-+
-+/* LRE<âª>
-+ *
-+ */
-+/* { dg-warning "unpaired" "" { target *-*-* } .-3 } */
-+
-+/*
-+ * LRE<âª>
-+ */
-+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
-+
-+/*
-+ *
-+ * LRE<âª> */
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+
-+/* RLI<â§> */ /* PDI<â©> */
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+/* LRE<âª> */ /* PDF<â¬> */
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-16.c b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c
-new file mode 100644
-index 00000000000..baa0159861c
---- /dev/null
-+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c
-@@ -0,0 +1,26 @@
-+/* PR preprocessor/103026 */
-+/* { dg-do compile } */
-+/* { dg-options "-Wbidi-chars=any" } */
-+/* Test LTR/RTL chars. */
-+
-+/* LTR<â> */
-+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
-+// LTR<â>
-+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
-+/* RTL<â> */
-+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
-+// RTL<â>
-+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
-+
-+const char *s1 = "LTR<â>";
-+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
-+const char *s2 = "LTR\u200e";
-+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
-+const char *s3 = "LTR\u200E";
-+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
-+const char *s4 = "RTL<â>";
-+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
-+const char *s5 = "RTL\u200f";
-+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
-+const char *s6 = "RTL\u200F";
-+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
-diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-17.c b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c
-new file mode 100644
-index 00000000000..07cb4321f96
---- /dev/null
-+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c
-@@ -0,0 +1,30 @@
-+/* PR preprocessor/103026 */
-+/* { dg-do compile } */
-+/* { dg-options "-Wbidi-chars=unpaired" } */
-+/* Test LTR/RTL chars. */
-+
-+/* LTR<â> */
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+// LTR<â>
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+/* RTL<â> */
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+// RTL<â>
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+int ltr_\u200e;
-+/* { dg-error "universal character " "" { target *-*-* } .-1 } */
-+int rtl_\u200f;
-+/* { dg-error "universal character " "" { target *-*-* } .-1 } */
-+
-+const char *s1 = "LTR<â>";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+const char *s2 = "LTR\u200e";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+const char *s3 = "LTR\u200E";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+const char *s4 = "RTL<â>";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+const char *s5 = "RTL\u200f";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+const char *s6 = "RTL\u200F";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-1.c b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c
-new file mode 100644
-index 00000000000..2340374f276
---- /dev/null
-+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c
-@@ -0,0 +1,12 @@
-+/* PR preprocessor/103026 */
-+/* { dg-do compile } */
-+
-+int main() {
-+ int isAdmin = 0;
-+ /*â® } â¦if (isAdmin)⩠⦠begin admins only */
-+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
-+ __builtin_printf("You are an admin.\n");
-+ /* end admins only â® { â¦*/
-+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
-+ return 0;
-+}
-diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-2.c b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c
-new file mode 100644
-index 00000000000..2340374f276
---- /dev/null
-+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c
-@@ -0,0 +1,9 @@
-+/* PR preprocessor/103026 */
-+/* { dg-do compile } */
-+
-+int main() {
-+ /* Say hello; newlineâ§/*/ return 0 ;
-+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
-+ __builtin_printf("Hello world.\n");
-+ return 0;
-+}
-diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-3.c b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c
-new file mode 100644
-index 00000000000..9dc7edb6e64
---- /dev/null
-+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c
-@@ -0,0 +1,11 @@
-+/* PR preprocessor/103026 */
-+/* { dg-do compile } */
-+
-+int main() {
-+ const char* access_level = "user";
-+ if (__builtin_strcmp(access_level, "userâ® â¦// Check if adminâ© â¦")) {
-+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
-+ __builtin_printf("You are an admin.\n");
-+ }
-+ return 0;
-+}
-diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-4.c b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c
-new file mode 100644
-index 00000000000..639e5c62e88
---- /dev/null
-+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c
-@@ -0,0 +1,188 @@
-+/* PR preprocessor/103026 */
-+/* { dg-do compile } */
-+/* { dg-options "-Wbidi-chars=any -Wno-multichar -Wno-overflow" } */
-+/* Test all bidi chars in various contexts (identifiers, comments,
-+ string literals, character constants), both UCN and UTF-8. The bidi
-+ chars here are properly terminated, except for the character constants. */
-+
-+/* a b c LRE⪠1 2 3 PDF⬠x y z */
-+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
-+/* a b c RLE⫠1 2 3 PDF⬠x y z */
-+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
-+/* a b c LRO⭠1 2 3 PDF⬠x y z */
-+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
-+/* a b c RLO⮠1 2 3 PDF⬠x y z */
-+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
-+/* a b c LRI⦠1 2 3 PDI⩠x y z */
-+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
-+/* a b c RLI⧠1 2 3 PDI⩠x y */
-+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
-+/* a b c FSI⨠1 2 3 PDI⩠x y z */
-+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
-+
-+/* Same but C++ comments instead. */
-+// a b c LRE⪠1 2 3 PDF⬠x y z
-+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
-+// a b c RLE⫠1 2 3 PDF⬠x y z
-+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
-+// a b c LRO⭠1 2 3 PDF⬠x y z
-+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
-+// a b c RLO⮠1 2 3 PDF⬠x y z
-+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
-+// a b c LRI⦠1 2 3 PDI⩠x y z
-+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
-+// a b c RLI⧠1 2 3 PDI⩠x y
-+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
-+// a b c FSI⨠1 2 3 PDI⩠x y z
-+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
-+
-+/* Here we're closing an unopened context, warn when =any. */
-+/* a b c PDIâ© x y z */
-+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
-+/* a b c PDF⬠x y z */
-+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
-+// a b c PDIâ© x y z
-+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
-+// a b c PDF⬠x y z
-+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
-+
-+/* Multiline comments. */
-+/* a b c PDIâ© x y z
-+ */
-+/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */
-+/* a b c PDF⬠x y z
-+ */
-+/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */
-+/* first
-+ a b c PDIâ© x y z
-+ */
-+/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */
-+/* first
-+ a b c PDF⬠x y z
-+ */
-+/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */
-+/* first
-+ a b c PDIâ© x y z */
-+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
-+/* first
-+ a b c PDF⬠x y z */
-+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
-+
-+void
-+g1 ()
-+{
-+ const char *s1 = "a b c LRE⪠1 2 3 PDF⬠x y z";
-+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
-+ const char *s2 = "a b c RLE⫠1 2 3 PDF⬠x y z";
-+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
-+ const char *s3 = "a b c LRO⭠1 2 3 PDF⬠x y z";
-+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
-+ const char *s4 = "a b c RLO⮠1 2 3 PDF⬠x y z";
-+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
-+ const char *s5 = "a b c LRI⦠1 2 3 PDI⩠x y z";
-+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
-+ const char *s6 = "a b c RLI⧠1 2 3 PDI⩠x y z";
-+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
-+ const char *s7 = "a b c FSI⨠1 2 3 PDI⩠x y z";
-+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
-+ const char *s8 = "a b c PDIâ© x y z";
-+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
-+ const char *s9 = "a b c PDF⬠x y z";
-+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
-+
-+ const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z";
-+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
-+ const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z";
-+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
-+ const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z";
-+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
-+ const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z";
-+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
-+ const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z";
-+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
-+ const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z";
-+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
-+ const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z";
-+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
-+ const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z";
-+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
-+ const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z";
-+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
-+ const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z";
-+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
-+ const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z";
-+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
-+}
-+
-+void
-+g2 ()
-+{
-+ const char c1 = '\u202a';
-+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
-+ const char c2 = '\u202A';
-+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
-+ const char c3 = '\u202b';
-+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
-+ const char c4 = '\u202B';
-+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
-+ const char c5 = '\u202d';
-+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
-+ const char c6 = '\u202D';
-+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
-+ const char c7 = '\u202e';
-+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
-+ const char c8 = '\u202E';
-+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
-+ const char c9 = '\u2066';
-+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
-+ const char c10 = '\u2067';
-+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
-+ const char c11 = '\u2068';
-+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
-+}
-+
-+int aâªbâ¬c;
-+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
-+int aâ«bâ¬c;
-+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
-+int aâ­bâ¬c;
-+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
-+int aâ®bâ¬c;
-+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
-+int aâ¦bâ©c;
-+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
-+int aâ§bâ©c;
-+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
-+int aâ¨bâ©c;
-+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
-+int Aâ¬X;
-+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
-+int A\u202cY;
-+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
-+int A\u202CY2;
-+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
-+
-+int d\u202ae\u202cf;
-+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
-+int d\u202Ae\u202cf2;
-+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
-+int d\u202be\u202cf;
-+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
-+int d\u202Be\u202cf2;
-+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
-+int d\u202de\u202cf;
-+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
-+int d\u202De\u202cf2;
-+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
-+int d\u202ee\u202cf;
-+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
-+int d\u202Ee\u202cf2;
-+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
-+int d\u2066e\u2069f;
-+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
-+int d\u2067e\u2069f;
-+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
-+int d\u2068e\u2069f;
-+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
-+int X\u2069;
-+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
-diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-5.c b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c
-new file mode 100644
-index 00000000000..68cb053144b
---- /dev/null
-+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c
-@@ -0,0 +1,188 @@
-+/* PR preprocessor/103026 */
-+/* { dg-do compile } */
-+/* { dg-options "-Wbidi-chars=unpaired -Wno-multichar -Wno-overflow" } */
-+/* Test all bidi chars in various contexts (identifiers, comments,
-+ string literals, character constants), both UCN and UTF-8. The bidi
-+ chars here are properly terminated, except for the character constants. */
-+
-+/* a b c LRE⪠1 2 3 PDF⬠x y z */
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+/* a b c RLE⫠1 2 3 PDF⬠x y z */
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+/* a b c LRO⭠1 2 3 PDF⬠x y z */
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+/* a b c RLO⮠1 2 3 PDF⬠x y z */
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+/* a b c LRI⦠1 2 3 PDI⩠x y z */
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+/* a b c RLI⧠1 2 3 PDI⩠x y */
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+/* a b c FSI⨠1 2 3 PDI⩠x y z */
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+
-+/* Same but C++ comments instead. */
-+// a b c LRE⪠1 2 3 PDF⬠x y z
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+// a b c RLE⫠1 2 3 PDF⬠x y z
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+// a b c LRO⭠1 2 3 PDF⬠x y z
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+// a b c RLO⮠1 2 3 PDF⬠x y z
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+// a b c LRI⦠1 2 3 PDI⩠x y z
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+// a b c RLI⧠1 2 3 PDI⩠x y
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+// a b c FSI⨠1 2 3 PDI⩠x y z
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+
-+/* Here we're closing an unopened context, warn when =any. */
-+/* a b c PDIâ© x y z */
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+/* a b c PDF⬠x y z */
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+// a b c PDIâ© x y z
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+// a b c PDF⬠x y z
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+
-+/* Multiline comments. */
-+/* a b c PDIâ© x y z
-+ */
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
-+/* a b c PDF⬠x y z
-+ */
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
-+/* first
-+ a b c PDIâ© x y z
-+ */
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
-+/* first
-+ a b c PDF⬠x y z
-+ */
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
-+/* first
-+ a b c PDIâ© x y z */
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+/* first
-+ a b c PDF⬠x y z */
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+
-+void
-+g1 ()
-+{
-+ const char *s1 = "a b c LRE⪠1 2 3 PDF⬠x y z";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s2 = "a b c RLE⫠1 2 3 PDF⬠x y z";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s3 = "a b c LRO⭠1 2 3 PDF⬠x y z";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s4 = "a b c RLO⮠1 2 3 PDF⬠x y z";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s5 = "a b c LRI⦠1 2 3 PDI⩠x y z";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s6 = "a b c RLI⧠1 2 3 PDI⩠x y z";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s7 = "a b c FSI⨠1 2 3 PDI⩠x y z";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s8 = "a b c PDIâ© x y z";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s9 = "a b c PDF⬠x y z";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+
-+ const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z";
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+}
-+
-+void
-+g2 ()
-+{
-+ const char c1 = '\u202a';
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char c2 = '\u202A';
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char c3 = '\u202b';
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char c4 = '\u202B';
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char c5 = '\u202d';
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char c6 = '\u202D';
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char c7 = '\u202e';
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char c8 = '\u202E';
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char c9 = '\u2066';
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char c10 = '\u2067';
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char c11 = '\u2068';
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+}
-+
-+int aâªbâ¬c;
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+int aâ«bâ¬c;
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+int aâ­bâ¬c;
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+int aâ®bâ¬c;
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+int aâ¦bâ©c;
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+int aâ§bâ©c;
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+int aâ¨bâ©c;
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+int Aâ¬X;
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+int A\u202cY;
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+int A\u202CY2;
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+
-+int d\u202ae\u202cf;
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+int d\u202Ae\u202cf2;
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+int d\u202be\u202cf;
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+int d\u202Be\u202cf2;
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+int d\u202de\u202cf;
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+int d\u202De\u202cf2;
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+int d\u202ee\u202cf;
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+int d\u202Ee\u202cf2;
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+int d\u2066e\u2069f;
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+int d\u2067e\u2069f;
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+int d\u2068e\u2069f;
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-+int X\u2069;
-+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
-diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-6.c b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c
-new file mode 100644
-index 00000000000..0ce6fff2dee
---- /dev/null
-+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c
-@@ -0,0 +1,155 @@
-+/* PR preprocessor/103026 */
-+/* { dg-do compile } */
-+/* { dg-options "-Wbidi-chars=unpaired" } */
-+/* Test nesting of bidi chars in various contexts. */
-+
-+/* Terminated by the wrong char: */
-+/* a b c LRE⪠1 2 3 PDI⩠x y z */
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+/* a b c RLEâ« 1 2 3 PDIâ© x y z*/
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+/* a b c LROâ­ 1 2 3 PDIâ© x y z */
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+/* a b c RLOâ® 1 2 3 PDIâ© x y z */
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+/* a b c LRI⦠1 2 3 PDF⬠x y z */
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+/* a b c RLI⧠1 2 3 PDF⬠x y z */
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+/* a b c FSI⨠1 2 3 PDF⬠x y z*/
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+
-+/* LRE⪠PDF⬠*/
-+/* LRE⪠LRE⪠PDF⬠PDF⬠*/
-+/* PDF⬠LRE⪠PDF⬠*/
-+/* LRE⪠PDF⬠LRE⪠PDF⬠*/
-+/* LRE⪠LRE⪠PDF⬠*/
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+/* PDF⬠LRE⪠*/
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+
-+// a b c LRE⪠1 2 3 PDI⩠x y z
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+// a b c RLEâ« 1 2 3 PDIâ© x y z*/
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+// a b c LROâ­ 1 2 3 PDIâ© x y z
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+// a b c RLOâ® 1 2 3 PDIâ© x y z
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+// a b c LRI⦠1 2 3 PDF⬠x y z
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+// a b c RLI⧠1 2 3 PDF⬠x y z
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+// a b c FSI⨠1 2 3 PDF⬠x y z
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+
-+// LRE⪠PDFâ¬
-+// LRE⪠LRE⪠PDF⬠PDFâ¬
-+// PDF⬠LRE⪠PDFâ¬
-+// LRE⪠PDF⬠LRE⪠PDFâ¬
-+// LRE⪠LRE⪠PDFâ¬
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+// PDF⬠LREâª
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+
-+void
-+g1 ()
-+{
-+ const char *s1 = "a b c LRE⪠1 2 3 PDI⩠x y z";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s2 = "a b c LRE\u202a 1 2 3 PDI\u2069 x y z";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s3 = "a b c RLEâ« 1 2 3 PDIâ© x y ";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s4 = "a b c RLE\u202b 1 2 3 PDI\u2069 x y z";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s5 = "a b c LROâ­ 1 2 3 PDIâ© x y z";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s6 = "a b c LRO\u202d 1 2 3 PDI\u2069 x y z";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s7 = "a b c RLOâ® 1 2 3 PDIâ© x y z";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s8 = "a b c RLO\u202e 1 2 3 PDI\u2069 x y z";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s9 = "a b c LRI⦠1 2 3 PDF⬠x y z";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s10 = "a b c LRI\u2066 1 2 3 PDF\u202c x y z";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s11 = "a b c RLI⧠1 2 3 PDF⬠x y z\
-+ ";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
-+ const char *s12 = "a b c RLI\u2067 1 2 3 PDF\u202c x y z";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s13 = "a b c FSI⨠1 2 3 PDF⬠x y z";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s14 = "a b c FSI\u2068 1 2 3 PDF\u202c x y z";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s15 = "PDF⬠LREâª";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s16 = "PDF\u202c LRE\u202a";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s17 = "LRE⪠PDFâ¬";
-+ const char *s18 = "LRE\u202a PDF\u202c";
-+ const char *s19 = "LRE⪠LRE⪠PDF⬠PDFâ¬";
-+ const char *s20 = "LRE\u202a LRE\u202a PDF\u202c PDF\u202c";
-+ const char *s21 = "PDF⬠LRE⪠PDFâ¬";
-+ const char *s22 = "PDF\u202c LRE\u202a PDF\u202c";
-+ const char *s23 = "LRE⪠LRE⪠PDFâ¬";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s24 = "LRE\u202a LRE\u202a PDF\u202c";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s25 = "PDF⬠LREâª";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s26 = "PDF\u202c LRE\u202a";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s27 = "PDF⬠LRE\u202a";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+ const char *s28 = "PDF\u202c LREâª";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+}
-+
-+int aLREâªbPDIâ©;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int A\u202aB\u2069C;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int aRLEâ«bPDIâ©;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int a\u202bB\u2069c;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int aLROâ­bPDIâ©;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int a\u202db\u2069c2;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int aRLOâ®bPDIâ©;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int a\u202eb\u2069;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int aLRIâ¦bPDFâ¬;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int a\u2066b\u202c;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int aRLIâ§bPDFâ¬c
-+;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
-+int a\u2067b\u202c;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int aFSIâ¨bPDFâ¬;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int a\u2068b\u202c;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int aFSIâ¨bPD\u202C;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int aFSI\u2068bPDFâ¬_;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int aLREâªbPDFâ¬b;
-+int A\u202aB\u202c;
-+int a_LREâª_LREâª_b_PDFâ¬_PDFâ¬;
-+int A\u202aA\u202aB\u202cB\u202c;
-+int aPDFâ¬bLREadPDFâ¬;
-+int a_\u202C_\u202a_\u202c;
-+int a_LREâª_b_PDFâ¬_c_LREâª_PDFâ¬;
-+int a_\u202a_\u202c_\u202a_\u202c_;
-+int a_LREâª_b_PDFâ¬_c_LREâª;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int a_\u202a_\u202c_\u202a_;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-7.c b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c
-new file mode 100644
-index 00000000000..d012d420ec0
---- /dev/null
-+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c
-@@ -0,0 +1,9 @@
-+/* PR preprocessor/103026 */
-+/* { dg-do compile } */
-+/* { dg-options "-Wbidi-chars=any" } */
-+/* Test we ignore UCNs in comments. */
-+
-+// a b c \u202a 1 2 3
-+// a b c \u202A 1 2 3
-+/* a b c \u202a 1 2 3 */
-+/* a b c \u202A 1 2 3 */
-diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-8.c b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c
-new file mode 100644
-index 00000000000..4f54c5092ec
---- /dev/null
-+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c
-@@ -0,0 +1,13 @@
-+/* PR preprocessor/103026 */
-+/* { dg-do compile } */
-+/* { dg-options "-Wbidi-chars=any" } */
-+/* Test \u vs \U. */
-+
-+int a_\u202A;
-+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
-+int a_\u202a_2;
-+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
-+int a_\U0000202A_3;
-+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
-+int a_\U0000202a_4;
-+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
-diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-9.c b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c
-new file mode 100644
-index 00000000000..e2af1b1ca97
---- /dev/null
-+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c
-@@ -0,0 +1,29 @@
-+/* PR preprocessor/103026 */
-+/* { dg-do compile } */
-+/* { dg-options "-Wbidi-chars=unpaired" } */
-+/* Test that we properly separate bidi contexts (comment/identifier/character
-+ constant/string literal). */
-+
-+/* LRE ->âª<- */ int pdf_\u202c_1;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+/* RLE ->â«<- */ int pdf_\u202c_2;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+/* LRO ->â­<- */ int pdf_\u202c_3;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+/* RLO ->â®<- */ int pdf_\u202c_4;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+/* LRI ->â¦<-*/ int pdi_\u2069_1;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+/* RLI ->â§<- */ int pdi_\u2069_12;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+/* FSI ->â¨<- */ int pdi_\u2069_3;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+
-+const char *s1 = "LRE\u202a"; /* PDF ->â¬<- */
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+/* LRE ->âª<- */ const char *s2 = "PDF\u202c";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+const char *s3 = "LRE\u202a"; int pdf_\u202c_5;
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-+int lre_\u202a; const char *s4 = "PDF\u202c";
-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
-diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
-index 176f8c5bbce..112b9c24751 100644
---- a/libcpp/include/cpplib.h
-+++ b/libcpp/include/cpplib.h
-@@ -318,6 +318,17 @@ enum cpp_main_search
- CMS_system, /* Search the system INCLUDE path. */
- };
-
-+/* The possible bidirectional control characters checking levels, from least
-+ restrictive to most. */
-+enum cpp_bidirectional_level {
-+ /* No checking. */
-+ bidirectional_none,
-+ /* Only detect unpaired uses of bidirectional control characters. */
-+ bidirectional_unpaired,
-+ /* Detect any use of bidirectional control characters. */
-+ bidirectional_any
-+};
-+
- /* This structure is nested inside struct cpp_reader, and
- carries all the options visible to the command line. */
- struct cpp_options
-@@ -531,6 +542,10 @@ struct cpp_options
- /* True if warn about differences between C++98 and C++11. */
- bool cpp_warn_cxx11_compat;
-
-+ /* Nonzero if bidirectional control characters checking is on. See enum
-+ cpp_bidirectional_level. */
-+ unsigned char cpp_warn_bidirectional;
-+
- /* Dependency generation. */
- struct
- {
-@@ -635,7 +650,8 @@ enum cpp_warning_reason {
- CPP_W_C90_C99_COMPAT,
- CPP_W_C11_C2X_COMPAT,
- CPP_W_CXX11_COMPAT,
-- CPP_W_EXPANSION_TO_DEFINED
-+ CPP_W_EXPANSION_TO_DEFINED,
-+ CPP_W_BIDIRECTIONAL
- };
-
- /* Callback for header lookup for HEADER, which is the name of a
-diff --git a/libcpp/init.c b/libcpp/init.c
-index 5a424e23553..f9a8f5f088f 100644
---- a/libcpp/init.c
-+++ b/libcpp/init.c
-@@ -219,6 +219,7 @@ cpp_create_reader (enum c_lang lang, cpp
- = ENABLE_CANONICAL_SYSTEM_HEADERS;
- CPP_OPTION (pfile, ext_numeric_literals) = 1;
- CPP_OPTION (pfile, warn_date_time) = 0;
-+ CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired;
-
- /* Default CPP arithmetic to something sensible for the host for the
- benefit of dumb users like fix-header. */
-diff --git a/libcpp/internal.h b/libcpp/internal.h
-index 8577cab6c83..0ce0246c5a2 100644
---- a/libcpp/internal.h
-+++ b/libcpp/internal.h
-@@ -597,6 +597,13 @@ struct cpp_reader
- /* Location identifying the main source file -- intended to be line
- zero of said file. */
- location_t main_loc;
-+
-+ /* Returns true iff we should warn about UTF-8 bidirectional control
-+ characters. */
-+ bool warn_bidi_p () const
-+ {
-+ return CPP_OPTION (this, cpp_warn_bidirectional) != bidirectional_none;
-+ }
- };
-
- /* Character classes. Based on the more primitive macros in safe-ctype.h.
-diff --git a/libcpp/lex.c b/libcpp/lex.c
-index fa2253d41c3..6a4fbce6030 100644
---- a/libcpp/lex.c
-+++ b/libcpp/lex.c
-@@ -1164,6 +1164,324 @@ _cpp_process_line_notes (cpp_reader *pfi
- }
- }
-
-+namespace bidi {
-+ enum class kind {
-+ NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI, LTR, RTL
-+ };
-+
-+ /* All the UTF-8 encodings of bidi characters start with E2. */
-+ constexpr uchar utf8_start = 0xe2;
-+
-+ /* A vector holding currently open bidi contexts. We use a char for
-+ each context, its LSB is 1 if it represents a PDF context, 0 if it
-+ represents a PDI context. The next bit is 1 if this context was open
-+ by a bidi character written as a UCN, and 0 when it was UTF-8. */
-+ semi_embedded_vec <unsigned char, 16> vec;
-+
-+ /* Close the whole comment/identifier/string literal/character constant
-+ context. */
-+ void on_close ()
-+ {
-+ vec.truncate (0);
-+ }
-+
-+ /* Pop the last element in the vector. */
-+ void pop ()
-+ {
-+ unsigned int len = vec.count ();
-+ gcc_checking_assert (len > 0);
-+ vec.truncate (len - 1);
-+ }
-+
-+ /* Return the context of the Ith element. */
-+ kind ctx_at (unsigned int i)
-+ {
-+ return (vec[i] & 1) ? kind::PDF : kind::PDI;
-+ }
-+
-+ /* Return which context is currently opened. */
-+ kind current_ctx ()
-+ {
-+ unsigned int len = vec.count ();
-+ if (len == 0)
-+ return kind::NONE;
-+ return ctx_at (len - 1);
-+ }
-+
-+ /* Return true if the current context comes from a UCN origin, that is,
-+ the bidi char which started this bidi context was written as a UCN. */
-+ bool current_ctx_ucn_p ()
-+ {
-+ unsigned int len = vec.count ();
-+ gcc_checking_assert (len > 0);
-+ return (vec[len - 1] >> 1) & 1;
-+ }
-+
-+ /* We've read a bidi char, update the current vector as necessary. */
-+ void on_char (kind k, bool ucn_p)
-+ {
-+ switch (k)
-+ {
-+ case kind::LRE:
-+ case kind::RLE:
-+ case kind::LRO:
-+ case kind::RLO:
-+ vec.push (ucn_p ? 3u : 1u);
-+ break;
-+ case kind::LRI:
-+ case kind::RLI:
-+ case kind::FSI:
-+ vec.push (ucn_p ? 2u : 0u);
-+ break;
-+ /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO
-+ whose scope has not yet been terminated. */
-+ case kind::PDF:
-+ if (current_ctx () == kind::PDF)
-+ pop ();
-+ break;
-+ /* PDI terminates the scope of the last LRI, RLI, or FSI whose
-+ scope has not yet been terminated, as well as the scopes of
-+ any subsequent LREs, RLEs, LROs, or RLOs whose scopes have not
-+ yet been terminated. */
-+ case kind::PDI:
-+ for (int i = vec.count () - 1; i >= 0; --i)
-+ if (ctx_at (i) == kind::PDI)
-+ {
-+ vec.truncate (i);
-+ break;
-+ }
-+ break;
-+ case kind::LTR:
-+ case kind::RTL:
-+ /* These aren't popped by a PDF/PDI. */
-+ break;
-+ [[likely]] case kind::NONE:
-+ break;
-+ default:
-+ abort ();
-+ }
-+ }
-+
-+ /* Return a descriptive string for K. */
-+ const char *to_str (kind k)
-+ {
-+ switch (k)
-+ {
-+ case kind::LRE:
-+ return "U+202A (LEFT-TO-RIGHT EMBEDDING)";
-+ case kind::RLE:
-+ return "U+202B (RIGHT-TO-LEFT EMBEDDING)";
-+ case kind::LRO:
-+ return "U+202D (LEFT-TO-RIGHT OVERRIDE)";
-+ case kind::RLO:
-+ return "U+202E (RIGHT-TO-LEFT OVERRIDE)";
-+ case kind::LRI:
-+ return "U+2066 (LEFT-TO-RIGHT ISOLATE)";
-+ case kind::RLI:
-+ return "U+2067 (RIGHT-TO-LEFT ISOLATE)";
-+ case kind::FSI:
-+ return "U+2068 (FIRST STRONG ISOLATE)";
-+ case kind::PDF:
-+ return "U+202C (POP DIRECTIONAL FORMATTING)";
-+ case kind::PDI:
-+ return "U+2069 (POP DIRECTIONAL ISOLATE)";
-+ case kind::LTR:
-+ return "U+200E (LEFT-TO-RIGHT MARK)";
-+ case kind::RTL:
-+ return "U+200F (RIGHT-TO-LEFT MARK)";
-+ default:
-+ abort ();
-+ }
-+ }
-+}
-+
-+/* Parse a sequence of 3 bytes starting with P and return its bidi code. */
-+
-+static bidi::kind
-+get_bidi_utf8 (const unsigned char *const p)
-+{
-+ gcc_checking_assert (p[0] == bidi::utf8_start);
-+
-+ if (p[1] == 0x80)
-+ switch (p[2])
-+ {
-+ case 0xaa:
-+ return bidi::kind::LRE;
-+ case 0xab:
-+ return bidi::kind::RLE;
-+ case 0xac:
-+ return bidi::kind::PDF;
-+ case 0xad:
-+ return bidi::kind::LRO;
-+ case 0xae:
-+ return bidi::kind::RLO;
-+ case 0x8e:
-+ return bidi::kind::LTR;
-+ case 0x8f:
-+ return bidi::kind::RTL;
-+ default:
-+ break;
-+ }
-+ else if (p[1] == 0x81)
-+ switch (p[2])
-+ {
-+ case 0xa6:
-+ return bidi::kind::LRI;
-+ case 0xa7:
-+ return bidi::kind::RLI;
-+ case 0xa8:
-+ return bidi::kind::FSI;
-+ case 0xa9:
-+ return bidi::kind::PDI;
-+ default:
-+ break;
-+ }
-+
-+ return bidi::kind::NONE;
-+}
-+
-+/* Parse a UCN where P points just past \u or \U and return its bidi code. */
-+
-+static bidi::kind
-+get_bidi_ucn (const unsigned char *p, bool is_U)
-+{
-+ /* 6.4.3 Universal Character Names
-+ \u hex-quad
-+ \U hex-quad hex-quad
-+ where \unnnn means \U0000nnnn. */
-+
-+ if (is_U)
-+ {
-+ if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
-+ return bidi::kind::NONE;
-+ /* Skip 4B so we can treat \u and \U the same below. */
-+ p += 4;
-+ }
-+
-+ /* All code points we are looking for start with 20xx. */
-+ if (p[0] != '2' || p[1] != '0')
-+ return bidi::kind::NONE;
-+ else if (p[2] == '2')
-+ switch (p[3])
-+ {
-+ case 'a':
-+ case 'A':
-+ return bidi::kind::LRE;
-+ case 'b':
-+ case 'B':
-+ return bidi::kind::RLE;
-+ case 'c':
-+ case 'C':
-+ return bidi::kind::PDF;
-+ case 'd':
-+ case 'D':
-+ return bidi::kind::LRO;
-+ case 'e':
-+ case 'E':
-+ return bidi::kind::RLO;
-+ default:
-+ break;
-+ }
-+ else if (p[2] == '6')
-+ switch (p[3])
-+ {
-+ case '6':
-+ return bidi::kind::LRI;
-+ case '7':
-+ return bidi::kind::RLI;
-+ case '8':
-+ return bidi::kind::FSI;
-+ case '9':
-+ return bidi::kind::PDI;
-+ default:
-+ break;
-+ }
-+ else if (p[2] == '0')
-+ switch (p[3])
-+ {
-+ case 'e':
-+ case 'E':
-+ return bidi::kind::LTR;
-+ case 'f':
-+ case 'F':
-+ return bidi::kind::RTL;
-+ default:
-+ break;
-+ }
-+
-+ return bidi::kind::NONE;
-+}
-+
-+/* We're closing a bidi context, that is, we've encountered a newline,
-+ are closing a C-style comment, or are at the end of a string literal,
-+ character constant, or identifier. Warn if this context was not
-+ properly terminated by a PDI or PDF. P points to the last character
-+ in this context. */
-+
-+static void
-+maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p)
-+{
-+ if (CPP_OPTION (pfile, cpp_warn_bidirectional) == bidirectional_unpaired
-+ && bidi::vec.count () > 0)
-+ {
-+ const location_t loc
-+ = linemap_position_for_column (pfile->line_table,
-+ CPP_BUF_COLUMN (pfile->buffer, p));
-+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
-+ "unpaired UTF-8 bidirectional control character "
-+ "detected");
-+ }
-+ /* We're done with this context. */
-+ bidi::on_close ();
-+}
-+
-+/* We're at the beginning or in the middle of an identifier/comment/string
-+ literal/character constant. Warn if we've encountered a bidi character.
-+ KIND says which bidi character it was; P points to it in the character
-+ stream. UCN_P is true iff this bidi character was written as a UCN. */
-+
-+static void
-+maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind,
-+ bool ucn_p)
-+{
-+ if (__builtin_expect (kind == bidi::kind::NONE, 1))
-+ return;
-+
-+ const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional);
-+
-+ if (warn_bidi != bidirectional_none)
-+ {
-+ const location_t loc
-+ = linemap_position_for_column (pfile->line_table,
-+ CPP_BUF_COLUMN (pfile->buffer, p));
-+ /* It seems excessive to warn about a PDI/PDF that is closing
-+ an opened context because we've already warned about the
-+ opening character. Except warn when we have a UCN x UTF-8
-+ mismatch. */
-+ if (kind == bidi::current_ctx ())
-+ {
-+ if (warn_bidi == bidirectional_unpaired
-+ && bidi::current_ctx_ucn_p () != ucn_p)
-+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
-+ "UTF-8 vs UCN mismatch when closing "
-+ "a context by \"%s\"", bidi::to_str (kind));
-+ }
-+ else if (warn_bidi == bidirectional_any)
-+ {
-+ if (kind == bidi::kind::PDF || kind == bidi::kind::PDI)
-+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
-+ "\"%s\" is closing an unopened context",
-+ bidi::to_str (kind));
-+ else
-+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
-+ "found problematic Unicode character \"%s\"",
-+ bidi::to_str (kind));
-+ }
-+ }
-+ /* We're done with this context. */
-+ bidi::on_char (kind, ucn_p);
-+}
-+
- /* Skip a C-style block comment. We find the end of the comment by
- seeing if an asterisk is before every '/' we encounter. Returns
- nonzero if comment terminated by EOF, zero otherwise.
-@@ -1175,6 +1493,7 @@ _cpp_skip_block_comment (cpp_reader *pfi
- cpp_buffer *buffer = pfile->buffer;
- const uchar *cur = buffer->cur;
- uchar c;
-+ const bool warn_bidi_p = pfile->warn_bidi_p ();
-
- cur++;
- if (*cur == '/')
-@@ -1189,7 +1508,11 @@ _cpp_skip_block_comment (cpp_reader *pfi
- if (c == '/')
- {
- if (cur[-2] == '*')
-- break;
-+ {
-+ if (warn_bidi_p)
-+ maybe_warn_bidi_on_close (pfile, cur);
-+ break;
-+ }
-
- /* Warn about potential nested comments, but not if the '/'
- comes immediately before the true comment delimiter.
-@@ -1208,6 +1531,8 @@ _cpp_skip_block_comment (cpp_reader *pfi
- {
- unsigned int cols;
- buffer->cur = cur - 1;
-+ if (warn_bidi_p)
-+ maybe_warn_bidi_on_close (pfile, cur);
- _cpp_process_line_notes (pfile, true);
- if (buffer->next_line >= buffer->rlimit)
- return true;
-@@ -1218,6 +1543,13 @@ _cpp_skip_block_comment (cpp_reader *pfi
-
- cur = buffer->cur;
- }
-+ /* If this is a beginning of a UTF-8 encoding, it might be
-+ a bidirectional control character. */
-+ else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
-+ {
-+ bidi::kind kind = get_bidi_utf8 (cur - 1);
-+ maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/false);
-+ }
- }
-
- buffer->cur = cur;
-@@ -1233,9 +1565,31 @@ skip_line_comment (cpp_reader *pfile)
- {
- cpp_buffer *buffer = pfile->buffer;
- location_t orig_line = pfile->line_table->highest_line;
-+ const bool warn_bidi_p = pfile->warn_bidi_p ();
-
-- while (*buffer->cur != '\n')
-- buffer->cur++;
-+ if (!warn_bidi_p)
-+ while (*buffer->cur != '\n')
-+ buffer->cur++;
-+ else
-+ {
-+ while (*buffer->cur != '\n'
-+ && *buffer->cur != bidi::utf8_start)
-+ buffer->cur++;
-+ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
-+ {
-+ while (*buffer->cur != '\n')
-+ {
-+ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
-+ {
-+ bidi::kind kind = get_bidi_utf8 (buffer->cur);
-+ maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
-+ /*ucn_p=*/false);
-+ }
-+ buffer->cur++;
-+ }
-+ maybe_warn_bidi_on_close (pfile, buffer->cur);
-+ }
-+ }
-
- _cpp_process_line_notes (pfile, true);
- return orig_line != pfile->line_table->highest_line;
-@@ -1317,11 +1671,13 @@ static const cppchar_t utf8_signifier =
-
- /* Returns TRUE if the sequence starting at buffer->cur is valid in
- an identifier. FIRST is TRUE if this starts an identifier. */
-+
- static bool
- forms_identifier_p (cpp_reader *pfile, int first,
- struct normalize_state *state)
- {
- cpp_buffer *buffer = pfile->buffer;
-+ const bool warn_bidi_p = pfile->warn_bidi_p ();
-
- if (*buffer->cur == '$')
- {
-@@ -1344,6 +1700,13 @@ forms_identifier_p (cpp_reader *pfile, i
- cppchar_t s;
- if (*buffer->cur >= utf8_signifier)
- {
-+ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)
-+ && warn_bidi_p)
-+ {
-+ bidi::kind kind = get_bidi_utf8 (buffer->cur);
-+ maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
-+ /*ucn_p=*/false);
-+ }
- if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
- state, &s))
- return true;
-@@ -1352,6 +1715,13 @@ forms_identifier_p (cpp_reader *pfile, i
- && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
- {
- buffer->cur += 2;
-+ if (warn_bidi_p)
-+ {
-+ bidi::kind kind = get_bidi_ucn (buffer->cur,
-+ buffer->cur[-1] == 'U');
-+ maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
-+ /*ucn_p=*/true);
-+ }
- if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
- state, &s, NULL, NULL))
- return true;
-@@ -1460,6 +1830,7 @@ lex_identifier (cpp_reader *pfile, const
- const uchar *cur;
- unsigned int len;
- unsigned int hash = HT_HASHSTEP (0, *base);
-+ const bool warn_bidi_p = pfile->warn_bidi_p ();
-
- cur = pfile->buffer->cur;
- if (! starts_ucn)
-@@ -1483,6 +1854,8 @@ lex_identifier (cpp_reader *pfile, const
- pfile->buffer->cur++;
- }
- } while (forms_identifier_p (pfile, false, nst));
-+ if (warn_bidi_p)
-+ maybe_warn_bidi_on_close (pfile, pfile->buffer->cur);
- result = _cpp_interpret_identifier (pfile, base,
- pfile->buffer->cur - base);
- *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
-@@ -1719,6 +2092,7 @@ static void
- lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
- {
- const uchar *pos = base;
-+ const bool warn_bidi_p = pfile->warn_bidi_p ();
-
- /* 'tis a pity this information isn't passed down from the lexer's
- initial categorization of the token. */
-@@ -1955,8 +2329,15 @@ lex_raw_string (cpp_reader *pfile, cpp_t
- pos = base = pfile->buffer->cur;
- note = &pfile->buffer->notes[pfile->buffer->cur_note];
- }
-+ else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0)
-+ && warn_bidi_p)
-+ maybe_warn_bidi_on_char (pfile, pos - 1, get_bidi_utf8 (pos - 1),
-+ /*ucn_p=*/false);
- }
-
-+ if (warn_bidi_p)
-+ maybe_warn_bidi_on_close (pfile, pos);
-+
- if (CPP_OPTION (pfile, user_literals))
- {
- /* If a string format macro, say from inttypes.h, is placed touching
-@@ -2051,15 +2432,27 @@ lex_string (cpp_reader *pfile, cpp_token
- else
- terminator = '>', type = CPP_HEADER_NAME;
-
-+ const bool warn_bidi_p = pfile->warn_bidi_p ();
- for (;;)
- {
- cppchar_t c = *cur++;
-
- /* In #include-style directives, terminators are not escapable. */
- if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
-- cur++;
-+ {
-+ if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p)
-+ {
-+ bidi::kind kind = get_bidi_ucn (cur + 1, cur[0] == 'U');
-+ maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/true);
-+ }
-+ cur++;
-+ }
- else if (c == terminator)
-- break;
-+ {
-+ if (warn_bidi_p)
-+ maybe_warn_bidi_on_close (pfile, cur - 1);
-+ break;
-+ }
- else if (c == '\n')
- {
- cur--;
-@@ -2076,6 +2469,11 @@ lex_string (cpp_reader *pfile, cpp_token
- }
- else if (c == '\0')
- saw_NUL = true;
-+ else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
-+ {
-+ bidi::kind kind = get_bidi_utf8 (cur - 1);
-+ maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false);
-+ }
- }
-
- if (saw_NUL && !pfile->state.skipping)