1 files changed, 91 insertions, 0 deletions
diff --git a/meta-oe/recipes-dbs/mysql/mariadb/CVE-2023-22084.patch b/meta-oe/recipes-dbs/mysql/mariadb/CVE-2023-22084.patch
new file mode 100644
index 0000000000..3053614854
--- /dev/null
+++ b/meta-oe/recipes-dbs/mysql/mariadb/CVE-2023-22084.patch
@@ -0,0 +1,91 @@
+From 15ae97b1c2c14f1263cdc853673c4129625323de Mon Sep 17 00:00:00 2001
+From: Marko Mäkelä <marko.makela@mariadb.com>
+Date: Thu, 8 Feb 2024 08:09:20 +0000
+Subject: [PATCH] MDEV-32578 row_merge_fts_doc_tokenize() handles parser plugin
+  inconsistently
+
+When mysql/mysql-server@0c954c2
+added a plugin interface for FULLTEXT INDEX tokenization to MySQL 5.7,
+fts_tokenize_ctx::processed_len got a second meaning, which is only
+partly implemented in row_merge_fts_doc_tokenize().
+
+This inconsistency could cause a crash when using FULLTEXT...WITH PARSER.
+A test case that crashes MySQL 8.0 when using an n-gram parser and
+single-character words does not crash MySQL 5.7, because the
+buf_full condition in row_merge_fts_doc_tokenize() is not met there.
+
+This change is inspired by
+mysql/mysql-server@38e9a07
+that appeared in MySQL 5.7.44.
+
+CVE: CVE-2023-22084
+Upstream-Status: Backport [https://github.com/MariaDB/server/commit/15ae97b1c2c14f1263cdc853673c4129625323de]
+
+Signed-off-by: Yogita Urade <yogita.urade@windriver.com>
+---
+ storage/innobase/include/row0ftsort.h |  6 +++++-
+ storage/innobase/row/row0ftsort.cc    | 11 ++++++++---
+ 2 files changed, 13 insertions(+), 4 deletions(-)
+
+diff --git a/storage/innobase/include/row0ftsort.h b/storage/innobase/include/row0ftsort.h
+index 65508caf..3ffa8243 100644
+--- a/storage/innobase/include/row0ftsort.h
++++ b/storage/innobase/include/row0ftsort.h
+@@ -104,7 +104,10 @@ typedef UT_LIST_BASE_NODE_T(row_fts_token_t)     fts_token_list_t;
+
+ /** Structure stores information from string tokenization operation */
+ struct fts_tokenize_ctx {
+-	ulint			processed_len;  /*!< processed string length */
++	/** the processed string length in bytes
++	(when using the built-in tokenizer),
++	or the number of row_merge_fts_doc_tokenize_by_parser() calls */
++	ulint			processed_len;
+	ulint			init_pos;       /*!< doc start position */
+	ulint			buf_used;       /*!< the sort buffer (ID) when
+						tokenization stops, which
+@@ -115,6 +118,7 @@ struct fts_tokenize_ctx {
+	ib_rbt_t*		cached_stopword;/*!< in: stopword list */
+	dfield_t		sort_field[FTS_NUM_FIELDS_SORT];
+						/*!< in: sort field */
++	/** parsed tokens (when using an external parser) */
+	fts_token_list_t	fts_token_list;
+
+	fts_tokenize_ctx() :
+diff --git a/storage/innobase/row/row0ftsort.cc b/storage/innobase/row/row0ftsort.cc
+index 86e96624..406ff60f 100644
+--- a/storage/innobase/row/row0ftsort.cc
++++ b/storage/innobase/row/row0ftsort.cc
+@@ -491,7 +491,10 @@ row_merge_fts_doc_tokenize(
+
+	/* Tokenize the data and add each word string, its corresponding
+	doc id and position to sort buffer */
+-	while (t_ctx->processed_len < doc->text.f_len) {
++	while (parser
++               ? (!t_ctx->processed_len
++                  || UT_LIST_GET_LEN(t_ctx->fts_token_list))
++               : t_ctx->processed_len < doc->text.f_len) {
+		ulint		idx = 0;
+		ulint		cur_len;
+		doc_id_t	write_doc_id;
+@@ -831,7 +834,8 @@ void fts_parallel_tokenization(
+			/* Not yet finish processing the "doc" on hand,
+			continue processing it */
+			ut_ad(doc.text.f_str);
+-			ut_ad(t_ctx.processed_len < doc.text.f_len);
++			ut_ad(buf[0]->index->parser
++			      || t_ctx.processed_len < doc.text.f_len);
+		}
+
+		processed = row_merge_fts_doc_tokenize(
+@@ -841,7 +845,8 @@ void fts_parallel_tokenization(
+
+		/* Current sort buffer full, need to recycle */
+		if (!processed) {
+-			ut_ad(t_ctx.processed_len < doc.text.f_len);
++			ut_ad(buf[0]->index->parser
++			      || t_ctx.processed_len < doc.text.f_len);
+			ut_ad(t_ctx.rows_added[t_ctx.buf_used]);
+			break;
+		}
+--
+2.40.0