Diffstat (limited to 'meta-oe/recipes-dbs/mysql/mariadb/CVE-2023-22084.patch')
-rw-r--r--  meta-oe/recipes-dbs/mysql/mariadb/CVE-2023-22084.patch  91
1 file changed, 91 insertions, 0 deletions
diff --git a/meta-oe/recipes-dbs/mysql/mariadb/CVE-2023-22084.patch b/meta-oe/recipes-dbs/mysql/mariadb/CVE-2023-22084.patch
new file mode 100644
index 0000000000..3053614854
--- /dev/null
+++ b/meta-oe/recipes-dbs/mysql/mariadb/CVE-2023-22084.patch
@@ -0,0 +1,91 @@
+From 15ae97b1c2c14f1263cdc853673c4129625323de Mon Sep 17 00:00:00 2001
+From: Marko Mäkelä <marko.makela@mariadb.com>
+Date: Thu, 8 Feb 2024 08:09:20 +0000
+Subject: [PATCH] MDEV-32578 row_merge_fts_doc_tokenize() handles parser plugin
+ inconsistently
+
+When mysql/mysql-server@0c954c2
+added a plugin interface for FULLTEXT INDEX tokenization to MySQL 5.7,
+fts_tokenize_ctx::processed_len got a second meaning, which is only
+partly implemented in row_merge_fts_doc_tokenize().
+
+This inconsistency could cause a crash when using FULLTEXT...WITH PARSER.
+A test case that would crash MySQL 8.0 when using an n-gram parser and
+single-character words would fail to crash in MySQL 5.7, because the
+buf_full condition in row_merge_fts_doc_tokenize() was not met.
+
+This change is inspired by
+mysql/mysql-server@38e9a07
+that appeared in MySQL 5.7.44.
+
+CVE: CVE-2023-22084
+Upstream-Status: Backport [https://github.com/MariaDB/server/commit/15ae97b1c2c1]
+
+Signed-off-by: Yogita Urade <yogita.urade@windriver.com>
+---
+ storage/innobase/include/row0ftsort.h | 6 +++++-
+ storage/innobase/row/row0ftsort.cc | 11 ++++++++---
+ 2 files changed, 13 insertions(+), 4 deletions(-)
+
+diff --git a/storage/innobase/include/row0ftsort.h b/storage/innobase/include/row0ftsort.h
+index 65508caf..3ffa8243 100644
+--- a/storage/innobase/include/row0ftsort.h
++++ b/storage/innobase/include/row0ftsort.h
+@@ -104,7 +104,10 @@ typedef UT_LIST_BASE_NODE_T(row_fts_token_t) fts_token_list_t;
+
+ /** Structure stores information from string tokenization operation */
+ struct fts_tokenize_ctx {
+- ulint processed_len; /*!< processed string length */
++ /** the processed string length in bytes
++ (when using the built-in tokenizer),
++ or the number of row_merge_fts_doc_tokenize_by_parser() calls */
++ ulint processed_len;
+ ulint init_pos; /*!< doc start position */
+ ulint buf_used; /*!< the sort buffer (ID) when
+ tokenization stops, which
+@@ -115,6 +118,7 @@ struct fts_tokenize_ctx {
+ ib_rbt_t* cached_stopword;/*!< in: stopword list */
+ dfield_t sort_field[FTS_NUM_FIELDS_SORT];
+ /*!< in: sort field */
++ /** parsed tokens (when using an external parser) */
+ fts_token_list_t fts_token_list;
+
+ fts_tokenize_ctx() :
+diff --git a/storage/innobase/row/row0ftsort.cc b/storage/innobase/row/row0ftsort.cc
+index 86e96624..406ff60f 100644
+--- a/storage/innobase/row/row0ftsort.cc
++++ b/storage/innobase/row/row0ftsort.cc
+@@ -491,7 +491,10 @@ row_merge_fts_doc_tokenize(
+
+ /* Tokenize the data and add each word string, its corresponding
+ doc id and position to sort buffer */
+- while (t_ctx->processed_len < doc->text.f_len) {
++ while (parser
++ ? (!t_ctx->processed_len
++ || UT_LIST_GET_LEN(t_ctx->fts_token_list))
++ : t_ctx->processed_len < doc->text.f_len) {
+ ulint idx = 0;
+ ulint cur_len;
+ doc_id_t write_doc_id;
+@@ -831,7 +834,8 @@ void fts_parallel_tokenization(
+ /* Not yet finish processing the "doc" on hand,
+ continue processing it */
+ ut_ad(doc.text.f_str);
+- ut_ad(t_ctx.processed_len < doc.text.f_len);
++ ut_ad(buf[0]->index->parser
++ || t_ctx.processed_len < doc.text.f_len);
+ }
+
+ processed = row_merge_fts_doc_tokenize(
+@@ -841,7 +845,8 @@ void fts_parallel_tokenization(
+
+ /* Current sort buffer full, need to recycle */
+ if (!processed) {
+- ut_ad(t_ctx.processed_len < doc.text.f_len);
++ ut_ad(buf[0]->index->parser
++ || t_ctx.processed_len < doc.text.f_len);
+ ut_ad(t_ctx.rows_added[t_ctx.buf_used]);
+ break;
+ }
+--
+2.40.0
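
The patch above only takes effect once the mariadb recipe references it; that recipe change is outside this diffstat view, which is limited to the .patch file itself. A minimal sketch of the expected hookup, assuming the usual meta-oe convention where the mariadb_*.bb recipes include a shared mariadb.inc, would be a single SRC_URI entry:

    # mariadb.inc (hypothetical excerpt): BitBake resolves file:// entries
    # against the mariadb/ directory next to the recipe via FILESPATH,
    # so the patch is fetched into the workdir and applied during do_patch.
    SRC_URI += "file://CVE-2023-22084.patch"

With that entry in place, the backport is applied on top of the MariaDB source tree before configure and build, closing CVE-2023-22084 for the packaged version.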