diff options
Diffstat (limited to 'meta-oe/recipes-dbs/mysql/mariadb/CVE-2023-22084.patch')
-rw-r--r-- | meta-oe/recipes-dbs/mysql/mariadb/CVE-2023-22084.patch | 91 |
1 file changed, 91 insertions, 0 deletions
diff --git a/meta-oe/recipes-dbs/mysql/mariadb/CVE-2023-22084.patch b/meta-oe/recipes-dbs/mysql/mariadb/CVE-2023-22084.patch new file mode 100644 index 0000000000..3053614854 --- /dev/null +++ b/meta-oe/recipes-dbs/mysql/mariadb/CVE-2023-22084.patch @@ -0,0 +1,91 @@ +From 15ae97b1c2c14f1263cdc853673c4129625323de Mon Sep 17 00:00:00 2001 +From: Marko Mäkelä <marko.makela@mariadb.com> +Date: Thu, 8 Feb 2024 08:09:20 +0000 +Subject: [PATCH] MDEV-32578 row_merge_fts_doc_tokenize() handles parser plugin + inconsistently + +When mysql/mysql-server@0c954c2 +added a plugin interface for FULLTEXT INDEX tokenization to MySQL 5.7, +fts_tokenize_ctx::processed_len got a second meaning, which is only +partly implemented in row_merge_fts_doc_tokenize(). + +This inconsistency could cause a crash when using FULLTEXT...WITH PARSER. +A test case that would crash MySQL 8.0 when using an n-gram parser and +single-character words would fail to crash in MySQL 5.7, because the +buf_full condition in row_merge_fts_doc_tokenize() was not met. + +This change is inspired by +mysql/mysql-server@38e9a07 +that appeared in MySQL 5.7.44. 
+ +CVE: CVE-2023-22084 +Upstream-Status: Backport [https://github.com/MariaDB/server/commit/15ae97b1c2c1] + +Signed-off-by: Yogita Urade <yogita.urade@windriver.com> +--- + storage/innobase/include/row0ftsort.h | 6 +++++- + storage/innobase/row/row0ftsort.cc | 11 ++++++++--- + 2 files changed, 13 insertions(+), 4 deletions(-) + +diff --git a/storage/innobase/include/row0ftsort.h b/storage/innobase/include/row0ftsort.h +index 65508caf..3ffa8243 100644 +--- a/storage/innobase/include/row0ftsort.h ++++ b/storage/innobase/include/row0ftsort.h +@@ -104,7 +104,10 @@ typedef UT_LIST_BASE_NODE_T(row_fts_token_t) fts_token_list_t; + + /** Structure stores information from string tokenization operation */ + struct fts_tokenize_ctx { +- ulint processed_len; /*!< processed string length */ ++ /** the processed string length in bytes ++ (when using the built-in tokenizer), ++ or the number of row_merge_fts_doc_tokenize_by_parser() calls */ ++ ulint processed_len; + ulint init_pos; /*!< doc start position */ + ulint buf_used; /*!< the sort buffer (ID) when + tokenization stops, which +@@ -115,6 +118,7 @@ struct fts_tokenize_ctx { + ib_rbt_t* cached_stopword;/*!< in: stopword list */ + dfield_t sort_field[FTS_NUM_FIELDS_SORT]; + /*!< in: sort field */ ++ /** parsed tokens (when using an external parser) */ + fts_token_list_t fts_token_list; + + fts_tokenize_ctx() : +diff --git a/storage/innobase/row/row0ftsort.cc b/storage/innobase/row/row0ftsort.cc +index 86e96624..406ff60f 100644 +--- a/storage/innobase/row/row0ftsort.cc ++++ b/storage/innobase/row/row0ftsort.cc +@@ -491,7 +491,10 @@ row_merge_fts_doc_tokenize( + + /* Tokenize the data and add each word string, its corresponding + doc id and position to sort buffer */ +- while (t_ctx->processed_len < doc->text.f_len) { ++ while (parser ++ ? 
(!t_ctx->processed_len ++ || UT_LIST_GET_LEN(t_ctx->fts_token_list)) ++ : t_ctx->processed_len < doc->text.f_len) { + ulint idx = 0; + ulint cur_len; + doc_id_t write_doc_id; +@@ -831,7 +834,8 @@ void fts_parallel_tokenization( + /* Not yet finish processing the "doc" on hand, + continue processing it */ + ut_ad(doc.text.f_str); +- ut_ad(t_ctx.processed_len < doc.text.f_len); ++ ut_ad(buf[0]->index->parser ++ || t_ctx.processed_len < doc.text.f_len); + } + + processed = row_merge_fts_doc_tokenize( +@@ -841,7 +845,8 @@ void fts_parallel_tokenization( + + /* Current sort buffer full, need to recycle */ + if (!processed) { +- ut_ad(t_ctx.processed_len < doc.text.f_len); ++ ut_ad(buf[0]->index->parser ++ || t_ctx.processed_len < doc.text.f_len); + ut_ad(t_ctx.rows_added[t_ctx.buf_used]); + break; + } +-- +2.40.0 |