Diffstat (limited to 'meta-networking/recipes-daemons/squid/files/CVE-2023-46846-pre1.patch')
-rw-r--r--  meta-networking/recipes-daemons/squid/files/CVE-2023-46846-pre1.patch  1154
1 file changed, 1154 insertions, 0 deletions
diff --git a/meta-networking/recipes-daemons/squid/files/CVE-2023-46846-pre1.patch b/meta-networking/recipes-daemons/squid/files/CVE-2023-46846-pre1.patch
new file mode 100644
index 0000000000..5b4e370d49
--- /dev/null
+++ b/meta-networking/recipes-daemons/squid/files/CVE-2023-46846-pre1.patch
@@ -0,0 +1,1154 @@
+Backport of:
+
+From 417da4006cf5c97d44e74431b816fc58fec9e270 Mon Sep 17 00:00:00 2001
+From: Eduard Bagdasaryan <eduard.bagdasaryan@measurement-factory.com>
+Date: Mon, 18 Mar 2019 17:48:21 +0000
+Subject: [PATCH] Fix incremental parsing of chunked quoted extensions (#310)
+
+Before this change, incremental parsing of quoted chunked extensions
+was broken for two reasons:
+
+* Http::One::Parser::skipLineTerminator() unexpectedly threw after a
+ partially received quoted chunk extension value.
+
+* When Http::One::Tokenizer was unable to parse a quoted extension,
+ it incorrectly restored the input buffer to the beginning of the
+ extension value (instead of the extension itself), thus making
+ further incremental parsing iterations impossible.
+
+IMO, the reason for this problem was that Http::One::Tokenizer::qdText()
+could not distinguish two cases (returning false in both):
+
+* the end of the quoted string not yet reached
+
+* an input error, e.g., wrong/unexpected character
+
+A possible approach would be to improve Http::One::Tokenizer, making it
+aware of a "needs more data" state. However, to be acceptable, such
+improvements should be made in the base Parser::Tokenizer class
+instead. Those changes are non-trivial and can be done separately,
+later.
+
+Another approach, used here, is to simplify the complex and error-prone
+chunked extensions parsing algorithm, fixing its incremental parsing
+bugs while still parsing incrementally in almost all cases. A
+performance regression is expected only in the relatively rare cases of
+partially received or malformed extensions.
+
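+To illustrate the new pattern, a minimal, self-contained sketch of
+throw-on-insufficient-input parsing with caller-side backtracking
+(std::string_view stands in for SBuf; this is illustrative code, not
+Squid's):
+
+    #include <string_view>
+
+    // thrown when a parser needs more input to decide (cf. parser/forward.h)
+    class InsufficientInput {};
+
+    // Parses one complete "name;" element, consuming it from buf.
+    // Throws InsufficientInput while the element may still be incomplete.
+    std::string_view parseElement(std::string_view &buf)
+    {
+        const auto end = buf.find(';');
+        if (end == std::string_view::npos)
+            throw InsufficientInput(); // more octets may complete the element
+        const auto element = buf.substr(0, end);
+        buf.remove_prefix(end + 1); // commit point: element fully parsed
+        return element;
+    }
+
+    // The caller backtracks to the last commit point on InsufficientInput,
+    // just as parseChunkMetadataSuffix() below does with tok.reset(buf_).
+    bool parseAvailable(std::string_view &buf)
+    {
+        const auto checkpoint = buf;
+        try {
+            (void)parseElement(buf);
+            return true; // made progress
+        } catch (const InsufficientInput &) {
+            buf = checkpoint; // wait for more data
+            return false;
+        }
+    }
+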
+Also:
+* fixed parsing of partial use-original-body extension values
+* do not treat an invalid use-original-body as an unknown extension
+* optimization: parse use-original-body extension only in ICAP context
+ (i.e., where it is expected)
+* improvement: added a new API to TeChunkedParser to specify known
+ chunked extensions list
+
+Upstream-Status: Backport [import from ubuntu https://git.launchpad.net/ubuntu/+source/squid/tree/debian/patches/CVE-2023-46846-pre1.patch?h=ubuntu/focal-security&id=9ccd217ca9428c9a6597e9310a99552026b245fa
+Upstream commit https://github.com/squid-cache/squid/commit/417da4006cf5c97d44e74431b816fc58fec9e270]
+CVE: CVE-2023-46846 #Dependency Patch1
+Signed-off-by: Vijay Anusuri <vanusuri@mvista.com>
+---
+ src/adaptation/icap/ModXact.cc | 21 ++++-
+ src/adaptation/icap/ModXact.h | 20 +++++
+ src/http/one/Parser.cc | 35 ++++----
+ src/http/one/Parser.h | 10 ++-
+ src/http/one/RequestParser.cc | 16 ++--
+ src/http/one/RequestParser.h | 8 +-
+ src/http/one/ResponseParser.cc | 17 ++--
+ src/http/one/ResponseParser.h | 2 +-
+ src/http/one/TeChunkedParser.cc | 139 ++++++++++++++++++--------------
+ src/http/one/TeChunkedParser.h | 41 ++++++++--
+ src/http/one/Tokenizer.cc | 104 ++++++++++++------------
+ src/http/one/Tokenizer.h | 89 ++++++++------------
+ src/http/one/forward.h | 3 +
+ src/parser/BinaryTokenizer.h | 3 +-
+ src/parser/Makefile.am | 1 +
+ src/parser/Tokenizer.cc | 40 +++++++++
+ src/parser/Tokenizer.h | 13 +++
+ src/parser/forward.h | 22 +++++
+ 18 files changed, 364 insertions(+), 220 deletions(-)
+ create mode 100644 src/parser/forward.h
+
+--- a/src/adaptation/icap/ModXact.cc
++++ b/src/adaptation/icap/ModXact.cc
+@@ -25,12 +25,13 @@
+ #include "comm.h"
+ #include "comm/Connection.h"
+ #include "err_detail_type.h"
+-#include "http/one/TeChunkedParser.h"
+ #include "HttpHeaderTools.h"
+ #include "HttpMsg.h"
+ #include "HttpReply.h"
+ #include "HttpRequest.h"
+ #include "MasterXaction.h"
++#include "parser/Tokenizer.h"
++#include "sbuf/Stream.h"
+ #include "SquidTime.h"
+
+ // flow and terminology:
+@@ -44,6 +45,8 @@ CBDATA_NAMESPACED_CLASS_INIT(Adaptation:
+
+ static const size_t TheBackupLimit = BodyPipe::MaxCapacity;
+
++const SBuf Adaptation::Icap::ChunkExtensionValueParser::UseOriginalBodyName("use-original-body");
++
+ Adaptation::Icap::ModXact::State::State()
+ {
+ memset(this, 0, sizeof(*this));
+@@ -1108,6 +1111,7 @@ void Adaptation::Icap::ModXact::decideOn
+ state.parsing = State::psBody;
+ replyHttpBodySize = 0;
+ bodyParser = new Http1::TeChunkedParser;
++ bodyParser->parseExtensionValuesWith(&extensionParser);
+ makeAdaptedBodyPipe("adapted response from the ICAP server");
+ Must(state.sending == State::sendingAdapted);
+ } else {
+@@ -1142,9 +1146,8 @@ void Adaptation::Icap::ModXact::parseBod
+ }
+
+ if (parsed) {
+- if (state.readyForUob && bodyParser->useOriginBody >= 0) {
+- prepPartialBodyEchoing(
+- static_cast<uint64_t>(bodyParser->useOriginBody));
++ if (state.readyForUob && extensionParser.sawUseOriginalBody()) {
++ prepPartialBodyEchoing(extensionParser.useOriginalBody());
+ stopParsing();
+ return;
+ }
+@@ -2014,3 +2017,14 @@ void Adaptation::Icap::ModXactLauncher::
+ }
+ }
+
++void
++Adaptation::Icap::ChunkExtensionValueParser::parse(Tokenizer &tok, const SBuf &extName)
++{
++ if (extName == UseOriginalBodyName) {
++ useOriginalBody_ = tok.udec64("use-original-body");
++ assert(useOriginalBody_ >= 0);
++ } else {
++ Ignore(tok, extName);
++ }
++}
++
+--- a/src/adaptation/icap/ModXact.h
++++ b/src/adaptation/icap/ModXact.h
+@@ -15,6 +15,7 @@
+ #include "adaptation/icap/Xaction.h"
+ #include "BodyPipe.h"
+ #include "http/one/forward.h"
++#include "http/one/TeChunkedParser.h"
+
+ /*
+ * ICAPModXact implements ICAP REQMOD and RESPMOD transaction using
+@@ -105,6 +106,23 @@ private:
+ enum State { stDisabled, stWriting, stIeof, stDone } theState;
+ };
+
++/// handles ICAP-specific chunk extensions supported by Squid
++class ChunkExtensionValueParser: public Http1::ChunkExtensionValueParser
++{
++public:
++ /* Http1::ChunkExtensionValueParser API */
++ virtual void parse(Tokenizer &tok, const SBuf &extName) override;
++
++ bool sawUseOriginalBody() const { return useOriginalBody_ >= 0; }
++ uint64_t useOriginalBody() const { assert(sawUseOriginalBody()); return static_cast<uint64_t>(useOriginalBody_); }
++
++private:
++ static const SBuf UseOriginalBodyName;
++
++ /// the value of the parsed use-original-body chunk extension (or -1)
++ int64_t useOriginalBody_ = -1;
++};
++
+ class ModXact: public Xaction, public BodyProducer, public BodyConsumer
+ {
+ CBDATA_CLASS(ModXact);
+@@ -270,6 +288,8 @@ private:
+
+ int adaptHistoryId; ///< adaptation history slot reservation
+
++ ChunkExtensionValueParser extensionParser;
++
+ class State
+ {
+
+--- a/src/http/one/Parser.cc
++++ b/src/http/one/Parser.cc
+@@ -7,10 +7,11 @@
+ */
+
+ #include "squid.h"
++#include "base/CharacterSet.h"
+ #include "Debug.h"
+ #include "http/one/Parser.h"
+-#include "http/one/Tokenizer.h"
+ #include "mime_header.h"
++#include "parser/Tokenizer.h"
+ #include "SquidConfig.h"
+
+ /// RFC 7230 section 2.6 - 7 magic octets
+@@ -61,20 +62,19 @@ Http::One::Parser::DelimiterCharacters()
+ RelaxedDelimiterCharacters() : CharacterSet::SP;
+ }
+
+-bool
+-Http::One::Parser::skipLineTerminator(Http1::Tokenizer &tok) const
++void
++Http::One::Parser::skipLineTerminator(Tokenizer &tok) const
+ {
+ if (tok.skip(Http1::CrLf()))
+- return true;
++ return;
+
+ if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF))
+- return true;
++ return;
+
+ if (tok.atEnd() || (tok.remaining().length() == 1 && tok.remaining().at(0) == '\r'))
+- return false; // need more data
++ throw InsufficientInput();
+
+ throw TexcHere("garbage instead of CRLF line terminator");
+- return false; // unreachable, but make naive compilers happy
+ }
+
+ /// all characters except the LF line terminator
+@@ -102,7 +102,7 @@ LineCharacters()
+ void
+ Http::One::Parser::cleanMimePrefix()
+ {
+- Http1::Tokenizer tok(mimeHeaderBlock_);
++ Tokenizer tok(mimeHeaderBlock_);
+ while (tok.skipOne(RelaxedDelimiterCharacters())) {
+ (void)tok.skipAll(LineCharacters()); // optional line content
+ // LF terminator is required.
+@@ -137,7 +137,7 @@ Http::One::Parser::cleanMimePrefix()
+ void
+ Http::One::Parser::unfoldMime()
+ {
+- Http1::Tokenizer tok(mimeHeaderBlock_);
++ Tokenizer tok(mimeHeaderBlock_);
+ const auto szLimit = mimeHeaderBlock_.length();
+ mimeHeaderBlock_.clear();
+ // prevent the mime sender being able to make append() realloc/grow multiple times.
+@@ -228,7 +228,7 @@ Http::One::Parser::getHostHeaderField()
+ debugs(25, 5, "looking for " << name);
+
+ // while we can find more LF in the SBuf
+- Http1::Tokenizer tok(mimeHeaderBlock_);
++ Tokenizer tok(mimeHeaderBlock_);
+ SBuf p;
+
+ while (tok.prefix(p, LineCharacters())) {
+@@ -250,7 +250,7 @@ Http::One::Parser::getHostHeaderField()
+ p.consume(namelen + 1);
+
+ // TODO: optimize SBuf::trim to take CharacterSet directly
+- Http1::Tokenizer t(p);
++ Tokenizer t(p);
+ t.skipAll(CharacterSet::WSP);
+ p = t.remaining();
+
+@@ -278,10 +278,15 @@ Http::One::ErrorLevel()
+ }
+
+ // BWS = *( SP / HTAB ) ; WhitespaceCharacters() may relax this RFC 7230 rule
+-bool
+-Http::One::ParseBws(Tokenizer &tok)
++void
++Http::One::ParseBws(Parser::Tokenizer &tok)
+ {
+- if (const auto count = tok.skipAll(Parser::WhitespaceCharacters())) {
++ const auto count = tok.skipAll(Parser::WhitespaceCharacters());
++
++ if (tok.atEnd())
++ throw InsufficientInput(); // even if count is positive
++
++ if (count) {
+ // Generating BWS is a MUST-level violation so warn about it as needed.
+ debugs(33, ErrorLevel(), "found " << count << " BWS octets");
+ // RFC 7230 says we MUST parse BWS, so we fall through even if
+@@ -289,6 +294,6 @@ Http::One::ParseBws(Tokenizer &tok)
+ }
+ // else we successfully "parsed" an empty BWS sequence
+
+- return true;
++ // success: no more BWS characters expected
+ }
+
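+The new ParseBws() contract, exercised by a hypothetical caller (Squid
+APIs as declared in this patch; headers elided):
+
+    SBuf input("  ;ext=1\r\n");
+    ::Parser::Tokenizer tok(input);
+    // Either consumes all BWS and returns, or throws InsufficientInput
+    // when the buffer ends before a non-BWS octet arrives -- even after
+    // skipping some whitespace -- because more BWS may still be coming.
+    Http::One::ParseBws(tok);
+    Must(tok.skip(';')); // the next octet is now known to be non-BWS
+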
+--- a/src/http/one/Parser.h
++++ b/src/http/one/Parser.h
+@@ -12,6 +12,7 @@
+ #include "anyp/ProtocolVersion.h"
+ #include "http/one/forward.h"
+ #include "http/StatusCode.h"
++#include "parser/forward.h"
+ #include "sbuf/SBuf.h"
+
+ namespace Http {
+@@ -40,6 +41,7 @@ class Parser : public RefCountable
+ {
+ public:
+ typedef SBuf::size_type size_type;
++ typedef ::Parser::Tokenizer Tokenizer;
+
+ Parser() : parseStatusCode(Http::scNone), parsingStage_(HTTP_PARSE_NONE), hackExpectsMime_(false) {}
+ virtual ~Parser() {}
+@@ -118,11 +120,11 @@ protected:
+ * detect and skip the CRLF or (if tolerant) LF line terminator
+ * consume from the tokenizer.
+ *
+- * throws if non-terminator is detected.
++ * \throws exception on bad or InsufficientInput.
+ * \retval true only if line terminator found.
+ * \retval false incomplete or missing line terminator, need more data.
+ */
+- bool skipLineTerminator(Http1::Tokenizer &tok) const;
++ void skipLineTerminator(Tokenizer &) const;
+
+ /**
+ * Scan to find the mime headers block for current message.
+@@ -159,8 +161,8 @@ private:
+ };
+
+ /// skips and, if needed, warns about RFC 7230 BWS ("bad" whitespace)
+-/// \returns true (always; unlike all the skip*() functions)
+-bool ParseBws(Tokenizer &tok);
++/// \throws InsufficientInput when the end of BWS cannot be confirmed
++void ParseBws(Parser::Tokenizer &);
+
+ /// the right debugs() level for logging HTTP violation messages
+ int ErrorLevel();
+--- a/src/http/one/RequestParser.cc
++++ b/src/http/one/RequestParser.cc
+@@ -9,8 +9,8 @@
+ #include "squid.h"
+ #include "Debug.h"
+ #include "http/one/RequestParser.h"
+-#include "http/one/Tokenizer.h"
+ #include "http/ProtocolVersion.h"
++#include "parser/Tokenizer.h"
+ #include "profiler/Profiler.h"
+ #include "SquidConfig.h"
+
+@@ -64,7 +64,7 @@ Http::One::RequestParser::skipGarbageLin
+ * RFC 7230 section 2.6, 3.1 and 3.5
+ */
+ bool
+-Http::One::RequestParser::parseMethodField(Http1::Tokenizer &tok)
++Http::One::RequestParser::parseMethodField(Tokenizer &tok)
+ {
+ // method field is a sequence of TCHAR.
+ // Limit to 32 characters to prevent overly long sequences of non-HTTP
+@@ -145,7 +145,7 @@ Http::One::RequestParser::RequestTargetC
+ }
+
+ bool
+-Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok)
++Http::One::RequestParser::parseUriField(Tokenizer &tok)
+ {
+ /* Arbitrary 64KB URI upper length limit.
+ *
+@@ -178,7 +178,7 @@ Http::One::RequestParser::parseUriField(
+ }
+
+ bool
+-Http::One::RequestParser::parseHttpVersionField(Http1::Tokenizer &tok)
++Http::One::RequestParser::parseHttpVersionField(Tokenizer &tok)
+ {
+ static const SBuf http1p0("HTTP/1.0");
+ static const SBuf http1p1("HTTP/1.1");
+@@ -253,7 +253,7 @@ Http::One::RequestParser::skipDelimiter(
+
+ /// Parse CRs at the end of request-line, just before the terminating LF.
+ bool
+-Http::One::RequestParser::skipTrailingCrs(Http1::Tokenizer &tok)
++Http::One::RequestParser::skipTrailingCrs(Tokenizer &tok)
+ {
+ if (Config.onoff.relaxed_header_parser) {
+ (void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
+@@ -289,12 +289,12 @@ Http::One::RequestParser::parseRequestFi
+ // Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
+ // Now, the request line has to end at the first LF.
+ static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
+- ::Parser::Tokenizer lineTok(buf_);
++ Tokenizer lineTok(buf_);
+ if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) {
+ if (buf_.length() >= Config.maxRequestHeaderSize) {
+ /* who should we blame for our failure to parse this line? */
+
+- Http1::Tokenizer methodTok(buf_);
++ Tokenizer methodTok(buf_);
+ if (!parseMethodField(methodTok))
+ return -1; // blame a bad method (or its delimiter)
+
+@@ -308,7 +308,7 @@ Http::One::RequestParser::parseRequestFi
+ return 0;
+ }
+
+- Http1::Tokenizer tok(line);
++ Tokenizer tok(line);
+
+ if (!parseMethodField(tok))
+ return -1;
+--- a/src/http/one/RequestParser.h
++++ b/src/http/one/RequestParser.h
+@@ -54,11 +54,11 @@ private:
+ bool doParse(const SBuf &aBuf);
+
+ /* all these return false and set parseStatusCode on parsing failures */
+- bool parseMethodField(Http1::Tokenizer &);
+- bool parseUriField(Http1::Tokenizer &);
+- bool parseHttpVersionField(Http1::Tokenizer &);
++ bool parseMethodField(Tokenizer &);
++ bool parseUriField(Tokenizer &);
++ bool parseHttpVersionField(Tokenizer &);
+ bool skipDelimiter(const size_t count, const char *where);
+- bool skipTrailingCrs(Http1::Tokenizer &tok);
++ bool skipTrailingCrs(Tokenizer &tok);
+
+ bool http0() const {return !msgProtocol_.major;}
+ static const CharacterSet &RequestTargetCharacters();
+--- a/src/http/one/ResponseParser.cc
++++ b/src/http/one/ResponseParser.cc
+@@ -9,8 +9,8 @@
+ #include "squid.h"
+ #include "Debug.h"
+ #include "http/one/ResponseParser.h"
+-#include "http/one/Tokenizer.h"
+ #include "http/ProtocolVersion.h"
++#include "parser/Tokenizer.h"
+ #include "profiler/Profiler.h"
+ #include "SquidConfig.h"
+
+@@ -47,7 +47,7 @@ Http::One::ResponseParser::firstLineSize
+ // NP: we found the protocol version and consumed it already.
+ // just need the status code and reason phrase
+ int
+-Http::One::ResponseParser::parseResponseStatusAndReason(Http1::Tokenizer &tok, const CharacterSet &WspDelim)
++Http::One::ResponseParser::parseResponseStatusAndReason(Tokenizer &tok, const CharacterSet &WspDelim)
+ {
+ if (!completedStatus_) {
+ debugs(74, 9, "seek status-code in: " << tok.remaining().substr(0,10) << "...");
+@@ -87,14 +87,13 @@ Http::One::ResponseParser::parseResponse
+ static const CharacterSet phraseChars = CharacterSet::WSP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
+ (void)tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing
+ try {
+- if (skipLineTerminator(tok)) {
+- debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
+- buf_ = tok.remaining(); // resume checkpoint
+- return 1;
+- }
++ skipLineTerminator(tok);
++ buf_ = tok.remaining(); // resume checkpoint
++ debugs(74, DBG_DATA, Raw("leftovers", buf_.rawContent(), buf_.length()));
++ return 1;
++ } catch (const InsufficientInput &) {
+ reasonPhrase_.clear();
+ return 0; // need more to be sure we have it all
+-
+ } catch (const std::exception &ex) {
+ debugs(74, 6, "invalid status-line: " << ex.what());
+ }
+@@ -119,7 +118,7 @@ Http::One::ResponseParser::parseResponse
+ int
+ Http::One::ResponseParser::parseResponseFirstLine()
+ {
+- Http1::Tokenizer tok(buf_);
++ Tokenizer tok(buf_);
+
+ const CharacterSet &WspDelim = DelimiterCharacters();
+
+--- a/src/http/one/ResponseParser.h
++++ b/src/http/one/ResponseParser.h
+@@ -43,7 +43,7 @@ public:
+
+ private:
+ int parseResponseFirstLine();
+- int parseResponseStatusAndReason(Http1::Tokenizer&, const CharacterSet &);
++ int parseResponseStatusAndReason(Tokenizer&, const CharacterSet &);
+
+ /// magic prefix for identifying ICY response messages
+ static const SBuf IcyMagic;
+--- a/src/http/one/TeChunkedParser.cc
++++ b/src/http/one/TeChunkedParser.cc
+@@ -13,10 +13,13 @@
+ #include "http/one/Tokenizer.h"
+ #include "http/ProtocolVersion.h"
+ #include "MemBuf.h"
++#include "parser/Tokenizer.h"
+ #include "Parsing.h"
++#include "sbuf/Stream.h"
+ #include "SquidConfig.h"
+
+-Http::One::TeChunkedParser::TeChunkedParser()
++Http::One::TeChunkedParser::TeChunkedParser():
++ customExtensionValueParser(nullptr)
+ {
+ // chunked encoding only exists in HTTP/1.1
+ Http1::Parser::msgProtocol_ = Http::ProtocolVersion(1,1);
+@@ -31,7 +34,11 @@ Http::One::TeChunkedParser::clear()
+ buf_.clear();
+ theChunkSize = theLeftBodySize = 0;
+ theOut = NULL;
+- useOriginBody = -1;
++ // XXX: We do not reset customExtensionValueParser here. Based on the
++ // clear() API description, we must, but it makes little sense and could
++ // break method callers if they appear because some of them may forget to
++ // reset customExtensionValueParser. TODO: Remove Http1::Parser as our
++ // parent class and this unnecessary method with it.
+ }
+
+ bool
+@@ -49,14 +56,14 @@ Http::One::TeChunkedParser::parse(const
+ if (parsingStage_ == Http1::HTTP_PARSE_NONE)
+ parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
+
+- Http1::Tokenizer tok(buf_);
++ Tokenizer tok(buf_);
+
+ // loop for as many chunks as we can
+ // use do-while instead of while so that we can incrementally
+ // restart in the middle of a chunk/frame
+ do {
+
+- if (parsingStage_ == Http1::HTTP_PARSE_CHUNK_EXT && !parseChunkExtension(tok, theChunkSize))
++ if (parsingStage_ == Http1::HTTP_PARSE_CHUNK_EXT && !parseChunkMetadataSuffix(tok))
+ return false;
+
+ if (parsingStage_ == Http1::HTTP_PARSE_CHUNK && !parseChunkBody(tok))
+@@ -80,7 +87,7 @@ Http::One::TeChunkedParser::needsMoreSpa
+
+ /// RFC 7230 section 4.1 chunk-size
+ bool
+-Http::One::TeChunkedParser::parseChunkSize(Http1::Tokenizer &tok)
++Http::One::TeChunkedParser::parseChunkSize(Tokenizer &tok)
+ {
+ Must(theChunkSize <= 0); // Should(), really
+
+@@ -104,66 +111,75 @@ Http::One::TeChunkedParser::parseChunkSi
+ return false; // should not be reachable
+ }
+
+-/**
+- * Parses chunk metadata suffix, looking for interesting extensions and/or
+- * getting to the line terminator. RFC 7230 section 4.1.1 and its Errata #4667:
+- *
+- * chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
+- * chunk-ext-name = token
+- * chunk-ext-val = token / quoted-string
+- *
+- * ICAP 'use-original-body=N' extension is supported.
+- */
+-bool
+-Http::One::TeChunkedParser::parseChunkExtension(Http1::Tokenizer &tok, bool skipKnown)
+-{
+- SBuf ext;
+- SBuf value;
+- while (
+- ParseBws(tok) && // Bug 4492: IBM_HTTP_Server sends SP after chunk-size
+- tok.skip(';') &&
+- ParseBws(tok) && // Bug 4492: ICAP servers send SP before chunk-ext-name
+- tok.prefix(ext, CharacterSet::TCHAR)) { // chunk-ext-name
+-
+- // whole value part is optional. if no '=' expect next chunk-ext
+- if (ParseBws(tok) && tok.skip('=') && ParseBws(tok)) {
+-
+- if (!skipKnown) {
+- if (ext.cmp("use-original-body",17) == 0 && tok.int64(useOriginBody, 10)) {
+- debugs(94, 3, "Found chunk extension " << ext << "=" << useOriginBody);
+- buf_ = tok.remaining(); // parse checkpoint
+- continue;
+- }
+- }
+-
+- debugs(94, 5, "skipping unknown chunk extension " << ext);
+-
+- // unknown might have a value token or quoted-string
+- if (tok.quotedStringOrToken(value) && !tok.atEnd()) {
+- buf_ = tok.remaining(); // parse checkpoint
+- continue;
+- }
+-
+- // otherwise need more data OR corrupt syntax
+- break;
+- }
+-
+- if (!tok.atEnd())
+- buf_ = tok.remaining(); // parse checkpoint (unless there might be more token name)
+- }
+-
+- if (skipLineTerminator(tok)) {
+- buf_ = tok.remaining(); // checkpoint
+- // non-0 chunk means data, 0-size means optional Trailer follows
++/// Parses "[chunk-ext] CRLF" from RFC 7230 section 4.1.1:
++/// chunk = chunk-size [ chunk-ext ] CRLF chunk-data CRLF
++/// last-chunk = 1*"0" [ chunk-ext ] CRLF
++bool
++Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok)
++{
++ // Code becomes much simpler when incremental parsing functions throw on
++ // bad or insufficient input, like in the code below. TODO: Expand up.
++ try {
++ parseChunkExtensions(tok); // a possibly empty chunk-ext list
++ skipLineTerminator(tok);
++ buf_ = tok.remaining();
+ parsingStage_ = theChunkSize ? Http1::HTTP_PARSE_CHUNK : Http1::HTTP_PARSE_MIME;
+ return true;
++ } catch (const InsufficientInput &) {
++ tok.reset(buf_); // backtrack to the last commit point
++ return false;
+ }
++ // other exceptions bubble up to kill message parsing
++}
++
++/// Parses the chunk-ext list (RFC 7230 section 4.1.1 and its Errata #4667):
++/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
++void
++Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &tok)
++{
++ do {
++ ParseBws(tok); // Bug 4492: IBM_HTTP_Server sends SP after chunk-size
+
+- return false;
++ if (!tok.skip(';'))
++ return; // reached the end of extensions (if any)
++
++ parseOneChunkExtension(tok);
++ buf_ = tok.remaining(); // got one extension
++ } while (true);
++}
++
++void
++Http::One::ChunkExtensionValueParser::Ignore(Tokenizer &tok, const SBuf &extName)
++{
++ const auto ignoredValue = tokenOrQuotedString(tok);
++ debugs(94, 5, extName << " with value " << ignoredValue);
++}
++
++/// Parses a single chunk-ext list element:
++/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
++void
++Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &tok)
++{
++ ParseBws(tok); // Bug 4492: ICAP servers send SP before chunk-ext-name
++
++ const auto extName = tok.prefix("chunk-ext-name", CharacterSet::TCHAR);
++
++ ParseBws(tok);
++
++ if (!tok.skip('='))
++ return; // parsed a valueless chunk-ext
++
++ ParseBws(tok);
++
++ // optimization: the only currently supported extension needs last-chunk
++ if (!theChunkSize && customExtensionValueParser)
++ customExtensionValueParser->parse(tok, extName);
++ else
++ ChunkExtensionValueParser::Ignore(tok, extName);
+ }
+
+ bool
+-Http::One::TeChunkedParser::parseChunkBody(Http1::Tokenizer &tok)
++Http::One::TeChunkedParser::parseChunkBody(Tokenizer &tok)
+ {
+ if (theLeftBodySize > 0) {
+ buf_ = tok.remaining(); // sync buffers before buf_ use
+@@ -188,17 +204,20 @@ Http::One::TeChunkedParser::parseChunkBo
+ }
+
+ bool
+-Http::One::TeChunkedParser::parseChunkEnd(Http1::Tokenizer &tok)
++Http::One::TeChunkedParser::parseChunkEnd(Tokenizer &tok)
+ {
+ Must(theLeftBodySize == 0); // Should(), really
+
+- if (skipLineTerminator(tok)) {
++ try {
++ skipLineTerminator(tok);
+ buf_ = tok.remaining(); // parse checkpoint
+ theChunkSize = 0; // done with the current chunk
+ parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
+ return true;
+ }
+-
+- return false;
++ catch (const InsufficientInput &) {
++ return false;
++ }
++ // other exceptions bubble up to kill message parsing
+ }
+
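+A hypothetical driver loop for the simplified parser (public API as
+declared in TeChunkedParser.h; the I/O helpers are placeholders):
+
+    Http1::TeChunkedParser parser;
+    MemBuf decoded;
+    decoded.init();
+    parser.setPayloadBuffer(&decoded);
+
+    SBuf inBuf; // accumulated, not-yet-parsed network input
+    for (;;) {
+        // parse() consumes what it can; an InsufficientInput thrown in
+        // chunk metadata is caught internally, the tokenizer backtracks
+        // to the last commit point, and parse() just returns false.
+        if (parser.parse(inBuf))
+            break; // parsed the terminating last-chunk (and trailer)
+        if (parser.needsMoreSpace())
+            drain(decoded); // placeholder: free space in the output buffer
+        inBuf = parser.remaining(); // keep the unparsed leftovers
+        readMore(inBuf); // placeholder: append newly received octets
+    }
+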
+--- a/src/http/one/TeChunkedParser.h
++++ b/src/http/one/TeChunkedParser.h
+@@ -18,6 +18,26 @@ namespace Http
+ namespace One
+ {
+
++using ::Parser::InsufficientInput;
++
++// TODO: Move this class into http/one/ChunkExtensionValueParser.*
++/// A customizable parser of a single chunk extension value (chunk-ext-val).
++/// From RFC 7230 section 4.1.1 and its Errata #4667:
++/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
++/// chunk-ext-name = token
++/// chunk-ext-val = token / quoted-string
++class ChunkExtensionValueParser
++{
++public:
++ typedef ::Parser::Tokenizer Tokenizer;
++
++ /// extracts and ignores the value of a named extension
++ static void Ignore(Tokenizer &tok, const SBuf &extName);
++
++ /// extracts and then interprets (or ignores) the extension value
++ virtual void parse(Tokenizer &tok, const SBuf &extName) = 0;
++};
++
+ /**
+ * An incremental parser for chunked transfer coding
+ * defined in RFC 7230 section 4.1.
+@@ -25,7 +45,7 @@ namespace One
+ *
+ * The parser shovels content bytes from the raw
+ * input buffer into the content output buffer, both caller-supplied.
+- * Ignores chunk extensions except for ICAP's ieof.
++ * Chunk extensions like use-original-body are handled via parseExtensionValuesWith().
+ * Trailers are available via mimeHeader() if wanted.
+ */
+ class TeChunkedParser : public Http1::Parser
+@@ -37,6 +57,10 @@ public:
+ /// set the buffer to be used to store decoded chunk data
+ void setPayloadBuffer(MemBuf *parsedContent) {theOut = parsedContent;}
+
++ /// Instead of ignoring all chunk extension values, give the supplied
++ /// parser a chance to handle them. Only applied to last-chunk (for now).
++ void parseExtensionValuesWith(ChunkExtensionValueParser *parser) { customExtensionValueParser = parser; }
++
+ bool needsMoreSpace() const;
+
+ /* Http1::Parser API */
+@@ -45,17 +69,20 @@ public:
+ virtual Parser::size_type firstLineSize() const {return 0;} // has no meaning with multiple chunks
+
+ private:
+- bool parseChunkSize(Http1::Tokenizer &tok);
+- bool parseChunkExtension(Http1::Tokenizer &tok, bool skipKnown);
+- bool parseChunkBody(Http1::Tokenizer &tok);
+- bool parseChunkEnd(Http1::Tokenizer &tok);
++ bool parseChunkSize(Tokenizer &tok);
++ bool parseChunkMetadataSuffix(Tokenizer &);
++ void parseChunkExtensions(Tokenizer &);
++ void parseOneChunkExtension(Tokenizer &);
++ bool parseChunkBody(Tokenizer &tok);
++ bool parseChunkEnd(Tokenizer &tok);
+
+ MemBuf *theOut;
+ uint64_t theChunkSize;
+ uint64_t theLeftBodySize;
+
+-public:
+- int64_t useOriginBody;
++ /// An optional plugin for parsing and interpreting custom chunk-ext-val.
++ /// This "visitor" object is owned by our creator.
++ ChunkExtensionValueParser *customExtensionValueParser;
+ };
+
+ } // namespace One
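+
+For comparison with the ICAP use-original-body handler added above, a
+sketch of a custom handler for a hypothetical "my-ext" chunk extension:
+
+    class MyExtParser: public Http1::ChunkExtensionValueParser
+    {
+    public:
+        /* Http1::ChunkExtensionValueParser API */
+        virtual void parse(Tokenizer &tok, const SBuf &extName) override
+        {
+            static const SBuf MyExtName("my-ext");
+            if (extName == MyExtName)
+                myExtValue_ = tok.udec64("my-ext value"); // throws on bad/partial input
+            else
+                Ignore(tok, extName); // extract and discard the unknown value
+        }
+
+    private:
+        int64_t myExtValue_ = -1; ///< the parsed my-ext value (or -1)
+    };
+
+    // ... then, before feeding data to the body parser:
+    // bodyParser->parseExtensionValuesWith(&myExtParser);
+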
+--- a/src/http/one/Tokenizer.cc
++++ b/src/http/one/Tokenizer.cc
+@@ -8,35 +8,18 @@
+
+ #include "squid.h"
+ #include "Debug.h"
++#include "http/one/Parser.h"
+ #include "http/one/Tokenizer.h"
++#include "parser/Tokenizer.h"
++#include "sbuf/Stream.h"
+
+-bool
+-Http::One::Tokenizer::quotedString(SBuf &returnedToken, const bool http1p0)
++/// Extracts quoted-string after the caller removes the initial '"'.
++/// \param http1p0 whether to prohibit \-escaped characters in quoted strings
++/// \throws InsufficientInput when input can be a token _prefix_
++/// \returns extracted quoted string (without quotes and with chars unescaped)
++static SBuf
++parseQuotedStringSuffix(Parser::Tokenizer &tok, const bool http1p0)
+ {
+- checkpoint();
+-
+- if (!skip('"'))
+- return false;
+-
+- return qdText(returnedToken, http1p0);
+-}
+-
+-bool
+-Http::One::Tokenizer::quotedStringOrToken(SBuf &returnedToken, const bool http1p0)
+-{
+- checkpoint();
+-
+- if (!skip('"'))
+- return prefix(returnedToken, CharacterSet::TCHAR);
+-
+- return qdText(returnedToken, http1p0);
+-}
+-
+-bool
+-Http::One::Tokenizer::qdText(SBuf &returnedToken, const bool http1p0)
+-{
+- // the initial DQUOTE has been skipped by the caller
+-
+ /*
+ * RFC 1945 - defines qdtext:
+ * inclusive of LWS (which includes CR and LF)
+@@ -61,12 +44,17 @@ Http::One::Tokenizer::qdText(SBuf &retur
+ // best we can do is a conditional reference since http1p0 value may change per-client
+ const CharacterSet &tokenChars = (http1p0 ? qdtext1p0 : qdtext1p1);
+
+- for (;;) {
+- SBuf::size_type prefixLen = buf().findFirstNotOf(tokenChars);
+- returnedToken.append(consume(prefixLen));
++ SBuf parsedToken;
++
++ while (!tok.atEnd()) {
++ SBuf qdText;
++ if (tok.prefix(qdText, tokenChars))
++ parsedToken.append(qdText);
++
++ if (!http1p0 && tok.skip('\\')) { // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not
++ if (tok.atEnd())
++ break;
+
+- // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not
+- if (!http1p0 && skip('\\')) {
+ /* RFC 7230 section 3.2.6
+ *
+ * The backslash octet ("\") can be used as a single-octet quoting
+@@ -78,32 +66,42 @@ Http::One::Tokenizer::qdText(SBuf &retur
+ */
+ static const CharacterSet qPairChars = CharacterSet::HTAB + CharacterSet::SP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
+ SBuf escaped;
+- if (!prefix(escaped, qPairChars, 1)) {
+- returnedToken.clear();
+- restoreLastCheckpoint();
+- return false;
+- }
+- returnedToken.append(escaped);
++ if (!tok.prefix(escaped, qPairChars, 1))
++ throw TexcHere("invalid escaped character in quoted-pair");
++
++ parsedToken.append(escaped);
+ continue;
++ }
+
+- } else if (skip('"')) {
+- break; // done
++ if (tok.skip('"'))
++ return parsedToken; // may be empty
+
+- } else if (atEnd()) {
+- // need more data
+- returnedToken.clear();
+- restoreLastCheckpoint();
+- return false;
+- }
++ if (tok.atEnd())
++ break;
+
+- // else, we have an error
+- debugs(24, 8, "invalid bytes for set " << tokenChars.name);
+- returnedToken.clear();
+- restoreLastCheckpoint();
+- return false;
++ throw TexcHere(ToSBuf("invalid bytes for set ", tokenChars.name));
+ }
+
+- // found the whole string
+- return true;
++ throw Http::One::InsufficientInput();
++}
++
++SBuf
++Http::One::tokenOrQuotedString(Parser::Tokenizer &tok, const bool http1p0)
++{
++ if (tok.skip('"'))
++ return parseQuotedStringSuffix(tok, http1p0);
++
++ if (tok.atEnd())
++ throw InsufficientInput();
++
++ SBuf parsedToken;
++ if (!tok.prefix(parsedToken, CharacterSet::TCHAR))
++ throw TexcHere("invalid input while expecting an HTTP token");
++
++ if (tok.atEnd())
++ throw InsufficientInput();
++
++ // got the complete token
++ return parsedToken;
+ }
+
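+The all-or-nothing behavior of the new helper, from a hypothetical
+caller's perspective:
+
+    SBuf complete("token;rest");
+    ::Parser::Tokenizer tok(complete);
+    const SBuf value = Http::One::tokenOrQuotedString(tok);
+    // value is "token"; the ';' proved the token ended, so tok now
+    // points at ";rest"
+
+    SBuf partial("\"unterminated quoted-string");
+    ::Parser::Tokenizer tok2(partial);
+    try {
+        (void)Http::One::tokenOrQuotedString(tok2);
+    } catch (const Http::One::InsufficientInput &) {
+        // no closing DQUOTE yet; the caller abandons tok2 and re-parses
+        // from its own commit point once more input arrives
+    }
+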
+--- a/src/http/one/Tokenizer.h
++++ b/src/http/one/Tokenizer.h
+@@ -9,68 +9,47 @@
+ #ifndef SQUID_SRC_HTTP_ONE_TOKENIZER_H
+ #define SQUID_SRC_HTTP_ONE_TOKENIZER_H
+
+-#include "parser/Tokenizer.h"
++#include "parser/forward.h"
++#include "sbuf/forward.h"
+
+ namespace Http {
+ namespace One {
+
+ /**
+- * Lexical processor extended to tokenize HTTP/1.x syntax.
++ * Extracts either an HTTP/1 token or quoted-string while dealing with
++ * possibly incomplete input typical for incremental text parsers.
++ * Unescapes escaped characters in HTTP/1.1 quoted strings.
+ *
+- * \see ::Parser::Tokenizer for more detail
++ * \param http1p0 whether to prohibit \-escaped characters in quoted strings
++ * \throws InsufficientInput as appropriate, including on unterminated tokens
++ * \returns extracted token or quoted string (without quotes)
++ *
++ * Governed by:
++ * - RFC 1945 section 2.1
++ * "
++ * A string of text is parsed as a single word if it is quoted using
++ * double-quote marks.
++ *
++ * quoted-string = ( <"> *(qdtext) <"> )
++ *
++ * qdtext = <any CHAR except <"> and CTLs,
++ * but including LWS>
++ *
++ * Single-character quoting using the backslash ("\") character is not
++ * permitted in HTTP/1.0.
++ * "
++ *
++ * - RFC 7230 section 3.2.6
++ * "
++ * A string of text is parsed as a single value if it is quoted using
++ * double-quote marks.
++ *
++ * quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
++ * qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
++ * obs-text = %x80-FF
++ * "
+ */
+-class Tokenizer : public ::Parser::Tokenizer
+-{
+-public:
+- Tokenizer(SBuf &s) : ::Parser::Tokenizer(s), savedStats_(0) {}
+-
+- /**
+- * Attempt to parse a quoted-string lexical construct.
+- *
+- * Governed by:
+- * - RFC 1945 section 2.1
+- * "
+- * A string of text is parsed as a single word if it is quoted using
+- * double-quote marks.
+- *
+- * quoted-string = ( <"> *(qdtext) <"> )
+- *
+- * qdtext = <any CHAR except <"> and CTLs,
+- * but including LWS>
+- *
+- * Single-character quoting using the backslash ("\") character is not
+- * permitted in HTTP/1.0.
+- * "
+- *
+- * - RFC 7230 section 3.2.6
+- * "
+- * A string of text is parsed as a single value if it is quoted using
+- * double-quote marks.
+- *
+- * quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
+- * qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
+- * obs-text = %x80-FF
+- * "
+- *
+- * \param escaped HTTP/1.0 does not permit \-escaped characters
+- */
+- bool quotedString(SBuf &value, const bool http1p0 = false);
+-
+- /**
+- * Attempt to parse a (token / quoted-string ) lexical construct.
+- */
+- bool quotedStringOrToken(SBuf &value, const bool http1p0 = false);
+-
+-private:
+- /// parse the internal component of a quote-string, and terminal DQUOTE
+- bool qdText(SBuf &value, const bool http1p0);
+-
+- void checkpoint() { savedCheckpoint_ = buf(); savedStats_ = parsedSize(); }
+- void restoreLastCheckpoint() { undoParse(savedCheckpoint_, savedStats_); }
+-
+- SBuf savedCheckpoint_;
+- SBuf::size_type savedStats_;
+-};
++SBuf tokenOrQuotedString(Parser::Tokenizer &tok, const bool http1p0 = false);
+
+ } // namespace One
+ } // namespace Http
+--- a/src/http/one/forward.h
++++ b/src/http/one/forward.h
+@@ -10,6 +10,7 @@
+ #define SQUID_SRC_HTTP_ONE_FORWARD_H
+
+ #include "base/RefCount.h"
++#include "parser/forward.h"
+ #include "sbuf/forward.h"
+
+ namespace Http {
+@@ -31,6 +32,8 @@ typedef RefCount<Http::One::ResponsePars
+ /// CRLF textual representation
+ const SBuf &CrLf();
+
++using ::Parser::InsufficientInput;
++
+ } // namespace One
+ } // namespace Http
+
+--- a/src/parser/BinaryTokenizer.h
++++ b/src/parser/BinaryTokenizer.h
+@@ -9,6 +9,7 @@
+ #ifndef SQUID_SRC_PARSER_BINARYTOKENIZER_H
+ #define SQUID_SRC_PARSER_BINARYTOKENIZER_H
+
++#include "parser/forward.h"
+ #include "sbuf/SBuf.h"
+
+ namespace Parser
+@@ -44,7 +45,7 @@ public:
+ class BinaryTokenizer
+ {
+ public:
+- class InsufficientInput {}; // thrown when a method runs out of data
++ typedef ::Parser::InsufficientInput InsufficientInput;
+ typedef uint64_t size_type; // enough for the largest supported offset
+
+ BinaryTokenizer();
+--- a/src/parser/Makefile.am
++++ b/src/parser/Makefile.am
+@@ -13,6 +13,7 @@ noinst_LTLIBRARIES = libparser.la
+ libparser_la_SOURCES = \
+ BinaryTokenizer.h \
+ BinaryTokenizer.cc \
++ forward.h \
+ Tokenizer.h \
+ Tokenizer.cc
+
+--- a/src/parser/Tokenizer.cc
++++ b/src/parser/Tokenizer.cc
+@@ -10,7 +10,9 @@
+
+ #include "squid.h"
+ #include "Debug.h"
++#include "parser/forward.h"
+ #include "parser/Tokenizer.h"
++#include "sbuf/Stream.h"
+
+ #include <cerrno>
+ #if HAVE_CTYPE_H
+@@ -96,6 +98,23 @@ Parser::Tokenizer::prefix(SBuf &returned
+ return true;
+ }
+
++SBuf
++Parser::Tokenizer::prefix(const char *description, const CharacterSet &tokenChars, const SBuf::size_type limit)
++{
++ if (atEnd())
++ throw InsufficientInput();
++
++ SBuf result;
++
++ if (!prefix(result, tokenChars, limit))
++ throw TexcHere(ToSBuf("cannot parse ", description));
++
++ if (atEnd())
++ throw InsufficientInput();
++
++ return result;
++}
++
+ bool
+ Parser::Tokenizer::suffix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
+ {
+@@ -283,3 +302,24 @@ Parser::Tokenizer::int64(int64_t & resul
+ return success(s - range.rawContent());
+ }
+
++int64_t
++Parser::Tokenizer::udec64(const char *description, const SBuf::size_type limit)
++{
++ if (atEnd())
++ throw InsufficientInput();
++
++ int64_t result = 0;
++
++ // Since we only support unsigned decimals, a parsing failure with a
++ // non-empty input always implies invalid/malformed input (or a buggy
++ // limit=0 caller). TODO: Support signed and non-decimal integers by
++ // refactoring int64() to detect insufficient input.
++ if (!int64(result, 10, false, limit))
++ throw TexcHere(ToSBuf("cannot parse ", description));
++
++ if (atEnd())
++ throw InsufficientInput(); // more digits may be coming
++
++ return result;
++}
++
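+The throwing wrappers at work (hypothetical chunk-extension input):
+
+    SBuf input("use-original-body=10 ");
+    ::Parser::Tokenizer tok(input);
+    const SBuf name = tok.prefix("extension name", CharacterSet::TCHAR);
+    // name is "use-original-body"; the '=' proved the token ended
+    Must(tok.skip('='));
+    const int64_t value = tok.udec64("use-original-body value");
+    // value is 10; the trailing SP proved all digits arrived. Without
+    // it ("...=10"), udec64() would throw InsufficientInput because
+    // more digits may still be coming.
+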
+--- a/src/parser/Tokenizer.h
++++ b/src/parser/Tokenizer.h
+@@ -143,6 +143,19 @@ public:
+ */
+ bool int64(int64_t &result, int base = 0, bool allowSign = true, SBuf::size_type limit = SBuf::npos);
+
++ /*
++ * The methods below mimic their counterparts documented above, but they
++ * throw on errors, including InsufficientInput. The field description
++ * parameter is used for error reporting and debugging.
++ */
++
++ /// prefix() wrapper but throws InsufficientInput if input contains
++ /// nothing but the prefix (i.e. if the prefix is not "terminated")
++ SBuf prefix(const char *description, const CharacterSet &tokenChars, SBuf::size_type limit = SBuf::npos);
++
++ /// int64() wrapper but limited to unsigned decimal integers (for now)
++ int64_t udec64(const char *description, SBuf::size_type limit = SBuf::npos);
++
+ protected:
+ SBuf consume(const SBuf::size_type n);
+ SBuf::size_type success(const SBuf::size_type n);
+--- /dev/null
++++ b/src/parser/forward.h
+@@ -0,0 +1,22 @@
++/*
++ * Copyright (C) 1996-2019 The Squid Software Foundation and contributors
++ *
++ * Squid software is distributed under GPLv2+ license and includes
++ * contributions from numerous individuals and organizations.
++ * Please see the COPYING and CONTRIBUTORS files for details.
++ */
++
++#ifndef SQUID_PARSER_FORWARD_H
++#define SQUID_PARSER_FORWARD_H
++
++namespace Parser {
++class Tokenizer;
++class BinaryTokenizer;
++
++// TODO: Move this declaration (to parser/Elements.h) if we need more like it.
++/// thrown by modern "incremental" parsers when they need more data
++class InsufficientInput {};
++} // namespace Parser
++
++#endif /* SQUID_PARSER_FORWARD_H */
++