Diffstat (limited to 'meta-networking/recipes-daemons/squid/files/CVE-2023-46846-pre1.patch')
-rw-r--r--  meta-networking/recipes-daemons/squid/files/CVE-2023-46846-pre1.patch  1154
1 file changed, 1154 insertions, 0 deletions
diff --git a/meta-networking/recipes-daemons/squid/files/CVE-2023-46846-pre1.patch b/meta-networking/recipes-daemons/squid/files/CVE-2023-46846-pre1.patch
new file mode 100644
index 0000000000..5b4e370d49
--- /dev/null
+++ b/meta-networking/recipes-daemons/squid/files/CVE-2023-46846-pre1.patch
@@ -0,0 +1,1154 @@
+Backport of:
+
+From 417da4006cf5c97d44e74431b816fc58fec9e270 Mon Sep 17 00:00:00 2001
+From: Eduard Bagdasaryan <eduard.bagdasaryan@measurement-factory.com>
+Date: Mon, 18 Mar 2019 17:48:21 +0000
+Subject: [PATCH] Fix incremental parsing of chunked quoted extensions (#310)
+
+Before this change, incremental parsing of quoted chunked extensions
+was broken for two reasons:
+
+* Http::One::Parser::skipLineTerminator() unexpectedly threw after a
+ partially received quoted chunk extension value.
+
+* When Http::One::Tokenizer was unable to parse a quoted extension,
+ it incorrectly restored the input buffer to the beginning of the
+ extension value (instead of the extension itself), thus making
+ further incremental parsing iterations impossible.
+
+IMO, the reason for this problem was that Http::One::Tokenizer::qdText()
+could not distinguish two cases (returning false in both):
+
+* the end of the quoted string not yet reached
+
+* an input error, e.g., wrong/unexpected character
+
+A possible approach would be to improve Http::One::Tokenizer, making it
+aware of a "needs more data" state. However, to be acceptable, such
+improvements should be made in the base Parser::Tokenizer class
+instead. Those changes are non-trivial and can be done separately,
+later.
+
+Another approach, used here, is to simplify the complex and error-prone
+chunked extensions parsing algorithm, fixing its incremental parsing
+bugs while still parsing incrementally in almost all cases. A
+performance regression is expected only in the relatively rare cases of
+partially received or malformed extensions.
+
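+To illustrate the new pattern, a minimal, self-contained sketch of
+throw-on-insufficient-input parsing with caller-side backtracking
+(std::string_view stands in for SBuf; this is illustrative code, not
+Squid's):
+
+    #include <string_view>
+
+    // thrown when a parser needs more input to decide (cf. parser/forward.h)
+    class InsufficientInput {};
+
+    // Parses one complete "name;" element, consuming it from buf.
+    // Throws InsufficientInput while the element may still be incomplete.
+    std::string_view parseElement(std::string_view &buf)
+    {
+        const auto end = buf.find(';');
+        if (end == std::string_view::npos)
+            throw InsufficientInput(); // more octets may complete the element
+        const auto element = buf.substr(0, end);
+        buf.remove_prefix(end + 1); // commit point: element fully parsed
+        return element;
+    }
+
+    // The caller backtracks to the last commit point on InsufficientInput,
+    // just as parseChunkMetadataSuffix() below does with tok.reset(buf_).
+    bool parseAvailable(std::string_view &buf)
+    {
+        const auto checkpoint = buf;
+        try {
+            (void)parseElement(buf);
+            return true; // made progress
+        } catch (const InsufficientInput &) {
+            buf = checkpoint; // wait for more data
+            return false;
+        }
+    }
+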
+Also:
+* fixed parsing of partial use-original-body extension values
+* do not treat an invalid use-original-body as an unknown extension
+* optimization: parse use-original-body extension only in ICAP context
+ (i.e., where it is expected)
+* improvement: added a new API to TeChunkedParser to specify known
+ chunked extensions list
+
+Upstream-Status: Backport [import from ubuntu https://git.launchpad.net/ubuntu/+source/squid/tree/debian/patches/CVE-2023-46846-pre1.patch?h=ubuntu/focal-security&id=9ccd217ca9428c9a6597e9310a99552026b245fa
+Upstream commit https://github.com/squid-cache/squid/commit/417da4006cf5c97d44e74431b816fc58fec9e270]
+CVE: CVE-2023-46846 #Dependency Patch1
+Signed-off-by: Vijay Anusuri <vanusuri@mvista.com>
+---
+ src/adaptation/icap/ModXact.cc | 21 ++++-
+ src/adaptation/icap/ModXact.h | 20 +++++
+ src/http/one/Parser.cc | 35 ++++----
+ src/http/one/Parser.h | 10 ++-
+ src/http/one/RequestParser.cc | 16 ++--
+ src/http/one/RequestParser.h | 8 +-
+ src/http/one/ResponseParser.cc | 17 ++--
+ src/http/one/ResponseParser.h | 2 +-
+ src/http/one/TeChunkedParser.cc | 139 ++++++++++++++++++--------------
+ src/http/one/TeChunkedParser.h | 41 ++++++++--
+ src/http/one/Tokenizer.cc | 104 ++++++++++++------------
+ src/http/one/Tokenizer.h | 89 ++++++++------------
+ src/http/one/forward.h | 3 +
+ src/parser/BinaryTokenizer.h | 3 +-
+ src/parser/Makefile.am | 1 +
+ src/parser/Tokenizer.cc | 40 +++++++++
+ src/parser/Tokenizer.h | 13 +++
+ src/parser/forward.h | 22 +++++
+ 18 files changed, 364 insertions(+), 220 deletions(-)
+ create mode 100644 src/parser/forward.h
+
+--- a/src/adaptation/icap/ModXact.cc
++++ b/src/adaptation/icap/ModXact.cc
+@@ -25,12 +25,13 @@
+ #include "comm.h"
+ #include "comm/Connection.h"
+ #include "err_detail_type.h"
+-#include "http/one/TeChunkedParser.h"
+ #include "HttpHeaderTools.h"
+ #include "HttpMsg.h"
+ #include "HttpReply.h"
+ #include "HttpRequest.h"
+ #include "MasterXaction.h"
++#include "parser/Tokenizer.h"
++#include "sbuf/Stream.h"
+ #include "SquidTime.h"
+
+ // flow and terminology:
+@@ -44,6 +45,8 @@ CBDATA_NAMESPACED_CLASS_INIT(Adaptation:
+
+ static const size_t TheBackupLimit = BodyPipe::MaxCapacity;
+
++const SBuf Adaptation::Icap::ChunkExtensionValueParser::UseOriginalBodyName("use-original-body");
++
+ Adaptation::Icap::ModXact::State::State()
+ {
+ memset(this, 0, sizeof(*this));
+@@ -1108,6 +1111,7 @@ void Adaptation::Icap::ModXact::decideOn
+ state.parsing = State::psBody;
+ replyHttpBodySize = 0;
+ bodyParser = new Http1::TeChunkedParser;
++ bodyParser->parseExtensionValuesWith(&extensionParser);
+ makeAdaptedBodyPipe("adapted response from the ICAP server");
+ Must(state.sending == State::sendingAdapted);
+ } else {
+@@ -1142,9 +1146,8 @@ void Adaptation::Icap::ModXact::parseBod
+ }
+
+ if (parsed) {
+- if (state.readyForUob && bodyParser->useOriginBody >= 0) {
+- prepPartialBodyEchoing(
+- static_cast<uint64_t>(bodyParser->useOriginBody));
++ if (state.readyForUob && extensionParser.sawUseOriginalBody()) {
++ prepPartialBodyEchoing(extensionParser.useOriginalBody());
+ stopParsing();
+ return;
+ }
+@@ -2014,3 +2017,14 @@ void Adaptation::Icap::ModXactLauncher::
+ }
+ }
+
++void
++Adaptation::Icap::ChunkExtensionValueParser::parse(Tokenizer &tok, const SBuf &extName)
++{
++ if (extName == UseOriginalBodyName) {
++ useOriginalBody_ = tok.udec64("use-original-body");
++ assert(useOriginalBody_ >= 0);
++ } else {
++ Ignore(tok, extName);
++ }
++}
++
+--- a/src/adaptation/icap/ModXact.h
++++ b/src/adaptation/icap/ModXact.h
+@@ -15,6 +15,7 @@
+ #include "adaptation/icap/Xaction.h"
+ #include "BodyPipe.h"
+ #include "http/one/forward.h"
++#include "http/one/TeChunkedParser.h"
+
+ /*
+ * ICAPModXact implements ICAP REQMOD and RESPMOD transaction using
+@@ -105,6 +106,23 @@ private:
+ enum State { stDisabled, stWriting, stIeof, stDone } theState;
+ };
+
++/// handles ICAP-specific chunk extensions supported by Squid
++class ChunkExtensionValueParser: public Http1::ChunkExtensionValueParser
++{
++public:
++ /* Http1::ChunkExtensionValueParser API */
++ virtual void parse(Tokenizer &tok, const SBuf &extName) override;
++
++ bool sawUseOriginalBody() const { return useOriginalBody_ >= 0; }
++ uint64_t useOriginalBody() const { assert(sawUseOriginalBody()); return static_cast<uint64_t>(useOriginalBody_); }
++
++private:
++ static const SBuf UseOriginalBodyName;
++
++ /// the value of the parsed use-original-body chunk extension (or -1)
++ int64_t useOriginalBody_ = -1;
++};
++
+ class ModXact: public Xaction, public BodyProducer, public BodyConsumer
+ {
+ CBDATA_CLASS(ModXact);
+@@ -270,6 +288,8 @@ private:
+
+ int adaptHistoryId; ///< adaptation history slot reservation
+
++ ChunkExtensionValueParser extensionParser;
++
+ class State
+ {
+
+--- a/src/http/one/Parser.cc
++++ b/src/http/one/Parser.cc
+@@ -7,10 +7,11 @@
+ */
+
+ #include "squid.h"
++#include "base/CharacterSet.h"
+ #include "Debug.h"
+ #include "http/one/Parser.h"
+-#include "http/one/Tokenizer.h"
+ #include "mime_header.h"
++#include "parser/Tokenizer.h"
+ #include "SquidConfig.h"
+
+ /// RFC 7230 section 2.6 - 7 magic octets
+@@ -61,20 +62,19 @@ Http::One::Parser::DelimiterCharacters()
+ RelaxedDelimiterCharacters() : CharacterSet::SP;
+ }
+
+-bool
+-Http::One::Parser::skipLineTerminator(Http1::Tokenizer &tok) const
++void
++Http::One::Parser::skipLineTerminator(Tokenizer &tok) const
+ {
+ if (tok.skip(Http1::CrLf()))
+- return true;
++ return;
+
+ if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF))
+- return true;
++ return;
+
+ if (tok.atEnd() || (tok.remaining().length() == 1 && tok.remaining().at(0) == '\r'))
+- return false; // need more data
++ throw InsufficientInput();
+
+ throw TexcHere("garbage instead of CRLF line terminator");
+- return false; // unreachable, but make naive compilers happy
+ }
+
+ /// all characters except the LF line terminator
+@@ -102,7 +102,7 @@ LineCharacters()
+ void
+ Http::One::Parser::cleanMimePrefix()
+ {
+- Http1::Tokenizer tok(mimeHeaderBlock_);
++ Tokenizer tok(mimeHeaderBlock_);
+ while (tok.skipOne(RelaxedDelimiterCharacters())) {
+ (void)tok.skipAll(LineCharacters()); // optional line content
+ // LF terminator is required.
+@@ -137,7 +137,7 @@ Http::One::Parser::cleanMimePrefix()
+ void
+ Http::One::Parser::unfoldMime()
+ {
+- Http1::Tokenizer tok(mimeHeaderBlock_);
++ Tokenizer tok(mimeHeaderBlock_);
+ const auto szLimit = mimeHeaderBlock_.length();
+ mimeHeaderBlock_.clear();
+ // prevent the mime sender being able to make append() realloc/grow multiple times.
+@@ -228,7 +228,7 @@ Http::One::Parser::getHostHeaderField()
+ debugs(25, 5, "looking for " << name);
+
+ // while we can find more LF in the SBuf
+- Http1::Tokenizer tok(mimeHeaderBlock_);
++ Tokenizer tok(mimeHeaderBlock_);
+ SBuf p;
+
+ while (tok.prefix(p, LineCharacters())) {
+@@ -250,7 +250,7 @@ Http::One::Parser::getHostHeaderField()
+ p.consume(namelen + 1);
+
+ // TODO: optimize SBuf::trim to take CharacterSet directly
+- Http1::Tokenizer t(p);
++ Tokenizer t(p);
+ t.skipAll(CharacterSet::WSP);
+ p = t.remaining();
+
+@@ -278,10 +278,15 @@ Http::One::ErrorLevel()
+ }
+
+ // BWS = *( SP / HTAB ) ; WhitespaceCharacters() may relax this RFC 7230 rule
+-bool
+-Http::One::ParseBws(Tokenizer &tok)
++void
++Http::One::ParseBws(Parser::Tokenizer &tok)
+ {
+- if (const auto count = tok.skipAll(Parser::WhitespaceCharacters())) {
++ const auto count = tok.skipAll(Parser::WhitespaceCharacters());
++
++ if (tok.atEnd())
++ throw InsufficientInput(); // even if count is positive
++
++ if (count) {
+ // Generating BWS is a MUST-level violation so warn about it as needed.
+ debugs(33, ErrorLevel(), "found " << count << " BWS octets");
+ // RFC 7230 says we MUST parse BWS, so we fall through even if
+@@ -289,6 +294,6 @@ Http::One::ParseBws(Tokenizer &tok)
+ }
+ // else we successfully "parsed" an empty BWS sequence
+
+- return true;
++ // success: no more BWS characters expected
+ }
+
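+The new ParseBws() contract, exercised by a hypothetical caller (Squid
+APIs as declared in this patch; headers elided):
+
+    SBuf input("  ;ext=1\r\n");
+    ::Parser::Tokenizer tok(input);
+    // Either consumes all BWS and returns, or throws InsufficientInput
+    // when the buffer ends before a non-BWS octet arrives -- even after
+    // skipping some whitespace -- because more BWS may still be coming.
+    Http::One::ParseBws(tok);
+    Must(tok.skip(';')); // the next octet is now known to be non-BWS
+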
+--- a/src/http/one/Parser.h
++++ b/src/http/one/Parser.h
+@@ -12,6 +12,7 @@
+ #include "anyp/ProtocolVersion.h"
+ #include "http/one/forward.h"
+ #include "http/StatusCode.h"
++#include "parser/forward.h"
+ #include "sbuf/SBuf.h"
+
+ namespace Http {
+@@ -40,6 +41,7 @@ class Parser : public RefCountable
+ {
+ public:
+ typedef SBuf::size_type size_type;
++ typedef ::Parser::Tokenizer Tokenizer;
+
+ Parser() : parseStatusCode(Http::scNone), parsingStage_(HTTP_PARSE_NONE), hackExpectsMime_(false) {}
+ virtual ~Parser() {}
+@@ -118,11 +120,11 @@ protected:
+ * detect and skip the CRLF or (if tolerant) LF line terminator
+ * consume from the tokenizer.
+ *
+- * throws if non-terminator is detected.
++ * \throws exception on bad or InsufficientInput.
+ * \retval true only if line terminator found.
+ * \retval false incomplete or missing line terminator, need more data.
+ */
+- bool skipLineTerminator(Http1::Tokenizer &tok) const;
++ void skipLineTerminator(Tokenizer &) const;
+
+ /**
+ * Scan to find the mime headers block for current message.
+@@ -159,8 +161,8 @@ private:
+ };
+
+ /// skips and, if needed, warns about RFC 7230 BWS ("bad" whitespace)
+-/// \returns true (always; unlike all the skip*() functions)
+-bool ParseBws(Tokenizer &tok);
++/// \throws InsufficientInput when the end of BWS cannot be confirmed
++void ParseBws(Parser::Tokenizer &);
+
+ /// the right debugs() level for logging HTTP violation messages
+ int ErrorLevel();
+--- a/src/http/one/RequestParser.cc
++++ b/src/http/one/RequestParser.cc
+@@ -9,8 +9,8 @@
+ #include "squid.h"
+ #include "Debug.h"
+ #include "http/one/RequestParser.h"
+-#include "http/one/Tokenizer.h"
+ #include "http/ProtocolVersion.h"
++#include "parser/Tokenizer.h"
+ #include "profiler/Profiler.h"
+ #include "SquidConfig.h"
+
+@@ -64,7 +64,7 @@ Http::One::RequestParser::skipGarbageLin
+ * RFC 7230 section 2.6, 3.1 and 3.5
+ */
+ bool
+-Http::One::RequestParser::parseMethodField(Http1::Tokenizer &tok)
++Http::One::RequestParser::parseMethodField(Tokenizer &tok)
+ {
+ // method field is a sequence of TCHAR.
+ // Limit to 32 characters to prevent overly long sequences of non-HTTP
+@@ -145,7 +145,7 @@ Http::One::RequestParser::RequestTargetC
+ }
+
+ bool
+-Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok)
++Http::One::RequestParser::parseUriField(Tokenizer &tok)
+ {
+ /* Arbitrary 64KB URI upper length limit.
+ *
+@@ -178,7 +178,7 @@ Http::One::RequestParser::parseUriField(
+ }
+
+ bool
+-Http::One::RequestParser::parseHttpVersionField(Http1::Tokenizer &tok)
++Http::One::RequestParser::parseHttpVersionField(Tokenizer &tok)
+ {
+ static const SBuf http1p0("HTTP/1.0");
+ static const SBuf http1p1("HTTP/1.1");
+@@ -253,7 +253,7 @@ Http::One::RequestParser::skipDelimiter(
+
+ /// Parse CRs at the end of request-line, just before the terminating LF.
+ bool
+-Http::One::RequestParser::skipTrailingCrs(Http1::Tokenizer &tok)
++Http::One::RequestParser::skipTrailingCrs(Tokenizer &tok)
+ {
+ if (Config.onoff.relaxed_header_parser) {
+ (void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
+@@ -289,12 +289,12 @@ Http::One::RequestParser::parseRequestFi
+ // Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
+ // Now, the request line has to end at the first LF.
+ static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
+- ::Parser::Tokenizer lineTok(buf_);
++ Tokenizer lineTok(buf_);
+ if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) {
+ if (buf_.length() >= Config.maxRequestHeaderSize) {
+ /* who should we blame for our failure to parse this line? */
+
+- Http1::Tokenizer methodTok(buf_);
++ Tokenizer methodTok(buf_);
+ if (!parseMethodField(methodTok))
+ return -1; // blame a bad method (or its delimiter)
+
+@@ -308,7 +308,7 @@ Http::One::RequestParser::parseRequestFi
+ return 0;
+ }
+
+- Http1::Tokenizer tok(line);
++ Tokenizer tok(line);
+
+ if (!parseMethodField(tok))
+ return -1;
+--- a/src/http/one/RequestParser.h
++++ b/src/http/one/RequestParser.h
+@@ -54,11 +54,11 @@ private:
+ bool doParse(const SBuf &aBuf);
+
+ /* all these return false and set parseStatusCode on parsing failures */
+- bool parseMethodField(Http1::Tokenizer &);
+- bool parseUriField(Http1::Tokenizer &);
+- bool parseHttpVersionField(Http1::Tokenizer &);
++ bool parseMethodField(Tokenizer &);
++ bool parseUriField(Tokenizer &);
++ bool parseHttpVersionField(Tokenizer &);
+ bool skipDelimiter(const size_t count, const char *where);
+- bool skipTrailingCrs(Http1::Tokenizer &tok);
++ bool skipTrailingCrs(Tokenizer &tok);
+
+ bool http0() const {return !msgProtocol_.major;}
+ static const CharacterSet &RequestTargetCharacters();
+--- a/src/http/one/ResponseParser.cc
++++ b/src/http/one/ResponseParser.cc
+@@ -9,8 +9,8 @@
+ #include "squid.h"
+ #include "Debug.h"
+ #include "http/one/ResponseParser.h"
+-#include "http/one/Tokenizer.h"
+ #include "http/ProtocolVersion.h"
++#include "parser/Tokenizer.h"
+ #include "profiler/Profiler.h"
+ #include "SquidConfig.h"
+
+@@ -47,7 +47,7 @@ Http::One::ResponseParser::firstLineSize
+ // NP: we found the protocol version and consumed it already.
+ // just need the status code and reason phrase
+ int
+-Http::One::ResponseParser::parseResponseStatusAndReason(Http1::Tokenizer &tok, const CharacterSet &WspDelim)
++Http::One::ResponseParser::parseResponseStatusAndReason(Tokenizer &tok, const CharacterSet &WspDelim)
+ {
+ if (!completedStatus_) {
+ debugs(74, 9, "seek status-code in: " << tok.remaining().substr(0,10) << "...");
+@@ -87,14 +87,13 @@ Http::One::ResponseParser::parseResponse
+ static const CharacterSet phraseChars = CharacterSet::WSP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
+ (void)tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing
+ try {
+- if (skipLineTerminator(tok)) {
+- debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
+- buf_ = tok.remaining(); // resume checkpoint
+- return 1;
+- }
++ skipLineTerminator(tok);
++ buf_ = tok.remaining(); // resume checkpoint
++ debugs(74, DBG_DATA, Raw("leftovers", buf_.rawContent(), buf_.length()));
++ return 1;
++ } catch (const InsufficientInput &) {
+ reasonPhrase_.clear();
+ return 0; // need more to be sure we have it all
+-
+ } catch (const std::exception &ex) {
+ debugs(74, 6, "invalid status-line: " << ex.what());
+ }
+@@ -119,7 +118,7 @@ Http::One::ResponseParser::parseResponse
+ int
+ Http::One::ResponseParser::parseResponseFirstLine()
+ {
+- Http1::Tokenizer tok(buf_);
++ Tokenizer tok(buf_);
+
+ const CharacterSet &WspDelim = DelimiterCharacters();
+
+--- a/src/http/one/ResponseParser.h
++++ b/src/http/one/ResponseParser.h
+@@ -43,7 +43,7 @@ public:
+
+ private:
+ int parseResponseFirstLine();
+- int parseResponseStatusAndReason(Http1::Tokenizer&, const CharacterSet &);
++ int parseResponseStatusAndReason(Tokenizer&, const CharacterSet &);
+
+ /// magic prefix for identifying ICY response messages
+ static const SBuf IcyMagic;
+--- a/src/http/one/TeChunkedParser.cc
++++ b/src/http/one/TeChunkedParser.cc
+@@ -13,10 +13,13 @@
+ #include "http/one/Tokenizer.h"
+ #include "http/ProtocolVersion.h"
+ #include "MemBuf.h"
++#include "parser/Tokenizer.h"
+ #include "Parsing.h"
++#include "sbuf/Stream.h"
+ #include "SquidConfig.h"
+
+-Http::One::TeChunkedParser::TeChunkedParser()
++Http::One::TeChunkedParser::TeChunkedParser():
++ customExtensionValueParser(nullptr)
+ {
+ // chunked encoding only exists in HTTP/1.1
+ Http1::Parser::msgProtocol_ = Http::ProtocolVersion(1,1);
+@@ -31,7 +34,11 @@ Http::One::TeChunkedParser::clear()
+ buf_.clear();
+ theChunkSize = theLeftBodySize = 0;
+ theOut = NULL;
+- useOriginBody = -1;
++ // XXX: We do not reset customExtensionValueParser here. Based on the
++ // clear() API description, we must, but it makes little sense and could
++ // break method callers if they appear because some of them may forget to
++ // reset customExtensionValueParser. TODO: Remove Http1::Parser as our
++ // parent class and this unnecessary method with it.
+ }
+
+ bool
+@@ -49,14 +56,14 @@ Http::One::TeChunkedParser::parse(const
+ if (parsingStage_ == Http1::HTTP_PARSE_NONE)
+ parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
+
+- Http1::Tokenizer tok(buf_);
++ Tokenizer tok(buf_);
+
+ // loop for as many chunks as we can
+ // use do-while instead of while so that we can incrementally
+ // restart in the middle of a chunk/frame
+ do {
+
+- if (parsingStage_ == Http1::HTTP_PARSE_CHUNK_EXT && !parseChunkExtension(tok, theChunkSize))
++ if (parsingStage_ == Http1::HTTP_PARSE_CHUNK_EXT && !parseChunkMetadataSuffix(tok))
+ return false;
+
+ if (parsingStage_ == Http1::HTTP_PARSE_CHUNK && !parseChunkBody(tok))
+@@ -80,7 +87,7 @@ Http::One::TeChunkedParser::needsMoreSpa
+
+ /// RFC 7230 section 4.1 chunk-size
+ bool
+-Http::One::TeChunkedParser::parseChunkSize(Http1::Tokenizer &tok)
++Http::One::TeChunkedParser::parseChunkSize(Tokenizer &tok)
+ {
+ Must(theChunkSize <= 0); // Should(), really
+
+@@ -104,66 +111,75 @@ Http::One::TeChunkedParser::parseChunkSi
+ return false; // should not be reachable
+ }
+
+-/**
+- * Parses chunk metadata suffix, looking for interesting extensions and/or
+- * getting to the line terminator. RFC 7230 section 4.1.1 and its Errata #4667:
+- *
+- * chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
+- * chunk-ext-name = token
+- * chunk-ext-val = token / quoted-string
+- *
+- * ICAP 'use-original-body=N' extension is supported.
+- */
+-bool
+-Http::One::TeChunkedParser::parseChunkExtension(Http1::Tokenizer &tok, bool skipKnown)
+-{
+- SBuf ext;
+- SBuf value;
+- while (
+- ParseBws(tok) && // Bug 4492: IBM_HTTP_Server sends SP after chunk-size
+- tok.skip(';') &&
+- ParseBws(tok) && // Bug 4492: ICAP servers send SP before chunk-ext-name
+- tok.prefix(ext, CharacterSet::TCHAR)) { // chunk-ext-name
+-
+- // whole value part is optional. if no '=' expect next chunk-ext
+- if (ParseBws(tok) && tok.skip('=') && ParseBws(tok)) {
+-
+- if (!skipKnown) {
+- if (ext.cmp("use-original-body",17) == 0 && tok.int64(useOriginBody, 10)) {
+- debugs(94, 3, "Found chunk extension " << ext << "=" << useOriginBody);
+- buf_ = tok.remaining(); // parse checkpoint
+- continue;
+- }
+- }
+-
+- debugs(94, 5, "skipping unknown chunk extension " << ext);
+-
+- // unknown might have a value token or quoted-string
+- if (tok.quotedStringOrToken(value) && !tok.atEnd()) {
+- buf_ = tok.remaining(); // parse checkpoint
+- continue;
+- }
+-
+- // otherwise need more data OR corrupt syntax
+- break;
+- }
+-
+- if (!tok.atEnd())
+- buf_ = tok.remaining(); // parse checkpoint (unless there might be more token name)
+- }
+-
+- if (skipLineTerminator(tok)) {
+- buf_ = tok.remaining(); // checkpoint
+- // non-0 chunk means data, 0-size means optional Trailer follows
++/// Parses "[chunk-ext] CRLF" from RFC 7230 section 4.1.1:
++/// chunk = chunk-size [ chunk-ext ] CRLF chunk-data CRLF
++/// last-chunk = 1*"0" [ chunk-ext ] CRLF
++bool
++Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok)
++{
++ // Code becomes much simpler when incremental parsing functions throw on
++ // bad or insufficient input, like in the code below. TODO: Expand up.
++ try {
++ parseChunkExtensions(tok); // a possibly empty chunk-ext list
++ skipLineTerminator(tok);
++ buf_ = tok.remaining();
+ parsingStage_ = theChunkSize ? Http1::HTTP_PARSE_CHUNK : Http1::HTTP_PARSE_MIME;
+ return true;
++ } catch (const InsufficientInput &) {
++ tok.reset(buf_); // backtrack to the last commit point
++ return false;
+ }
++ // other exceptions bubble up to kill message parsing
++}
++
++/// Parses the chunk-ext list (RFC 7230 section 4.1.1 and its Errata #4667):
++/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
++void
++Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &tok)
++{
++ do {
++ ParseBws(tok); // Bug 4492: IBM_HTTP_Server sends SP after chunk-size
+
+- return false;
++ if (!tok.skip(';'))
++ return; // reached the end of extensions (if any)
++
++ parseOneChunkExtension(tok);
++ buf_ = tok.remaining(); // got one extension
++ } while (true);
++}
++
++void
++Http::One::ChunkExtensionValueParser::Ignore(Tokenizer &tok, const SBuf &extName)
++{
++ const auto ignoredValue = tokenOrQuotedString(tok);
++ debugs(94, 5, extName << " with value " << ignoredValue);
++}
++
++/// Parses a single chunk-ext list element:
++/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
++void
++Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &tok)
++{
++ ParseBws(tok); // Bug 4492: ICAP servers send SP before chunk-ext-name
++
++ const auto extName = tok.prefix("chunk-ext-name", CharacterSet::TCHAR);
++
++ ParseBws(tok);
++
++ if (!tok.skip('='))
++ return; // parsed a valueless chunk-ext
++
++ ParseBws(tok);
++
++ // optimization: the only currently supported extension needs last-chunk
++ if (!theChunkSize && customExtensionValueParser)
++ customExtensionValueParser->parse(tok, extName);
++ else
++ ChunkExtensionValueParser::Ignore(tok, extName);
+ }
+
+ bool
+-Http::One::TeChunkedParser::parseChunkBody(Http1::Tokenizer &tok)
++Http::One::TeChunkedParser::parseChunkBody(Tokenizer &tok)
+ {
+ if (theLeftBodySize > 0) {
+ buf_ = tok.remaining(); // sync buffers before buf_ use
+@@ -188,17 +204,20 @@ Http::One::TeChunkedParser::parseChunkBo
+ }
+
+ bool
+-Http::One::TeChunkedParser::parseChunkEnd(Http1::Tokenizer &tok)
++Http::One::TeChunkedParser::parseChunkEnd(Tokenizer &tok)
+ {
+ Must(theLeftBodySize == 0); // Should(), really
+
+- if (skipLineTerminator(tok)) {
++ try {
++ skipLineTerminator(tok);
+ buf_ = tok.remaining(); // parse checkpoint
+ theChunkSize = 0; // done with the current chunk
+ parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
+ return true;
+ }
+-
+- return false;
++ catch (const InsufficientInput &) {
++ return false;
++ }
++ // other exceptions bubble up to kill message parsing
+ }
+
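+A hypothetical driver loop for the simplified parser (public API as
+declared in TeChunkedParser.h; the I/O helpers are placeholders):
+
+    Http1::TeChunkedParser parser;
+    MemBuf decoded;
+    decoded.init();
+    parser.setPayloadBuffer(&decoded);
+
+    SBuf inBuf; // accumulated, not-yet-parsed network input
+    for (;;) {
+        // parse() consumes what it can; an InsufficientInput thrown in
+        // chunk metadata is caught internally, the tokenizer backtracks
+        // to the last commit point, and parse() just returns false.
+        if (parser.parse(inBuf))
+            break; // parsed the terminating last-chunk (and trailer)
+        if (parser.needsMoreSpace())
+            drain(decoded); // placeholder: free space in the output buffer
+        inBuf = parser.remaining(); // keep the unparsed leftovers
+        readMore(inBuf); // placeholder: append newly received octets
+    }
+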
+--- a/src/http/one/TeChunkedParser.h
++++ b/src/http/one/TeChunkedParser.h
+@@ -18,6 +18,26 @@ namespace Http
+ namespace One
+ {
+
++using ::Parser::InsufficientInput;
++
++// TODO: Move this class into http/one/ChunkExtensionValueParser.*
++/// A customizable parser of a single chunk extension value (chunk-ext-val).
++/// From RFC 7230 section 4.1.1 and its Errata #4667:
++/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
++/// chunk-ext-name = token
++/// chunk-ext-val = token / quoted-string
++class ChunkExtensionValueParser
++{
++public:
++ typedef ::Parser::Tokenizer Tokenizer;
++
++ /// extracts and ignores the value of a named extension
++ static void Ignore(Tokenizer &tok, const SBuf &extName);
++
++ /// extracts and then interprets (or ignores) the extension value
++ virtual void parse(Tokenizer &tok, const SBuf &extName) = 0;
++};
++
+ /**
+ * An incremental parser for chunked transfer coding
+ * defined in RFC 7230 section 4.1.
+@@ -25,7 +45,7 @@ namespace One
+ *
+ * The parser shovels content bytes from the raw
+ * input buffer into the content output buffer, both caller-supplied.
+- * Ignores chunk extensions except for ICAP's ieof.
++ * Chunk extensions like use-original-body are handled via parseExtensionValuesWith().
+ * Trailers are available via mimeHeader() if wanted.
+ */
+ class TeChunkedParser : public Http1::Parser
+@@ -37,6 +57,10 @@ public:
+ /// set the buffer to be used to store decoded chunk data
+ void setPayloadBuffer(MemBuf *parsedContent) {theOut = parsedContent;}
+
++ /// Instead of ignoring all chunk extension values, give the supplied
++ /// parser a chance to handle them. Only applied to last-chunk (for now).
++ void parseExtensionValuesWith(ChunkExtensionValueParser *parser) { customExtensionValueParser = parser; }
++
+ bool needsMoreSpace() const;
+
+ /* Http1::Parser API */
+@@ -45,17 +69,20 @@ public:
+ virtual Parser::size_type firstLineSize() const {return 0;} // has no meaning with multiple chunks
+
+ private:
+- bool parseChunkSize(Http1::Tokenizer &tok);
+- bool parseChunkExtension(Http1::Tokenizer &tok, bool skipKnown);
+- bool parseChunkBody(Http1::Tokenizer &tok);
+- bool parseChunkEnd(Http1::Tokenizer &tok);
++ bool parseChunkSize(Tokenizer &tok);
++ bool parseChunkMetadataSuffix(Tokenizer &);
++ void parseChunkExtensions(Tokenizer &);
++ void parseOneChunkExtension(Tokenizer &);
++ bool parseChunkBody(Tokenizer &tok);
++ bool parseChunkEnd(Tokenizer &tok);
+
+ MemBuf *theOut;
+ uint64_t theChunkSize;
+ uint64_t theLeftBodySize;
+
+-public:
+- int64_t useOriginBody;
++ /// An optional plugin for parsing and interpreting custom chunk-ext-val.
++ /// This "visitor" object is owned by our creator.
++ ChunkExtensionValueParser *customExtensionValueParser;
+ };
+
+ } // namespace One
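+
+For comparison with the ICAP use-original-body handler added above, a
+sketch of a custom handler for a hypothetical "my-ext" chunk extension:
+
+    class MyExtParser: public Http1::ChunkExtensionValueParser
+    {
+    public:
+        /* Http1::ChunkExtensionValueParser API */
+        virtual void parse(Tokenizer &tok, const SBuf &extName) override
+        {
+            static const SBuf MyExtName("my-ext");
+            if (extName == MyExtName)
+                myExtValue_ = tok.udec64("my-ext value"); // throws on bad/partial input
+            else
+                Ignore(tok, extName); // extract and discard the unknown value
+        }
+
+    private:
+        int64_t myExtValue_ = -1; ///< the parsed my-ext value (or -1)
+    };
+
+    // ... then, before feeding data to the body parser:
+    // bodyParser->parseExtensionValuesWith(&myExtParser);
+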
+--- a/src/http/one/Tokenizer.cc
++++ b/src/http/one/Tokenizer.cc
+@@ -8,35 +8,18 @@
+
+ #include "squid.h"
+ #include "Debug.h"
++#include "http/one/Parser.h"
+ #include "http/one/Tokenizer.h"
++#include "parser/Tokenizer.h"
++#include "sbuf/Stream.h"
+
+-bool
+-Http::One::Tokenizer::quotedString(SBuf &returnedToken, const bool http1p0)
++/// Extracts quoted-string after the caller removes the initial '"'.
++/// \param http1p0 whether to prohibit \-escaped characters in quoted strings
++/// \throws InsufficientInput when input can be a token _prefix_
++/// \returns extracted quoted string (without quotes and with chars unescaped)
++static SBuf
++parseQuotedStringSuffix(Parser::Tokenizer &tok, const bool http1p0)
+ {
+- checkpoint();
+-
+- if (!skip('"'))
+- return false;
+-
+- return qdText(returnedToken, http1p0);
+-}
+-
+-bool
+-Http::One::Tokenizer::quotedStringOrToken(SBuf &returnedToken, const bool http1p0)
+-{
+- checkpoint();
+-
+- if (!skip('"'))
+- return prefix(returnedToken, CharacterSet::TCHAR);
+-
+- return qdText(returnedToken, http1p0);
+-}
+-
+-bool
+-Http::One::Tokenizer::qdText(SBuf &returnedToken, const bool http1p0)
+-{
+- // the initial DQUOTE has been skipped by the caller
+-
+ /*
+ * RFC 1945 - defines qdtext:
+ * inclusive of LWS (which includes CR and LF)
+@@ -61,12 +44,17 @@ Http::One::Tokenizer::qdText(SBuf &retur
+ // best we can do is a conditional reference since http1p0 value may change per-client
+ const CharacterSet &tokenChars = (http1p0 ? qdtext1p0 : qdtext1p1);
+
+- for (;;) {
+- SBuf::size_type prefixLen = buf().findFirstNotOf(tokenChars);
+- returnedToken.append(consume(prefixLen));
++ SBuf parsedToken;
++
++ while (!tok.atEnd()) {
++ SBuf qdText;
++ if (tok.prefix(qdText, tokenChars))
++ parsedToken.append(qdText);
++
++ if (!http1p0 && tok.skip('\\')) { // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not
++ if (tok.atEnd())
++ break;
+
+- // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not
+- if (!http1p0 && skip('\\')) {
+ /* RFC 7230 section 3.2.6
+ *
+ * The backslash octet ("\") can be used as a single-octet quoting
+@@ -78,32 +66,42 @@ Http::One::Tokenizer::qdText(SBuf &retur
+ */
+ static const CharacterSet qPairChars = CharacterSet::HTAB + CharacterSet::SP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
+ SBuf escaped;
+- if (!prefix(escaped, qPairChars, 1)) {
+- returnedToken.clear();
+- restoreLastCheckpoint();
+- return false;
+- }
+- returnedToken.append(escaped);
++ if (!tok.prefix(escaped, qPairChars, 1))
++ throw TexcHere("invalid escaped character in quoted-pair");
++
++ parsedToken.append(escaped);
+ continue;
++ }
+
+- } else if (skip('"')) {
+- break; // done
++ if (tok.skip('"'))
++ return parsedToken; // may be empty
+
+- } else if (atEnd()) {
+- // need more data
+- returnedToken.clear();
+- restoreLastCheckpoint();
+- return false;
+- }
++ if (tok.atEnd())
++ break;
+
+- // else, we have an error
+- debugs(24, 8, "invalid bytes for set " << tokenChars.name);
+- returnedToken.clear();
+- restoreLastCheckpoint();
+- return false;
++ throw TexcHere(ToSBuf("invalid bytes for set ", tokenChars.name));
+ }
+
+- // found the whole string
+- return true;
++ throw Http::One::InsufficientInput();
++}
++
++SBuf
++Http::One::tokenOrQuotedString(Parser::Tokenizer &tok, const bool http1p0)
++{
++ if (tok.skip('"'))
++ return parseQuotedStringSuffix(tok, http1p0);
++
++ if (tok.atEnd())
++ throw InsufficientInput();
++
++ SBuf parsedToken;
++ if (!tok.prefix(parsedToken, CharacterSet::TCHAR))
++ throw TexcHere("invalid input while expecting an HTTP token");
++
++ if (tok.atEnd())
++ throw InsufficientInput();
++
++ // got the complete token
++ return parsedToken;
+ }
+
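+The all-or-nothing behavior of the new helper, from a hypothetical
+caller's perspective:
+
+    SBuf complete("token;rest");
+    ::Parser::Tokenizer tok(complete);
+    const SBuf value = Http::One::tokenOrQuotedString(tok);
+    // value is "token"; the ';' proved the token ended, so tok now
+    // points at ";rest"
+
+    SBuf partial("\"unterminated quoted-string");
+    ::Parser::Tokenizer tok2(partial);
+    try {
+        (void)Http::One::tokenOrQuotedString(tok2);
+    } catch (const Http::One::InsufficientInput &) {
+        // no closing DQUOTE yet; the caller abandons tok2 and re-parses
+        // from its own commit point once more input arrives
+    }
+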
+--- a/src/http/one/Tokenizer.h
++++ b/src/http/one/Tokenizer.h
+@@ -9,68 +9,47 @@
+ #ifndef SQUID_SRC_HTTP_ONE_TOKENIZER_H
+ #define SQUID_SRC_HTTP_ONE_TOKENIZER_H
+
+-#include "parser/Tokenizer.h"
++#include "parser/forward.h"
++#include "sbuf/forward.h"
+
+ namespace Http {
+ namespace One {
+
+ /**
+- * Lexical processor extended to tokenize HTTP/1.x syntax.
++ * Extracts either an HTTP/1 token or quoted-string while dealing with
++ * possibly incomplete input typical for incremental text parsers.
++ * Unescapes escaped characters in HTTP/1.1 quoted strings.
+ *
+- * \see ::Parser::Tokenizer for more detail
++ * \param http1p0 whether to prohibit \-escaped characters in quoted strings
++ * \throws InsufficientInput as appropriate, including on unterminated tokens
++ * \returns extracted token or quoted string (without quotes)
++ *
++ * Governed by:
++ * - RFC 1945 section 2.1
++ * "
++ * A string of text is parsed as a single word if it is quoted using
++ * double-quote marks.
++ *
++ * quoted-string = ( <"> *(qdtext) <"> )
++ *
++ * qdtext = <any CHAR except <"> and CTLs,
++ * but including LWS>
++ *
++ * Single-character quoting using the backslash ("\") character is not
++ * permitted in HTTP/1.0.
++ * "
++ *
++ * - RFC 7230 section 3.2.6
++ * "
++ * A string of text is parsed as a single value if it is quoted using
++ * double-quote marks.
++ *
++ * quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
++ * qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
++ * obs-text = %x80-FF
++ * "
+ */
+-class Tokenizer : public ::Parser::Tokenizer
+-{
+-public:
+- Tokenizer(SBuf &s) : ::Parser::Tokenizer(s), savedStats_(0) {}
+-
+- /**
+- * Attempt to parse a quoted-string lexical construct.
+- *
+- * Governed by:
+- * - RFC 1945 section 2.1
+- * "
+- * A string of text is parsed as a single word if it is quoted using
+- * double-quote marks.
+- *
+- * quoted-string = ( <"> *(qdtext) <"> )
+- *
+- * qdtext = <any CHAR except <"> and CTLs,
+- * but including LWS>
+- *
+- * Single-character quoting using the backslash ("\") character is not
+- * permitted in HTTP/1.0.
+- * "
+- *
+- * - RFC 7230 section 3.2.6
+- * "
+- * A string of text is parsed as a single value if it is quoted using
+- * double-quote marks.
+- *
+- * quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
+- * qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
+- * obs-text = %x80-FF
+- * "
+- *
+- * \param escaped HTTP/1.0 does not permit \-escaped characters
+- */
+- bool quotedString(SBuf &value, const bool http1p0 = false);
+-
+- /**
+- * Attempt to parse a (token / quoted-string ) lexical construct.
+- */
+- bool quotedStringOrToken(SBuf &value, const bool http1p0 = false);
+-
+-private:
+- /// parse the internal component of a quote-string, and terminal DQUOTE
+- bool qdText(SBuf &value, const bool http1p0);
+-
+- void checkpoint() { savedCheckpoint_ = buf(); savedStats_ = parsedSize(); }
+- void restoreLastCheckpoint() { undoParse(savedCheckpoint_, savedStats_); }
+-
+- SBuf savedCheckpoint_;
+- SBuf::size_type savedStats_;
+-};
++SBuf tokenOrQuotedString(Parser::Tokenizer &tok, const bool http1p0 = false);
+
+ } // namespace One
+ } // namespace Http
+--- a/src/http/one/forward.h
++++ b/src/http/one/forward.h
+@@ -10,6 +10,7 @@
+ #define SQUID_SRC_HTTP_ONE_FORWARD_H
+
+ #include "base/RefCount.h"
++#include "parser/forward.h"
+ #include "sbuf/forward.h"
+
+ namespace Http {
+@@ -31,6 +32,8 @@ typedef RefCount<Http::One::ResponsePars
+ /// CRLF textual representation
+ const SBuf &CrLf();
+
++using ::Parser::InsufficientInput;
++
+ } // namespace One
+ } // namespace Http
+
+--- a/src/parser/BinaryTokenizer.h
++++ b/src/parser/BinaryTokenizer.h
+@@ -9,6 +9,7 @@
+ #ifndef SQUID_SRC_PARSER_BINARYTOKENIZER_H
+ #define SQUID_SRC_PARSER_BINARYTOKENIZER_H
+
++#include "parser/forward.h"
+ #include "sbuf/SBuf.h"
+
+ namespace Parser
+@@ -44,7 +45,7 @@ public:
+ class BinaryTokenizer
+ {
+ public:
+- class InsufficientInput {}; // thrown when a method runs out of data
++ typedef ::Parser::InsufficientInput InsufficientInput;
+ typedef uint64_t size_type; // enough for the largest supported offset
+
+ BinaryTokenizer();
+--- a/src/parser/Makefile.am
++++ b/src/parser/Makefile.am
+@@ -13,6 +13,7 @@ noinst_LTLIBRARIES = libparser.la
+ libparser_la_SOURCES = \
+ BinaryTokenizer.h \
+ BinaryTokenizer.cc \
++ forward.h \
+ Tokenizer.h \
+ Tokenizer.cc
+
+--- a/src/parser/Tokenizer.cc
++++ b/src/parser/Tokenizer.cc
+@@ -10,7 +10,9 @@
+
+ #include "squid.h"
+ #include "Debug.h"
++#include "parser/forward.h"
+ #include "parser/Tokenizer.h"
++#include "sbuf/Stream.h"
+
+ #include <cerrno>
+ #if HAVE_CTYPE_H
+@@ -96,6 +98,23 @@ Parser::Tokenizer::prefix(SBuf &returned
+ return true;
+ }
+
++SBuf
++Parser::Tokenizer::prefix(const char *description, const CharacterSet &tokenChars, const SBuf::size_type limit)
++{
++ if (atEnd())
++ throw InsufficientInput();
++
++ SBuf result;
++
++ if (!prefix(result, tokenChars, limit))
++ throw TexcHere(ToSBuf("cannot parse ", description));
++
++ if (atEnd())
++ throw InsufficientInput();
++
++ return result;
++}
++
+ bool
+ Parser::Tokenizer::suffix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
+ {
+@@ -283,3 +302,24 @@ Parser::Tokenizer::int64(int64_t & resul
+ return success(s - range.rawContent());
+ }
+
++int64_t
++Parser::Tokenizer::udec64(const char *description, const SBuf::size_type limit)
++{
++ if (atEnd())
++ throw InsufficientInput();
++
++ int64_t result = 0;
++
++ // Since we only support unsigned decimals, a parsing failure with a
++ // non-empty input always implies invalid/malformed input (or a buggy
++ // limit=0 caller). TODO: Support signed and non-decimal integers by
++ // refactoring int64() to detect insufficient input.
++ if (!int64(result, 10, false, limit))
++ throw TexcHere(ToSBuf("cannot parse ", description));
++
++ if (atEnd())
++ throw InsufficientInput(); // more digits may be coming
++
++ return result;
++}
++
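+The throwing wrappers at work (hypothetical chunk-extension input):
+
+    SBuf input("use-original-body=10 ");
+    ::Parser::Tokenizer tok(input);
+    const SBuf name = tok.prefix("extension name", CharacterSet::TCHAR);
+    // name is "use-original-body"; the '=' proved the token ended
+    Must(tok.skip('='));
+    const int64_t value = tok.udec64("use-original-body value");
+    // value is 10; the trailing SP proved all digits arrived. Without
+    // it ("...=10"), udec64() would throw InsufficientInput because
+    // more digits may still be coming.
+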
+--- a/src/parser/Tokenizer.h
++++ b/src/parser/Tokenizer.h
+@@ -143,6 +143,19 @@ public:
+ */
+ bool int64(int64_t &result, int base = 0, bool allowSign = true, SBuf::size_type limit = SBuf::npos);
+
++ /*
++ * The methods below mimic their counterparts documented above, but they
++ * throw on errors, including InsufficientInput. The field description
++ * parameter is used for error reporting and debugging.
++ */
++
++ /// prefix() wrapper but throws InsufficientInput if input contains
++ /// nothing but the prefix (i.e. if the prefix is not "terminated")
++ SBuf prefix(const char *description, const CharacterSet &tokenChars, SBuf::size_type limit = SBuf::npos);
++
++ /// int64() wrapper but limited to unsigned decimal integers (for now)
++ int64_t udec64(const char *description, SBuf::size_type limit = SBuf::npos);
++
+ protected:
+ SBuf consume(const SBuf::size_type n);
+ SBuf::size_type success(const SBuf::size_type n);
+--- /dev/null
++++ b/src/parser/forward.h
+@@ -0,0 +1,22 @@
++/*
++ * Copyright (C) 1996-2019 The Squid Software Foundation and contributors
++ *
++ * Squid software is distributed under GPLv2+ license and includes
++ * contributions from numerous individuals and organizations.
++ * Please see the COPYING and CONTRIBUTORS files for details.
++ */
++
++#ifndef SQUID_PARSER_FORWARD_H
++#define SQUID_PARSER_FORWARD_H
++
++namespace Parser {
++class Tokenizer;
++class BinaryTokenizer;
++
++// TODO: Move this declaration (to parser/Elements.h) if we need more like it.
++/// thrown by modern "incremental" parsers when they need more data
++class InsufficientInput {};
++} // namespace Parser
++
++#endif /* SQUID_PARSER_FORWARD_H */
++