From 4721d50363e4f6f6b40f9965c18496123da5c5f4 Mon Sep 17 00:00:00 2001 From: Chunting Gu Date: Thu, 18 Apr 2019 16:20:45 +0800 Subject: [PATCH] Fix multipart form data parsing --- examples/file_upload_server.cc | 4 +- unittest/parser_unittest.cc | 90 +++++++++++++++++++++++++++++++++- webcc/common.h | 29 ++--------- webcc/request.cc | 7 ++- webcc/request.h | 10 ++-- webcc/request_builder.cc | 8 +-- webcc/request_builder.h | 17 ++----- webcc/request_parser.cc | 38 +++++++++----- webcc/request_parser.h | 2 +- 9 files changed, 142 insertions(+), 63 deletions(-) diff --git a/examples/file_upload_server.cc b/examples/file_upload_server.cc index 1517439..1e93b7d 100644 --- a/examples/file_upload_server.cc +++ b/examples/file_upload_server.cc @@ -15,8 +15,8 @@ public: std::cout << "files: " << request.http->form_parts().size() << std::endl; for (auto& part : request.http->form_parts()) { - std::cout << "name: " << part.name() << std::endl; - std::cout << "data: " << std::endl << part.data() << std::endl; + std::cout << "name: " << part->name() << std::endl; + std::cout << "data: " << std::endl << part->data() << std::endl; } response->content = "OK"; diff --git a/unittest/parser_unittest.cc b/unittest/parser_unittest.cc index 329fb20..072c759 100644 --- a/unittest/parser_unittest.cc +++ b/unittest/parser_unittest.cc @@ -8,7 +8,7 @@ #include // ----------------------------------------------------------------------------- - +#if 0 // HTTP GET request parser test fixture. class GetRequestParserTest : public testing::Test { protected: @@ -145,3 +145,91 @@ TEST_F(PostRequestParserTest, ParseByteWise) { CheckResult(); } +#endif + +// ----------------------------------------------------------------------------- + +class MultipartRequestParserTest : public testing::Test { +protected: + MultipartRequestParserTest() : parser_(&request_) { + } + + void SetUp() override { + data_ = + "--e81381de-436b-4314-8662-7362d5593b12\r\n" + "Content-Disposition: form-data; name=\"file\"; filename=\"remember.txt\"\r\n" + "\r\n" + "Remember\r\n" + "BY CHRISTINA ROSSETTI\r\n" + "\r\n" + "Remember me when I am gone away,\r\n" + " Gone far away into the silent land;\r\n" + " When you can no more hold me by the hand,\r\n" + "Nor I half turn to go yet turning stay.\r\n" + "Remember me when no more day by day\r\n" + " You tell me of our future that you plann'd:\r\n" + " Only remember me; you understand\r\n" + "It will be late to counsel then or pray.\r\n" + "Yet if you should forget me for a while\r\n" + " And afterwards remember, do not grieve:\r\n" + " For if the darkness and corruption leave\r\n" + " A vestige of the thoughts that once I had,\r\n" + "Better by far you should forget and smile\r\n" + " Than that you should remember and be sad.\r\n" + "\r\n" + "--e81381de-436b-4314-8662-7362d5593b12\r\n" + "Content-Disposition: form-data; name=\"json\"\r\n" + "Content-Type: application/json\r\n" + "\r\n" + "{}\r\n" + "--e81381de-436b-4314-8662-7362d5593b12--\r\n"; + + payload_ = + "POST / HTTP/1.1\r\n" + "User-Agent: Webcc/0.1.0\r\n" + "Accept-Encoding: gzip, deflate\r\n" + "Accept: */*\r\n" + "Connection: Keep-Alive\r\n" + "Host: localhost:8080\r\n" + "Content-Type: multipart/form-data; boundary=e81381de-436b-4314-8662-7362d5593b12\r\n" + "Content-Length: " + std::to_string(data_.size()) + "\r\n" + "\r\n"; + + payload_ += data_; + } + + void CheckResult() { + EXPECT_EQ("POST", request_.method()); + EXPECT_EQ("localhost:8080", request_.GetHeader("Host")); + EXPECT_EQ("*/*", request_.GetHeader("Accept")); + EXPECT_EQ("Keep-Alive", request_.GetHeader("Connection")); + + EXPECT_EQ(2, request_.form_parts().size()); + } + + std::string payload_; + std::string data_; + + webcc::Request request_; + webcc::RequestParser parser_; +}; + +TEST_F(MultipartRequestParserTest, ParseFullDataOnce) { + bool ok = parser_.Parse(payload_.data(), payload_.size()); + + EXPECT_TRUE(ok); + EXPECT_TRUE(parser_.finished()); + + CheckResult(); +} + +TEST_F(MultipartRequestParserTest, ParseByteWise) { + for (std::size_t i = 0; i < payload_.size(); ++i) { + bool ok = parser_.Parse(payload_.data() + i, 1); + EXPECT_TRUE(ok); + } + + EXPECT_TRUE(parser_.finished()); + + CheckResult(); +} diff --git a/webcc/common.h b/webcc/common.h index be7352f..03140da 100644 --- a/webcc/common.h +++ b/webcc/common.h @@ -169,31 +169,8 @@ public: FormPart(const std::string& name, std::string&& data, const std::string& media_type = ""); -#if WEBCC_DEFAULT_MOVE_COPY_ASSIGN - - FormPart(FormPart&&) = default; - FormPart& operator=(FormPart&&) = default; - -#else - - FormPart(FormPart&& rhs) - : name_(std::move(rhs.name_)), - file_name_(std::move(rhs.file_name_)), - media_type_(std::move(rhs.media_type_)), - data_(std::move(rhs.data_)) { - } - - FormPart& operator=(FormPart&& rhs) { - if (&rhs != this) { - name_ = std::move(rhs.name_); - file_name_ = std::move(rhs.file_name_); - media_type_ = std::move(rhs.media_type_); - data_ = std::move(rhs.data_); - } - return *this; - } - -#endif // WEBCC_DEFAULT_MOVE_COPY_ASSIGN + FormPart(const FormPart&) = delete; + FormPart& operator=(const FormPart&) = delete; // API: SERVER const std::string& name() const { @@ -266,6 +243,8 @@ private: std::string data_; }; +using FormPartPtr = std::shared_ptr; + } // namespace webcc #endif // WEBCC_COMMON_H_ diff --git a/webcc/request.cc b/webcc/request.cc index 3f0d305..01353fd 100644 --- a/webcc/request.cc +++ b/webcc/request.cc @@ -2,6 +2,7 @@ #include "webcc/logger.h" #include "webcc/utility.h" +#include namespace webcc { @@ -52,7 +53,7 @@ void Request::Prepare() { data_payload.push_back(buffer(boundary_)); data_payload.push_back(buffer(misc_strings::CRLF)); - part.Prepare(&data_payload); + part->Prepare(&data_payload); } // Boundary end @@ -73,6 +74,10 @@ void Request::Prepare() { // Append payload of content data. payload_.insert(payload_.end(), data_payload.begin(), data_payload.end()); + + std::string str; + CopyPayload(&str); + std::cout << str; } void Request::CreateStartLine() { diff --git a/webcc/request.h b/webcc/request.h index d1cbf9a..c57f599 100644 --- a/webcc/request.h +++ b/webcc/request.h @@ -55,16 +55,16 @@ public: return port().empty() ? default_port : port(); } - const std::vector& form_parts() const { + const std::vector& form_parts() const { return form_parts_; } - void set_form_parts(std::vector&& form_parts) { + void set_form_parts(std::vector&& form_parts) { form_parts_ = std::move(form_parts); } - void AddFormPart(FormPart&& form_part) { - form_parts_.push_back(std::move(form_part)); + void AddFormPart(FormPartPtr form_part) { + form_parts_.push_back(form_part); } // Prepare payload. @@ -79,7 +79,7 @@ private: Url url_; // Files to upload for a POST request. - std::vector form_parts_; + std::vector form_parts_; std::string boundary_; }; diff --git a/webcc/request_builder.cc b/webcc/request_builder.cc index f6453ce..51cd73f 100644 --- a/webcc/request_builder.cc +++ b/webcc/request_builder.cc @@ -7,7 +7,7 @@ namespace webcc { -RequestPtr RequestBuilder::Build() { +RequestPtr RequestBuilder::operator()() { assert(parameters_.size() % 2 == 0); assert(headers_.size() % 2 == 0); @@ -44,7 +44,8 @@ RequestBuilder& RequestBuilder::File(const std::string& name, const Path& path, const std::string& media_type) { assert(!name.empty()); - form_parts_.push_back(FormPart{ name, path, media_type }); + auto part = std::make_shared(name, path, media_type); + form_parts_.push_back(part); return *this; } @@ -52,7 +53,8 @@ RequestBuilder& RequestBuilder::Form(const std::string& name, std::string&& data, const std::string& media_type) { assert(!name.empty()); - form_parts_.push_back(FormPart{ name, std::move(data), media_type }); + auto part = std::make_shared(name, std::move(data), media_type); + form_parts_.push_back(part); return *this; } diff --git a/webcc/request_builder.h b/webcc/request_builder.h index faaa30e..d592726 100644 --- a/webcc/request_builder.h +++ b/webcc/request_builder.h @@ -10,21 +10,14 @@ namespace webcc { class RequestBuilder { public: - explicit RequestBuilder(const std::string& method = "") - : method_(method) { - } - + RequestBuilder() = default; ~RequestBuilder() = default; RequestBuilder(const RequestBuilder&) = delete; RequestBuilder& operator=(const RequestBuilder&) = delete; // Build the request. - RequestPtr Build(); - - RequestPtr operator()() { - return Build(); - } + RequestPtr operator()(); RequestBuilder& Get() { return Method(methods::kGet); } RequestBuilder& Head() { return Method(methods::kHead); } @@ -72,8 +65,8 @@ public: RequestBuilder& File(const std::string& name, const Path& path, const std::string& media_type = ""); - RequestBuilder& Form(FormPart&& part) { - form_parts_.push_back(std::move(part)); + RequestBuilder& Form(FormPartPtr part) { + form_parts_.push_back(part); return *this; } @@ -121,7 +114,7 @@ private: bool json_ = false; // Files to upload for a POST request. - std::vector form_parts_; + std::vector form_parts_; // Compress the request content. // NOTE: Most servers don't support compressed requests. diff --git a/webcc/request_parser.cc b/webcc/request_parser.cc index 2d2a239..06b24dc 100644 --- a/webcc/request_parser.cc +++ b/webcc/request_parser.cc @@ -47,12 +47,12 @@ bool RequestParser::ParseContent(const char* data, std::size_t length) { } bool RequestParser::ParseMultipartContent(const char* data, - std::size_t length) { + std::size_t length) { // Append the new data to the pending data. // NOTE: It's more difficult to avoid this than normal fixed-length content. pending_data_.append(data, length); - LOG_VERB("Parse multipart content (pending data size: %u).", + LOG_VERB("Parse multipart content (3pending data size: %u).", pending_data_.size()); if (!content_length_parsed_ || content_length_ == kInvalidLength) { @@ -82,6 +82,9 @@ bool RequestParser::ParseMultipartContent(const char* data, } if (step_ == Step::kBoundaryParsed) { + if (!part_) { + part_.reset(new FormPart{}); + } bool need_more_data = false; if (ParsePartHeaders(&need_more_data)) { // Go to next step. @@ -103,9 +106,7 @@ bool RequestParser::ParseMultipartContent(const char* data, std::size_t count = 0; bool ended = false; if (!GetNextBoundaryLine(&off, &count, &ended)) { - // All pending data belongs to this part. - part_.AppendData(pending_data_); - pending_data_.clear(); + // Wait until next boundary. break; } @@ -113,11 +114,22 @@ bool RequestParser::ParseMultipartContent(const char* data, // This part has ended. if (off > 2) { - // -2 for exluding the CRLF after the data. - part_.AppendData(pending_data_.data(), off - 2); + // -2 for excluding the CRLF after the data. + part_->AppendData(pending_data_.data(), off - 2); + + // Erase the data of this part and the next boundary. + // +2 for including the CRLF after the boundary. + pending_data_.erase(0, off + count + 2); + } else { + LOG_ERRO("Invalid part data."); + return false; } - request_->AddFormPart(std::move(part_)); + // Add this part to request. + request_->AddFormPart(part_); + + // Reset for next part. + part_.reset(); if (ended) { // Go to the end step. @@ -174,10 +186,10 @@ bool RequestParser::ParsePartHeaders(bool* need_more_data) { header.second.c_str()); return false; } - part_.set_name(content_disposition.name()); - part_.set_file_name(content_disposition.file_name()); + part_->set_name(content_disposition.name()); + part_->set_file_name(content_disposition.file_name()); LOG_INFO("Content-Disposition (name=%s; filename=%s)", - part_.name().c_str(), part_.file_name().c_str()); + part_->name().c_str(), part_->file_name().c_str()); } // TODO: Parse other headers. @@ -190,8 +202,8 @@ bool RequestParser::ParsePartHeaders(bool* need_more_data) { } bool RequestParser::GetNextBoundaryLine(std::size_t* b_off, - std::size_t* b_count, - bool* ended) { + std::size_t* b_count, + bool* ended) { std::size_t off = 0; while (true) { diff --git a/webcc/request_parser.h b/webcc/request_parser.h index 29ed745..6cc0c07 100644 --- a/webcc/request_parser.h +++ b/webcc/request_parser.h @@ -43,7 +43,7 @@ private: }; Step step_ = kStart; - FormPart part_; + FormPartPtr part_; }; } // namespace webcc