commit re2 for openSUSE:Factory
Hello community, here is the log from the commit of package re2 for openSUSE:Factory checked in at 2017-07-30 11:27:26 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/re2 (Old) and /work/SRC/openSUSE:Factory/.re2.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Package is "re2" Sun Jul 30 11:27:26 2017 rev:9 rq:513010 version:MACRO Changes: -------- --- /work/SRC/openSUSE:Factory/re2/re2.changes 2017-06-13 16:09:00.739007915 +0200 +++ /work/SRC/openSUSE:Factory/.re2.new/re2.changes 2017-07-30 11:27:40.606882316 +0200 @@ -1,0 +2,6 @@ +Sat Jul 29 10:04:51 UTC 2017 - mpluskal@suse.com + +- Update to version 2017-07-01 + * No upstream changelog available + +------------------------------------------------------------------- Old: ---- 2017-06-01.tar.gz New: ---- 2017-07-01.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ re2.spec ++++++ --- /var/tmp/diff_new_pack.3rrwPB/_old 2017-07-30 11:27:41.274788071 +0200 +++ /var/tmp/diff_new_pack.3rrwPB/_new 2017-07-30 11:27:41.282786942 +0200 @@ -16,7 +16,7 @@ # -%global longver 2017-06-01 +%global longver 2017-07-01 %global shortver %(echo %{longver}|sed 's|-||g') %define libname libre2-0 Name: re2 @@ -90,12 +90,10 @@ %postun -n %{libname} -p /sbin/ldconfig %files -n %{libname} -%defattr(-,root,root) %doc LICENSE AUTHORS CONTRIBUTORS README %{_libdir}/lib%{name}.so.* %files devel -%defattr(-,root,root) %{_includedir}/%{name} %{_libdir}/lib%{name}.so %{_libdir}/pkgconfig/%{name}.pc ++++++ 2017-06-01.tar.gz -> 2017-07-01.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2017-06-01/re2/dfa.cc new/re2-2017-07-01/re2/dfa.cc --- old/re2-2017-06-01/re2/dfa.cc 2017-05-18 07:20:04.000000000 +0200 +++ new/re2-2017-07-01/re2/dfa.cc 2017-06-27 14:55:46.000000000 +0200 @@ -27,10 +27,12 @@ #include <string.h> #include <algorithm> #include <atomic> +#include <deque> #include <map> #include <mutex> #include <new> #include <string> +#include <unordered_map> #include <unordered_set> #include <utility> #include <vector> @@ -96,9 +98,11 @@ bool anchored, bool want_earliest_match, bool run_forward, bool* failed, const char** ep, std::vector<int>* matches); - // Builds out all states for the entire DFA. FOR TESTING ONLY - // Returns number of states. - int BuildAllStates(); + // Builds out all states for the entire DFA. + // If cb is not empty, it receives one callback per state built. + // Returns the number of states built. + // FOR TESTING OR EXPERIMENTAL PURPOSES ONLY. + int BuildAllStates(const Prog::DFAStateCallback& cb); // Computes min and max for matching strings. Won't return strings // bigger than maxlen. @@ -1883,7 +1887,7 @@ } // Build out all states in DFA. Returns number of states. -int DFA::BuildAllStates() { +int DFA::BuildAllStates(const Prog::DFAStateCallback& cb) { if (!ok()) return 0; @@ -1892,34 +1896,67 @@ RWLocker l(&cache_mutex_); SearchParams params(StringPiece(), StringPiece(), &l); params.anchored = false; - if (!AnalyzeSearch(¶ms) || params.start <= SpecialStateMax) + if (!AnalyzeSearch(¶ms) || + params.start == NULL || + params.start == DeadState) return 0; // Add start state to work queue. - StateSet queued; - std::vector<State*> q; - queued.insert(params.start); + // Note that any State* that we handle here must point into the cache, + // so we can simply depend on pointer-as-a-number hashing and equality. + std::unordered_map<State*, int> m; + std::deque<State*> q; + m.emplace(params.start, static_cast<int>(m.size())); q.push_back(params.start); + // Compute the input bytes needed to cover all of the next pointers. + int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot + std::vector<int> input(nnext); + for (int c = 0; c < 256; c++) { + int b = prog_->bytemap()[c]; + while (c < 256-1 && prog_->bytemap()[c+1] == b) + c++; + input[b] = c; + } + input[prog_->bytemap_range()] = kByteEndText; + + // Scratch space for the output. + std::vector<int> output(nnext); + // Flood to expand every state. - for (size_t i = 0; i < q.size(); i++) { - State* s = q[i]; - for (int c = 0; c < 257; c++) { + bool oom = false; + while (!q.empty()) { + State* s = q.front(); + q.pop_front(); + for (int c : input) { State* ns = RunStateOnByteUnlocked(s, c); - if (ns > SpecialStateMax && queued.find(ns) == queued.end()) { - queued.insert(ns); + if (ns == NULL) { + oom = true; + break; + } + if (ns == DeadState) { + output[ByteMap(c)] = -1; + continue; + } + if (m.find(ns) == m.end()) { + m.emplace(ns, static_cast<int>(m.size())); q.push_back(ns); } + output[ByteMap(c)] = m[ns]; } + if (cb) + cb(oom ? NULL : output.data(), + s == FullMatchState || s->IsMatch()); + if (oom) + break; } - return static_cast<int>(q.size()); + return static_cast<int>(m.size()); } // Build out all states in DFA for kind. Returns number of states. -int Prog::BuildEntireDFA(MatchKind kind) { - //LOG(ERROR) << "BuildEntireDFA is only for testing."; - return GetDFA(kind)->BuildAllStates(); +int Prog::BuildEntireDFA(MatchKind kind, const DFAStateCallback& cb) { + return GetDFA(kind)->BuildAllStates(cb); } void Prog::TEST_dfa_should_bail_when_slow(bool b) { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2017-06-01/re2/nfa.cc new/re2-2017-07-01/re2/nfa.cc --- old/re2-2017-06-01/re2/nfa.cc 2017-05-18 07:20:04.000000000 +0200 +++ new/re2-2017-07-01/re2/nfa.cc 2017-06-27 14:55:46.000000000 +0200 @@ -102,8 +102,8 @@ // Run runq on byte c, appending new states to nextq. // Updates matched_ and match_ as new, better matches are found. - // p is the position of the previous byte (the one before c) - // in the input string, used when processing Match instructions. + // p is the position of byte c in the input string for AddToThreadq; + // p-1 will be used when processing Match instructions. // flag is the bitwise OR of Bol, Eol, etc., specifying whether // ^, $ and \b match the current input position (after c). // Frees all the threads on runq. @@ -328,8 +328,8 @@ // Run runq on byte c, appending new states to nextq. // Updates matched_ and match_ as new, better matches are found. -// p is the position of the previous byte (the one before c) -// in the input string, used when processing Match instructions. +// p is the position of byte c in the input string for AddToThreadq; +// p-1 will be used when processing Match instructions. // flag is the bitwise OR of Bol, Eol, etc., specifying whether // ^, $ and \b match the current input position (after c). // Frees all the threads on runq. @@ -360,7 +360,7 @@ break; case kInstByteRange: - AddToThreadq(nextq, ip->out(), c, flag, p+1, t); + AddToThreadq(nextq, ip->out(), c, flag, p, t); break; case kInstAltMatch: @@ -381,8 +381,13 @@ } break; - case kInstMatch: - if (endmatch_ && p != etext_) + case kInstMatch: { + // Avoid invoking undefined behavior when p happens + // to be null - and p-1 would be meaningless anyway. + if (p == NULL) + break; + + if (endmatch_ && p-1 != etext_) break; if (longest_) { @@ -390,16 +395,16 @@ // it is either farther to the left or at the same // point but longer than an existing match. if (!matched_ || t->capture[0] < match_[0] || - (t->capture[0] == match_[0] && p > match_[1])) { + (t->capture[0] == match_[0] && p-1 > match_[1])) { CopyCapture(match_, t->capture); - match_[1] = p; + match_[1] = p-1; matched_ = true; } } else { // Leftmost-biased mode: this match is by definition // better than what we've already found (see next line). CopyCapture(match_, t->capture); - match_[1] = p; + match_[1] = p-1; matched_ = true; // Cut off the threads that can only find matches @@ -412,6 +417,7 @@ return 0; } break; + } } Decref(t); } @@ -546,12 +552,8 @@ fprintf(stderr, "\n"); } - // Note that we pass p-1 to Step() because it needs the previous pointer - // value in order to handle Match instructions appropriately. It plumbs - // c and flag through to AddToThreadq() along with p-1+1, which is p. - // // This is a no-op the first time around the loop because runq is empty. - int id = Step(runq, nextq, p < text.end() ? p[0] & 0xFF : -1, flag, p-1); + int id = Step(runq, nextq, p < text.end() ? p[0] & 0xFF : -1, flag, p); DCHECK_EQ(runq->size(), 0); using std::swap; swap(nextq, runq); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2017-06-01/re2/prog.cc new/re2-2017-07-01/re2/prog.cc --- old/re2-2017-06-01/re2/prog.cc 2017-05-18 07:20:04.000000000 +0200 +++ new/re2-2017-07-01/re2/prog.cc 2017-06-27 14:55:46.000000000 +0200 @@ -540,7 +540,7 @@ // dominator of the instructions reachable from some "successor root" (i.e. it // has an unreachable predecessor) and is considered a "dominator root". Since // only Alt instructions can be "dominator roots" (other instructions would be -// "leaves"), only Alt instructions require their predecessors to be computed. +// "leaves"), only Alt instructions are required to be marked as predecessors. // // Dividing the Prog into "trees" comprises two passes: marking the "successor // roots" and the predecessors; and marking the "dominator roots". Sorting the diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2017-06-01/re2/prog.h new/re2-2017-07-01/re2/prog.h --- old/re2-2017-06-01/re2/prog.h 2017-05-18 07:20:04.000000000 +0200 +++ new/re2-2017-07-01/re2/prog.h 2017-06-27 14:55:46.000000000 +0200 @@ -10,6 +10,7 @@ // expression symbolically. #include <stdint.h> +#include <functional> #include <mutex> #include <string> #include <vector> @@ -256,11 +257,22 @@ Anchor anchor, MatchKind kind, StringPiece* match0, bool* failed, std::vector<int>* matches); - // Build the entire DFA for the given match kind. FOR TESTING ONLY. + // The callback issued after building each DFA state with BuildEntireDFA(). + // If next is null, then the memory budget has been exhausted and building + // will halt. Otherwise, the state has been built and next points to an array + // of bytemap_range()+1 slots holding the next states as per the bytemap and + // kByteEndText. The number of the state is implied by the callback sequence: + // the first callback is for state 0, the second callback is for state 1, ... + // match indicates whether the state is a matching state. + using DFAStateCallback = std::function<void(const int* next, bool match)>; + + // Build the entire DFA for the given match kind. // Usually the DFA is built out incrementally, as needed, which - // avoids lots of unnecessary work. This function is useful only - // for testing purposes. Returns number of states. - int BuildEntireDFA(MatchKind kind); + // avoids lots of unnecessary work. + // If cb is not empty, it receives one callback per state built. + // Returns the number of states built. + // FOR TESTING OR EXPERIMENTAL PURPOSES ONLY. + int BuildEntireDFA(MatchKind kind, const DFAStateCallback& cb); // Controls whether the DFA should bail out early if the NFA would be faster. // FOR TESTING ONLY. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2017-06-01/re2/testing/dfa_test.cc new/re2-2017-07-01/re2/testing/dfa_test.cc --- old/re2-2017-06-01/re2/testing/dfa_test.cc 2017-05-18 07:20:04.000000000 +0200 +++ new/re2-2017-07-01/re2/testing/dfa_test.cc 2017-06-27 14:55:46.000000000 +0200 @@ -28,7 +28,7 @@ // Helper function: builds entire DFA for prog. static void DoBuild(Prog* prog) { - CHECK(prog->BuildEntireDFA(Prog::kFirstMatch)); + CHECK(prog->BuildEntireDFA(Prog::kFirstMatch, nullptr)); } TEST(Multithreaded, BuildEntireDFA) { @@ -63,7 +63,7 @@ threads[j].join(); // One more compile, to make sure everything is okay. - prog->BuildEntireDFA(Prog::kFirstMatch); + prog->BuildEntireDFA(Prog::kFirstMatch, nullptr); delete prog; } @@ -88,8 +88,8 @@ CHECK(prog); //progusage = m.HeapGrowth(); //dfamem = prog->dfa_mem(); - prog->BuildEntireDFA(Prog::kFirstMatch); - prog->BuildEntireDFA(Prog::kLongestMatch); + prog->BuildEntireDFA(Prog::kFirstMatch, nullptr); + prog->BuildEntireDFA(Prog::kLongestMatch, nullptr); usage = m.HeapGrowth(); delete prog; } @@ -279,8 +279,8 @@ } struct ReverseTest { - const char *regexp; - const char *text; + const char* regexp; + const char* text; bool match; }; @@ -299,7 +299,7 @@ const ReverseTest& t = reverse_tests[i]; Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL); CHECK(re); - Prog *prog = re->CompileToReverseProg(0); + Prog* prog = re->CompileToReverseProg(0); CHECK(prog); bool failed = false; bool matched = prog->SearchDFA(t.text, StringPiece(), Prog::kUnanchored, @@ -309,6 +309,70 @@ nfail++; } delete prog; + re->Decref(); + } + EXPECT_EQ(nfail, 0); +} + +struct CallbackTest { + const char* regexp; + const char* dump; +}; + +// Test that DFA::BuildAllStates() builds the expected DFA states +// and issues the expected callbacks. These test cases reflect the +// very compact encoding of the callbacks, but that also makes them +// very difficult to understand, so let's work through "\\Aa\\z". +// There are three slots per DFA state because the bytemap has two +// equivalence classes and there is a third slot for kByteEndText: +// 0: all bytes that are not 'a' +// 1: the byte 'a' +// 2: kByteEndText +// -1 means that there is no transition from that DFA state to any +// other DFA state for that slot. The valid transitions are thus: +// state 0 --slot 1--> state 1 +// state 1 --slot 2--> state 2 +// The double brackets indicate that state 2 is a matching state. +// Putting it together, this means that the DFA must consume the +// byte 'a' and then hit end of text. Q.E.D. +CallbackTest callback_tests[] = { + { "\\Aa\\z", "[-1,1,-1] [-1,-1,2] [[-1,-1,-1]]" }, + { "\\Aab\\z", "[-1,1,-1,-1] [-1,-1,2,-1] [-1,-1,-1,3] [[-1,-1,-1,-1]]" }, + { "\\Aa*b\\z", "[-1,0,1,-1] [-1,-1,-1,2] [[-1,-1,-1,-1]]" }, + { "\\Aa+b\\z", "[-1,1,-1,-1] [-1,1,2,-1] [-1,-1,-1,3] [[-1,-1,-1,-1]]" }, + { "\\Aa?b\\z", "[-1,1,2,-1] [-1,-1,2,-1] [-1,-1,-1,3] [[-1,-1,-1,-1]]" }, + { "\\Aa\\C*\\z", "[-1,1,-1] [1,1,2] [[-1,-1,-1]]" }, + { "\\Aa\\C*", "[-1,1,-1] [2,2,3] [[2,2,2]] [[-1,-1,-1]]" }, + { "a\\C*", "[0,1,-1] [2,2,3] [[2,2,2]] [[-1,-1,-1]]" }, + { "\\C*", "[1,2] [[1,1]] [[-1,-1]]" }, + { "a", "[0,1,-1] [2,2,2] [[-1,-1,-1]]"} , +}; + +TEST(DFA, Callback) { + int nfail = 0; + for (int i = 0; i < arraysize(callback_tests); i++) { + const CallbackTest& t = callback_tests[i]; + Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL); + CHECK(re); + Prog* prog = re->CompileToProg(0); + CHECK(prog); + string dump; + prog->BuildEntireDFA(Prog::kLongestMatch, [&](const int* next, bool match) { + CHECK(next != NULL); + if (!dump.empty()) + StringAppendF(&dump, " "); + StringAppendF(&dump, match ? "[[" : "["); + for (int b = 0; b < prog->bytemap_range() + 1; b++) + StringAppendF(&dump, "%d,", next[b]); + dump.pop_back(); + StringAppendF(&dump, match ? "]]" : "]"); + }); + if (dump != t.dump) { + LOG(ERROR) << t.regexp << " bytemap:\n" << prog->DumpByteMap(); + LOG(ERROR) << t.regexp << " dump:\ngot " << dump << "\nwant " << t.dump; + nfail++; + } + delete prog; re->Decref(); } EXPECT_EQ(nfail, 0); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2017-06-01/re2/testing/tester.cc new/re2-2017-07-01/re2/testing/tester.cc --- old/re2-2017-06-01/re2/testing/tester.cc 2017-05-18 07:20:04.000000000 +0200 +++ new/re2-2017-07-01/re2/testing/tester.cc 2017-06-27 14:55:46.000000000 +0200 @@ -465,14 +465,6 @@ break; } result->have_submatch = true; - - // Work around RE interface bug: PCRE returns -1 as the - // offsets for an unmatched subexpression, and RE should - // turn that into StringPiece(NULL) but in fact it uses - // StringPiece(text.begin() - 1, 0). Oops. - for (int i = 0; i < nsubmatch; i++) - if (result->submatch[i].begin() == text.begin() - 1) - result->submatch[i] = StringPiece(); delete[] argptr; delete[] a; break; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2017-06-01/util/pcre.cc new/re2-2017-07-01/util/pcre.cc --- old/re2-2017-06-01/util/pcre.cc 2017-05-18 07:20:04.000000000 +0200 +++ new/re2-2017-07-01/util/pcre.cc 2017-06-27 14:55:46.000000000 +0200 @@ -635,7 +635,17 @@ for (int i = 0; i < n; i++) { const int start = vec[2*(i+1)]; const int limit = vec[2*(i+1)+1]; - if (!args[i]->Parse(text.data() + start, limit-start)) { + + // Avoid invoking undefined behavior when text.data() happens + // to be null and start happens to be -1, the latter being the + // case for an unmatched subexpression. Even if text.data() is + // not null, pointing one byte before was a longstanding bug. + const char* addr = NULL; + if (start != -1) { + addr = text.data() + start; + } + + if (!args[i]->Parse(addr, limit-start)) { // TODO: Should we indicate what the error was? return false; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2017-06-01/util/sparse_array.h new/re2-2017-07-01/util/sparse_array.h --- old/re2-2017-06-01/util/sparse_array.h 2017-05-18 07:20:04.000000000 +0200 +++ new/re2-2017-07-01/util/sparse_array.h 2017-06-27 14:55:46.000000000 +0200 @@ -310,6 +310,20 @@ // and at the beginning and end of all public non-const member functions. void DebugCheckInvariants() const; + static bool ShouldInitializeMemory() { +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) + return true; +#else + return false; +#endif +#elif defined(RE2_ON_VALGRIND) + return true; +#else + return false; +#endif + } + int size_ = 0; int max_size_ = 0; std::unique_ptr<int[]> sparse_to_dense_; @@ -415,14 +429,12 @@ dense_.resize(max_size); -#if defined(__has_feature) -#if __has_feature(memory_sanitizer) - for (int i = max_size_; i < max_size; i++) { - sparse_to_dense_[i] = 0xababababU; - dense_[i].index_ = 0xababababU; + if (ShouldInitializeMemory()) { + for (int i = max_size_; i < max_size; i++) { + sparse_to_dense_[i] = 0xababababU; + dense_[i].index_ = 0xababababU; + } } -#endif -#endif } max_size_ = max_size; if (size_ > max_size_) @@ -485,14 +497,12 @@ dense_.resize(max_size); size_ = 0; -#if defined(__has_feature) -#if __has_feature(memory_sanitizer) - for (int i = 0; i < max_size; i++) { - sparse_to_dense_[i] = 0xababababU; - dense_[i].index_ = 0xababababU; + if (ShouldInitializeMemory()) { + for (int i = 0; i < max_size; i++) { + sparse_to_dense_[i] = 0xababababU; + dense_[i].index_ = 0xababababU; + } } -#endif -#endif DebugCheckInvariants(); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/re2-2017-06-01/util/sparse_set.h new/re2-2017-07-01/util/sparse_set.h --- old/re2-2017-06-01/util/sparse_set.h 2017-05-18 07:20:04.000000000 +0200 +++ new/re2-2017-07-01/util/sparse_set.h 2017-06-27 14:55:46.000000000 +0200 @@ -160,6 +160,20 @@ // and at the beginning and end of all public non-const member functions. void DebugCheckInvariants() const; + static bool ShouldInitializeMemory() { +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) + return true; +#else + return false; +#endif +#elif defined(RE2_ON_VALGRIND) + return true; +#else + return false; +#endif + } + int size_ = 0; int max_size_ = 0; std::unique_ptr<int[]> sparse_to_dense_; @@ -183,14 +197,12 @@ dense_.resize(max_size); -#if defined(__has_feature) -#if __has_feature(memory_sanitizer) - for (int i = max_size_; i < max_size; i++) { - sparse_to_dense_[i] = 0xababababU; - dense_[i] = 0xababababU; + if (ShouldInitializeMemory()) { + for (int i = max_size_; i < max_size; i++) { + sparse_to_dense_[i] = 0xababababU; + dense_[i] = 0xababababU; + } } -#endif -#endif } max_size_ = max_size; if (size_ > max_size_) @@ -226,14 +238,12 @@ dense_.resize(max_size); size_ = 0; -#if defined(__has_feature) -#if __has_feature(memory_sanitizer) - for (int i = 0; i < max_size; i++) { - sparse_to_dense_[i] = 0xababababU; - dense_[i] = 0xababababU; + if (ShouldInitializeMemory()) { + for (int i = 0; i < max_size; i++) { + sparse_to_dense_[i] = 0xababababU; + dense_[i] = 0xababababU; + } } -#endif -#endif DebugCheckInvariants(); }
participants (1)
-
root@hilbert.suse.de