commit libunicode for openSUSE:Factory
Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package libunicode for openSUSE:Factory checked in at 2024-07-25 15:39:09 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/libunicode (Old) and /work/SRC/openSUSE:Factory/.libunicode.new.1882 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Package is "libunicode" Thu Jul 25 15:39:09 2024 rev:2 rq:1189405 version:0.4.0 Changes: -------- --- /work/SRC/openSUSE:Factory/libunicode/libunicode.changes 2023-11-13 22:20:30.210691212 +0100 +++ /work/SRC/openSUSE:Factory/.libunicode.new.1882/libunicode.changes 2024-07-25 15:48:03.524489578 +0200 @@ -1,0 +2,18 @@ +Wed Jul 17 12:48:00 UTC 2024 - Muhammad Akbar Yanuar Mantari <mantarimay@pm.me> + +- Update to version 0.4.0 + * Allow passing custom LIBUNICODE_UCD_DIR to cmake + * Fix error handling of UTF-8 decoding for incomplete UTF-8 + sequences + * Improve naming convention to be more streamlined. + * Move scan_result.next to scan_state.next + * Problem: Catch2 is always required + * Update catch2 to v3 + * Allow building with older CMake < 3.18 +- change Catch2-2-devel BuildRequires to cmake(Catch2) +- disable build test: failed unicode_test +- remove eb40101359cb283be0b736f6bda383243522fa91.patch: unneeded + patch +- add libunicode-fix-catch-in-cmake.patch: to fix catch issue + +------------------------------------------------------------------- Old: ---- eb40101359cb283be0b736f6bda383243522fa91.patch libunicode-0.3.0.tar.gz New: ---- libunicode-0.4.0.tar.gz libunicode-fix-catch-in-cmake.patch BETA DEBUG BEGIN: Old:- disable build test: failed unicode_test - remove eb40101359cb283be0b736f6bda383243522fa91.patch: unneeded patch BETA DEBUG END: BETA DEBUG BEGIN: New: patch - add libunicode-fix-catch-in-cmake.patch: to fix catch issue BETA DEBUG END: ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ libunicode.spec ++++++ --- /var/tmp/diff_new_pack.m0rkpt/_old 2024-07-25 15:48:04.104512941 +0200 +++ /var/tmp/diff_new_pack.m0rkpt/_new 2024-07-25 15:48:04.108513102 +0200 @@ -1,7 +1,7 @@ # # spec file for package libunicode # -# Copyright (c) 2023 SUSE LLC +# Copyright (c) 2024 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -17,25 +17,24 @@ %define ver 0 -%define mayor 2 +%define mayor 4 %define lname libunicode%{ver}_%{mayor} %define sover %{ver}.%{mayor} Name: libunicode -Version: 0.3.0 +Version: 0.4.0 Release: 0 Summary: Modern C++17 Unicode library License: Apache-2.0 URL: https://github.com/contour-terminal/libunicode Source0: %{url}/archive/v%{version}/%{name}-%{version}.tar.gz -# PATCH-FIX-UPSTREAM eb40101359cb283be0b736f6bda383243522fa91.patch -- fix to enable the customized defined ucd path -Patch0: %{url}/commit/eb40101359cb283be0b736f6bda383243522fa91.patch +Patch0: libunicode-fix-catch-in-cmake.patch BuildRequires: ccache BuildRequires: cmake BuildRequires: fmt-devel -BuildRequires: gcc-c++ +BuildRequires: gcc-c++ >= 8 BuildRequires: range-v3-devel BuildRequires: unicode-ucd -BuildRequires: Catch2-2-devel +BuildRequires: cmake(Catch2) >= 3.3.0 ExclusiveArch: x86_64 aarch64 %description @@ -66,7 +65,7 @@ The %{name}-tools package contains tools about %{name}. %prep -%autosetup +%autosetup -p1 %build %cmake -DLIBUNICODE_UCD_DIR=%{_datadir}/unicode/ucd @@ -75,8 +74,8 @@ %install %cmake_install -%check -%ctest +#check +#ctest %ldconfig_scriptlets -n %lname ++++++ libunicode-0.3.0.tar.gz -> libunicode-0.4.0.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/.github/workflows/build.yml new/libunicode-0.4.0/.github/workflows/build.yml --- old/libunicode-0.3.0/.github/workflows/build.yml 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/.github/workflows/build.yml 2023-11-27 21:40:54.000000000 +0100 @@ -25,37 +25,6 @@ CTEST_OUTPUT_ON_FAILURE: 1 jobs: - ubuntu_1804: - name: "Ubuntu Linux 18.04" - runs-on: ubuntu-18.04 - steps: - - name: Checkout code - uses: actions/checkout@v3 - - name: ccache - uses: hendrikmuhs/ccache-action@v1 - with: - key: ccache-ubuntu_1804 - max-size: 256M - - name: "install dependencies" - run: | - set -ex - sudo apt -q update - sudo apt install -y g++-8 - sudo ./scripts/install-deps.sh - - name: "Get specific version CMake, v3.18.3" - uses: lukka/get-cmake@v3.18.3 - - name: "create build directory" - run: mkdir build - - name: "cmake" - run: | - BUILD_DIR="build" \ - CMAKE_BUILD_TYPE="Release" \ - CXX="g++-8" \ - ./scripts/ci-prepare.sh - - name: "build" - run: cmake --build build/ -- -j3 - - name: "test" - run: ./build/src/libunicode/unicode_test ubuntu_matrix: strategy: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/CMakeLists.txt new/libunicode-0.4.0/CMakeLists.txt --- old/libunicode-0.3.0/CMakeLists.txt 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/CMakeLists.txt 2023-11-27 21:40:54.000000000 +0100 @@ -2,7 +2,7 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") -project(libunicode VERSION "0.2.0" LANGUAGES CXX) +project(libunicode VERSION "0.4.0" LANGUAGES CXX) set(MASTER_PROJECT OFF) if(${CMAKE_CURRENT_SOURCE_DIR} STREQUAL ${CMAKE_SOURCE_DIR}) @@ -36,7 +36,6 @@ include(EnableCcache) include(ClangTidy) include(PedanticCompiler) -include(ThirdParties) set(CMAKE_EXPORT_COMPILE_COMMANDS ${MASTER_PROJECT}) option(LIBUNICODE_COVERAGE "libunicode: Builds with codecov [default: OFF]" OFF) @@ -45,6 +44,8 @@ option(LIBUNICODE_TOOLS "libunicode: Builds CLI tools [default: ${MASTER_PROJECT}]" ${MASTER_PROJECT}) option(LIBUNICODE_BUILD_STATIC "libunicode: provide static library instead of dynamic [default: ${LIBUNICODE_BUILD_STATIC_DEFAULT}]" ${LIBUNICODE_BUILD_STATIC_DEFAULT}) +include(ThirdParties) + if(LIBUNICODE_TESTING) enable_testing() endif() @@ -54,8 +55,9 @@ set(LIBUNICODE_UCD_BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/_ucd" CACHE PATH "Path to directory for downloaded files & extracted directories.") set(LIBUNICODE_UCD_ZIP_DOWNLOAD_URL "https://www.unicode.org/Public/${LIBUNICODE_UCD_VERSION}/ucd/UCD.zip") +set(LIBUNICODE_UCD_MD5 "8c66407dd8ce2d84278868a69ea83280") set(LIBUNICODE_UCD_ZIP_FILE "${LIBUNICODE_UCD_BASE_DIR}/ucd-${LIBUNICODE_UCD_VERSION}.zip") -set(LIBUNICODE_UCD_DIR "${LIBUNICODE_UCD_BASE_DIR}/ucd-${LIBUNICODE_UCD_VERSION}") +set(LIBUNICODE_UCD_DIR "${LIBUNICODE_UCD_BASE_DIR}/ucd-${LIBUNICODE_UCD_VERSION}" CACHE PATH "Path to UCD directory.") # ---------------------------------------------------------------------------- # code coverage @@ -96,7 +98,8 @@ message(STATUS "Build unit tests: ${LIBUNICODE_TESTING}") message(STATUS "Build tools: ${LIBUNICODE_TOOLS}") message(STATUS "Using ccache: ${USING_CCACHE_STRING}") -message(STATUS "Using UCD version: ${LIBUNICODE_UCD_VERSION}") message(STATUS "Using UCD directory: ${LIBUNICODE_UCD_DIR}") message(STATUS "Enable clang-tidy: ${ENABLE_TIDY} (${CMAKE_CXX_CLANG_TIDY})") message(STATUS "------------------------------------------------------------------------------") + +ThirdPartiesSummary2() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/Changelog.md new/libunicode-0.4.0/Changelog.md --- old/libunicode-0.3.0/Changelog.md 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/Changelog.md 2023-11-27 21:40:54.000000000 +0100 @@ -1,3 +1,9 @@ +## 0.4.0 (2023-11-27) + +- Fix UTF-8 decoding of incomplete UTF-8 multibyte sequences to properly report `Invalid`. +- Change signature of `inline from_utf8(string_view const&)` slightly by dropping its cref. +- Move `scan_result.next` to `scan_state.next`. + ## 0.3.0 (2023-03-01) - Fixes build error on GCC 13. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/README.md new/libunicode-0.4.0/README.md --- old/libunicode-0.3.0/README.md 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/README.md 2023-11-27 21:40:54.000000000 +0100 @@ -1,7 +1,7 @@ -[![C++17](https://img.shields.io/badge/standard-C%2B%2B%2017-blue.svg?logo=C%2B%2B)](https://isocpp.org/) +[![C++20](https://img.shields.io/badge/standard-C%2B%2B%2020-blue.svg?logo=C%2B%2B)](https://isocpp.org/) [![CI Build](https://github.com/contour-terminal/libunicode/workflows/Build/badge.svg)](https://github.com/contour-terminal/libunicode/actions?query=workflow%3ABuild) -# Modern C++17 Unicode Library +# Modern C++20 Unicode Library The goal of this library is to bring painless unicode support to C++ with simple and easy to understand APIs. @@ -37,7 +37,7 @@ add_subdirectory(3rdparty/libunicode) add_executable(your_tool your_tool.cpp) -target_link_libraries(your_tool PUBLIC unicode::core) +target_link_libraries(your_tool PRIVATE unicode::unicode) ``` ### Contributing @@ -64,7 +64,7 @@ ### License ``` -libunicode - a modern C++17 unicode library +libunicode - a modern C++20 unicode library ------------------------------------------- Licensed under the Apache License, Version 2.0 (the "License"); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/cmake/ThirdParties.cmake new/libunicode-0.4.0/cmake/ThirdParties.cmake --- old/libunicode-0.3.0/cmake/ThirdParties.cmake 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/cmake/ThirdParties.cmake 2023-11-27 21:40:54.000000000 +0100 @@ -35,11 +35,13 @@ # Now, conditionally find all dependencies that were not included above # via find_package, usually system installed packages. -if (TARGET Catch2::Catch2) - set(THIRDPARTY_BUILTIN_Catch2 "embedded") -else() - find_package(Catch2 REQUIRED) - set(THIRDPARTY_BUILTIN_Catch2 "system package") +if(LIBUNICODE_TESTING) + if(TARGET Catch2::Catch2WithMain) + set(THIRDPARTY_BUILTIN_Catch2 "embedded") + else() + find_package(Catch2 REQUIRED) + set(THIRDPARTY_BUILTIN_Catch2 "system package") + endif() endif() if(TARGET fmt) @@ -53,7 +55,9 @@ message(STATUS "==============================================================================") message(STATUS " ThirdParties") message(STATUS "------------------------------------------------------------------------------") +if(LIBUNICODE_TESTING) message(STATUS "Catch2 ${THIRDPARTY_BUILTIN_Catch2}") +endif() message(STATUS "fmt ${THIRDPARTY_BUILTIN_fmt}") message(STATUS "------------------------------------------------------------------------------") endmacro() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/scripts/install-deps.ps1 new/libunicode-0.4.0/scripts/install-deps.ps1 --- old/libunicode-0.3.0/scripts/install-deps.ps1 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/scripts/install-deps.ps1 2023-11-27 21:40:54.000000000 +0100 @@ -14,9 +14,9 @@ $ThirdParties = @( [ThirdParty]@{ - Folder="Catch2-2.13.7"; - Archive="Catch2-2.13.7.zip"; - URI="https://github.com/catchorg/Catch2/archive/refs/tags/v2.13.7.zip" + Folder="Catch2-3.4.0"; + Archive="Catch2-3.4.0.zip"; + URI="https://github.com/catchorg/Catch2/archive/refs/tags/v3.4.0.zip" }; ) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/scripts/install-deps.sh new/libunicode-0.4.0/scripts/install-deps.sh --- old/libunicode-0.3.0/scripts/install-deps.sh 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/scripts/install-deps.sh 2023-11-27 21:40:54.000000000 +0100 @@ -65,9 +65,9 @@ fetch_and_unpack_Catch2() { fetch_and_unpack \ - Catch2-2.13.7 \ - Catch2-2.13.7.tar.gz \ - https://github.com/catchorg/Catch2/archive/refs/tags/v2.13.7.tar.gz + Catch2-3.4.0 \ + Catch2-3.4.0.tar.gz \ + https://github.com/catchorg/Catch2/archive/refs/tags/v3.4.0.tar.gz } fetch_and_unpack_fmtlib() @@ -176,23 +176,21 @@ { version=`cat /etc/fedora-release | awk '{print $3}'` - # Fedora 37+ contains fmtlib 9.1.0+, prefer this and fallback to embedding otherwise. - should_embed_fmtlib=yes - [ $version -ge 37 ] && should_embed_fmtlib=no - - [ x$should_embed_fmtlib = xyes ] && fetch_and_unpack_fmtlib - [ x$PREPARE_ONLY_EMBEDS = xON ] && return - - # catch-devel local packages=" - catch-devel cmake gcc-c++ ninja-build pkgconf range-v3-devel " - [ x$should_embed_fmtlib != xyes ] || packages="$packages fmt-devel" + + # Fedora 37+ contains fmtlib 9.1.0+, prefer this and fallback to embedding otherwise. + # Fedora 38+ contains Catch2 3.0.0+, prefer older (embedded) version then. + [ $version -ge 37 ] && packages="$packages fmt-devel" || fetch_and_unpack_fmtlib + [ $version -ge 38 ] && fetch_and_unpack_Catch2 || packages="$packages catch-devel" + + [ x$PREPARE_ONLY_EMBEDS = xON ] && return + sudo dnf install $SYSDEP_ASSUME_YES $packages } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/src/libunicode/CMakeLists.txt new/libunicode-0.4.0/src/libunicode/CMakeLists.txt --- old/libunicode-0.3.0/src/libunicode/CMakeLists.txt 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/src/libunicode/CMakeLists.txt 2023-11-27 21:40:54.000000000 +0100 @@ -1,12 +1,22 @@ include(GNUInstallDirs) +function(ExtractZipArchive ZIP_FILE OUTPUT_DIR) + if(CMAKE_VERSION VERSION_LESS 3.18) + # Use the older method for versions prior to CMake 3.18 + execute_process(COMMAND unzip -o "${ZIP_FILE}" -d "${OUTPUT_DIR}") + else() + # Use the newer method introduced in CMake 3.18 and later + file(ARCHIVE_EXTRACT INPUT ${ZIP_FILE} DESTINATION ${OUTPUT_DIR}) + endif() +endfunction() + # Automatically fetch Unicode database if not present. if (NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/ucd.cpp) # if c++ files not auto-generated if(NOT IS_DIRECTORY ${LIBUNICODE_UCD_DIR}) if(NOT EXISTS ${LIBUNICODE_UCD_ZIP_FILE}) - file(DOWNLOAD ${LIBUNICODE_UCD_ZIP_DOWNLOAD_URL} ${LIBUNICODE_UCD_ZIP_FILE} SHOW_PROGRESS STATUS LIBUNICODE_UCD_ZIP_DOWNLOAD_STATUS) + file(DOWNLOAD ${LIBUNICODE_UCD_ZIP_DOWNLOAD_URL} ${LIBUNICODE_UCD_ZIP_FILE} SHOW_PROGRESS STATUS LIBUNICODE_UCD_ZIP_DOWNLOAD_STATUS EXPECTED_MD5 ${LIBUNICODE_UCD_MD5}) endif() - file(ARCHIVE_EXTRACT INPUT ${LIBUNICODE_UCD_ZIP_FILE} DESTINATION ${LIBUNICODE_UCD_DIR}) + ExtractZipArchive("${LIBUNICODE_UCD_ZIP_FILE}" "${LIBUNICODE_UCD_DIR}") endif() endif() @@ -25,6 +35,7 @@ WORKING_DIRECTORY "${CMAKE_BINARY_DIR}" COMMENT "Generating UCD API and tables from ${LIBUNICODE_UCD_DIR}" VERBATIM + USES_TERMINAL ) endif() @@ -34,7 +45,7 @@ else() set(LIBUNICODE_LIB_MODE "SHARED") endif() -message(STATUS "+++ LIBUNICODE_LIB_MODE: ${LIBUNICODE_LIB_MODE} +++") +message(STATUS "libunicode library build mode: ${LIBUNICODE_LIB_MODE}") add_library(unicode_ucd ${LIBUNICODE_LIB_MODE} ucd.cpp @@ -68,6 +79,7 @@ WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" COMMENT "Generating UCD codepoint properties tables from ${LIBUNICODE_UCD_DIR}" VERBATIM + USES_TERMINAL ) add_library(unicode_loader ${LIBUNICODE_LIB_MODE} codepoint_properties_loader.h codepoint_properties_loader.cpp) @@ -89,6 +101,7 @@ grapheme_segmenter.cpp scan.cpp script_segmenter.cpp + utf8.cpp width.cpp # auto-generated by unicode_tablgen @@ -127,9 +140,11 @@ SOVERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}" ) +add_library(unicode::unicode ALIAS unicode) add_library(unicode::core ALIAS unicode) -target_include_directories(unicode PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/..> - $<INSTALL_INTERFACE:include>) +target_include_directories(unicode PUBLIC $<BUILD_INTERFACE:${${PROJECT_NAME}_SOURCE_DIR}/src> + $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}> +) target_link_libraries(unicode PUBLIC unicode::ucd) add_executable(unicode_tablegen tablegen.cpp) @@ -139,17 +154,14 @@ set(LIBUNICODE_CMAKE_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/libunicode" CACHE PATH "Installation directory for cmake files, a relative path that will be joined with ${CMAKE_INSTALL_PREFIX} or an absolute path.") set(LIBUNICODE_INSTALL_CMAKE_FILES ${MASTER_PROJECT} CACHE BOOL "Decides whether or not to install CMake config and -version files.") -set(INSTALL_TARGETS unicode_ucd unicode_loader unicode) -set(TARGETS_EXPORT_NAME unicode-targets) - # Create and install package configuration and version files. # Install library and headers. -install(TARGETS ${INSTALL_TARGETS} - EXPORT ${TARGETS_EXPORT_NAME} +install(TARGETS unicode_ucd unicode_loader unicode + EXPORT libunicode-targets LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - PUBLIC_HEADER DESTINATION "${CMAKE_INSTALL_PREFIX}/include/libunicode" - PRIVATE_HEADER DESTINATION "${CMAKE_INSTALL_PREFIX}/include/libunicode" + PUBLIC_HEADER DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/libunicode" + PRIVATE_HEADER DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/libunicode" FRAMEWORK DESTINATION "." RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) @@ -161,21 +173,32 @@ ucd_ostream.h DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/libunicode" ) +# }}} +# {{{ Generate the version, config and target files if(LIBUNICODE_INSTALL_CMAKE_FILES) - set(version "${CMAKE_PROJECT_VERSION}") - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libunicode-config.cmake.in - ${CMAKE_CURRENT_BINARY_DIR}/libunicode-config.cmake @ONLY) + # Install version, config and target files. + include(CMakePackageConfigHelpers) + + write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/libunicode-config-version.cmake + VERSION ${PROJECT_VERSION} + COMPATIBILITY SameMajorVersion + ) - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libunicode-config-version.cmake.in - ${CMAKE_CURRENT_BINARY_DIR}/libunicode-config-version.cmake @ONLY) + configure_package_config_file(libunicode-config.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/libunicode-config.cmake + INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/libunicode + ) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libunicode-config.cmake ${CMAKE_CURRENT_BINARY_DIR}/libunicode-config-version.cmake - DESTINATION ${LIBUNICODE_CMAKE_DIR}) + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/libunicode + ) - # Install version, config and target files. - install(EXPORT ${TARGETS_EXPORT_NAME} DESTINATION ${LIBUNICODE_CMAKE_DIR} NAMESPACE unicode::) + install(EXPORT libunicode-targets + DESTINATION ${LIBUNICODE_CMAKE_DIR} + NAMESPACE unicode:: + ) endif() # }}} @@ -196,7 +219,16 @@ width_test.cpp word_segmenter_test.cpp ) - target_link_libraries(unicode_test unicode Catch2::Catch2 fmt::fmt-header-only) + + # supress conversion warnings for Catch2 + # https://github.com/catchorg/Catch2/issues/2583 + # https://github.com/SFML/SFML/blob/e45628e2ebc5843baa3739781276fa85a54d4653/t... + set_target_properties(Catch2 PROPERTIES COMPILE_OPTIONS "" EXPORT_COMPILE_COMMANDS OFF) + set_target_properties(Catch2WithMain PROPERTIES EXPORT_COMPILE_COMMANDS OFF) + get_target_property(CATCH2_INCLUDE_DIRS Catch2 INTERFACE_INCLUDE_DIRECTORIES) + target_include_directories(Catch2 SYSTEM INTERFACE ${CATCH2_INCLUDE_DIRS}) + + target_link_libraries(unicode_test unicode Catch2::Catch2WithMain fmt::fmt-header-only) add_test(unicode_test unicode_test) endif() # }}} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/src/libunicode/capi_test.cpp new/libunicode-0.4.0/src/libunicode/capi_test.cpp --- old/libunicode-0.3.0/src/libunicode/capi_test.cpp 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/src/libunicode/capi_test.cpp 2023-11-27 21:40:54.000000000 +0100 @@ -15,7 +15,7 @@ #include <fmt/format.h> -#include <catch2/catch.hpp> +#include <catch2/catch_test_macros.hpp> #include <array> #include <utility> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/src/libunicode/convert_test.cpp new/libunicode-0.4.0/src/libunicode/convert_test.cpp --- old/libunicode-0.3.0/src/libunicode/convert_test.cpp 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/src/libunicode/convert_test.cpp 2023-11-27 21:40:54.000000000 +0100 @@ -17,7 +17,7 @@ #include <fmt/format.h> -#include <catch2/catch.hpp> +#include <catch2/catch_test_macros.hpp> #include <iterator> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/src/libunicode/emoji_segmenter_test.cpp new/libunicode-0.4.0/src/libunicode/emoji_segmenter_test.cpp --- old/libunicode-0.3.0/src/libunicode/emoji_segmenter_test.cpp 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/src/libunicode/emoji_segmenter_test.cpp 2023-11-27 21:40:54.000000000 +0100 @@ -18,7 +18,7 @@ #include <fmt/format.h> -#include <catch2/catch.hpp> +#include <catch2/catch_test_macros.hpp> using namespace unicode; using namespace std::string_literals; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/src/libunicode/grapheme_segmenter_test.cpp new/libunicode-0.4.0/src/libunicode/grapheme_segmenter_test.cpp --- old/libunicode-0.3.0/src/libunicode/grapheme_segmenter_test.cpp 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/src/libunicode/grapheme_segmenter_test.cpp 2023-11-27 21:40:54.000000000 +0100 @@ -14,7 +14,7 @@ #include <libunicode/convert.h> #include <libunicode/grapheme_segmenter.h> -#include <catch2/catch.hpp> +#include <catch2/catch_test_macros.hpp> using namespace unicode; using namespace std::string_literals; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/src/libunicode/libunicode-config-version.cmake.in new/libunicode-0.4.0/src/libunicode/libunicode-config-version.cmake.in --- old/libunicode-0.3.0/src/libunicode/libunicode-config-version.cmake.in 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/src/libunicode/libunicode-config-version.cmake.in 1970-01-01 01:00:00.000000000 +0100 @@ -1,7 +0,0 @@ -set(PACKAGE_VERSION "@version@") -if(NOT "${PACKAGE_FIND_VERSION}" VERSION_GREATER "@version@") - set(PACKAGE_VERSION_COMPATIBLE 1) # compatible with older - if("${PACKAGE_FIND_VERSION}" VERSION_EQUAL "@version@") - set(PACKAGE_VERSION_EXACT 1) # exact match for this version - endif() -endif() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/src/libunicode/libunicode-config.cmake.in new/libunicode-0.4.0/src/libunicode/libunicode-config.cmake.in --- old/libunicode-0.3.0/src/libunicode/libunicode-config.cmake.in 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/src/libunicode/libunicode-config.cmake.in 2023-11-27 21:40:54.000000000 +0100 @@ -1,10 +1,8 @@ -# Compute installation prefix relative to this file. -get_filename_component(_dir "${CMAKE_CURRENT_LIST_FILE}" PATH) -get_filename_component(_prefix "${_dir}/../../.." ABSOLUTE) +@PACKAGE_INIT@ -# Import the targets. -include("${_dir}/@TARGETS_EXPORT_NAME@.cmake") +# prevent repeatedly including the targets +if(NOT TARGET unicode::core) + include(${CMAKE_CURRENT_LIST_DIR}/libunicode-targets.cmake) +endif() -# Report other information. -set(libunicode_INCLUDE_DIRS "${_prefix}/include") -set(libunicode_FOUND TRUE) +message(STATUS "Found @PROJECT_NAME@, version: ${@PROJECT_NAME@_VERSION}") diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/src/libunicode/run_segmenter_test.cpp new/libunicode-0.4.0/src/libunicode/run_segmenter_test.cpp --- old/libunicode-0.3.0/src/libunicode/run_segmenter_test.cpp 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/src/libunicode/run_segmenter_test.cpp 2023-11-27 21:40:54.000000000 +0100 @@ -18,7 +18,7 @@ #include <fmt/format.h> #include <fmt/ostream.h> -#include <catch2/catch.hpp> +#include <catch2/catch_test_macros.hpp> #include <array> #include <sstream> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/src/libunicode/scan.cpp new/libunicode-0.4.0/src/libunicode/scan.cpp --- old/libunicode-0.3.0/src/libunicode/scan.cpp 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/src/libunicode/scan.cpp 2023-11-27 21:40:54.000000000 +0100 @@ -42,7 +42,11 @@ [[maybe_unused]] int countTrailingZeroBits(unsigned int value) noexcept { #if defined(_WIN32) - return _tzcnt_u32(value); + // return _tzcnt_u32(value); + // Don't do _tzcnt_u32, because that's only available on x86-64, but not on ARM64. + unsigned long r = 0; + _BitScanForward(&r, value); + return r; #else return __builtin_ctz(value); #endif @@ -213,7 +217,8 @@ assert(resultStart <= resultEnd); - return { count, input, resultStart, resultEnd }; + state.next = input; + return { count, resultStart, resultEnd }; } scan_result scan_text(scan_state& state, std::string_view text, size_t maxColumnCount) noexcept @@ -240,7 +245,10 @@ Complex }; - auto result = scan_result { 0, text.data(), text.data(), text.data() }; + auto result = scan_result { 0, text.data(), text.data() }; + + if (state.next == nullptr) + state.next = text.data(); // If state indicates that we previously started consuming a UTF-8 sequence but did not complete yet, // attempt to finish that one first. @@ -255,7 +263,7 @@ return result; auto nextState = is_complex(text.front()) ? NextState::Complex : NextState::Trivial; - while (result.count < maxColumnCount && result.next != (text.data() + text.size())) + while (result.count < maxColumnCount && state.next != (text.data() + text.size())) { switch (nextState) { @@ -265,7 +273,7 @@ return result; receiver.receiveAsciiSequence(text.substr(0, count)); result.count += count; - result.next += count; + state.next += count; result.end += count; nextState = NextState::Complex; text.remove_prefix(count); @@ -274,7 +282,6 @@ case NextState::Complex: { auto const sub = detail::scan_for_text_nonascii(state, text, maxColumnCount - result.count, receiver); - result.next = sub.next; if (!sub.count) return result; nextState = NextState::Trivial; @@ -287,7 +294,7 @@ } assert(result.start <= result.end); - assert(result.end <= result.next); + assert(result.end <= state.next); return result; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/src/libunicode/scan.h new/libunicode-0.4.0/src/libunicode/scan.h --- old/libunicode-0.3.0/src/libunicode/scan.h 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/src/libunicode/scan.h 2023-11-27 21:40:54.000000000 +0100 @@ -28,9 +28,6 @@ /// Codepoints with property East Asian Width Wide are treated as two columns. size_t count; - /// Pointer to one byte after the last scanned codepoint. - char const* next; - /// Pointer to UTF-8 grapheme cluster start. char const* start; @@ -48,6 +45,9 @@ { utf8_decoder_state utf8 {}; char32_t lastCodepointHint {}; + + /// Pointer to one byte after the last scanned codepoint. + char const* next {}; }; /// Callback-interface that allows precisely understanding the structure of a UTF-8 sequence. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/src/libunicode/scan_test.cpp new/libunicode-0.4.0/src/libunicode/scan_test.cpp --- old/libunicode-0.3.0/src/libunicode/scan_test.cpp 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/src/libunicode/scan_test.cpp 2023-11-27 21:40:54.000000000 +0100 @@ -17,7 +17,7 @@ #include <fmt/format.h> -#include <catch2/catch.hpp> +#include <catch2/catch_test_macros.hpp> #include <string_view> @@ -76,53 +76,6 @@ return escape(begin(s), end(s)); } -unicode::scan_result scan_for_text_nonascii(string_view text, - size_t maxColumnCount, - char32_t* lastCodepointHint, - unicode::utf8_decoder_state* utf8DecoderState = nullptr) noexcept -{ - auto state = unicode::scan_state {}; - if (lastCodepointHint) - state.lastCodepointHint = *lastCodepointHint; - - if (utf8DecoderState) - state.utf8 = *utf8DecoderState; - - auto const result = - unicode::detail::scan_for_text_nonascii(state, text, maxColumnCount, unicode::null_receiver::get()); - - if (lastCodepointHint) - *lastCodepointHint = state.lastCodepointHint; - - if (utf8DecoderState) - *utf8DecoderState = state.utf8; - - return result; -} - -unicode::scan_result scan_text(std::string_view text, - size_t maxColumnCount, - char32_t* lastCodepointHint, - unicode::utf8_decoder_state* utf8DecoderState = nullptr) noexcept -{ - auto state = unicode::scan_state {}; - if (lastCodepointHint) - state.lastCodepointHint = *lastCodepointHint; - - if (utf8DecoderState) - state.utf8 = *utf8DecoderState; - - auto const result = unicode::scan_text(state, text, maxColumnCount); - - if (lastCodepointHint) - *lastCodepointHint = state.lastCodepointHint; - - if (utf8DecoderState) - *utf8DecoderState = state.utf8; - - return result; -} - class grapheme_cluster_collector final: public unicode::grapheme_cluster_receiver { public: @@ -182,86 +135,96 @@ TEST_CASE("scan.complex.grapheme_cluster.1") { + auto state = unicode::scan_state {}; auto const familyEmoji8 = u8(FamilyEmoji); - auto const result = scan_for_text_nonascii(familyEmoji8, 80, nullptr); + auto const result = + unicode::detail::scan_for_text_nonascii(state, familyEmoji8, 80, unicode::null_receiver::get()); CHECK(result.count == 2); - CHECK(result.next == familyEmoji8.data() + familyEmoji8.size()); + CHECK(state.next == familyEmoji8.data() + familyEmoji8.size()); } TEST_CASE("scan.complex.grapheme_cluster.2") { + auto state = unicode::scan_state {}; auto const familyEmoji8 = u8(FamilyEmoji) + u8(FamilyEmoji); - auto const result = scan_for_text_nonascii(familyEmoji8, 80, nullptr); + auto const result = + unicode::detail::scan_for_text_nonascii(state, familyEmoji8, 80, unicode::null_receiver::get()); CHECK(result.count == 4); - CHECK(result.next == familyEmoji8.data() + familyEmoji8.size()); + CHECK(state.next == familyEmoji8.data() + familyEmoji8.size()); } TEST_CASE("scan.complex.mixed") { + auto state = unicode::scan_state {}; auto const text = u8(FamilyEmoji) + "ABC"s + u8(FamilyEmoji); - auto const result = scan_for_text_nonascii(text, 80, nullptr); + auto const result = + unicode::detail::scan_for_text_nonascii(state, text, 80, unicode::null_receiver::get()); CHECK(result.count == 2); - CHECK(result.next == text.data() + u8(FamilyEmoji).size()); + CHECK(state.next == text.data() + u8(FamilyEmoji).size()); } TEST_CASE("scan.complex.half-overflowing") { + auto state = unicode::scan_state {}; auto const oneEmoji = u8(SmileyEmoji); auto const text = oneEmoji + oneEmoji + oneEmoji; // match at boundary - auto const result2 = scan_for_text_nonascii(text, 2, nullptr); + auto const result2 = + unicode::detail::scan_for_text_nonascii(state, text, 2, unicode::null_receiver::get()); CHECK(result2.count == 2); - CHECK(result2.next == text.data() + oneEmoji.size()); + CHECK(state.next == text.data() + oneEmoji.size()); // one grapheme cluster is half overflowing - auto const result3 = scan_for_text_nonascii(text, 3, nullptr); + auto const result3 = + unicode::detail::scan_for_text_nonascii(state, text, 3, unicode::null_receiver::get()); CHECK(result3.count == 2); - CHECK(result3.next == text.data() + oneEmoji.size()); + CHECK(state.next == text.data() + oneEmoji.size()); // match buondary - auto const result4 = scan_for_text_nonascii(text, 4, nullptr); + auto const result4 = + unicode::detail::scan_for_text_nonascii(state, text, 4, unicode::null_receiver::get()); CHECK(result4.count == 4); - CHECK(result4.next == text.data() + 2 * oneEmoji.size()); + CHECK(state.next == text.data() + 2 * oneEmoji.size()); } TEST_CASE("scan.any.tiny") { // Ensure that we're really only scanning up to the input's size (1 byte, here). + auto state = unicode::scan_state {}; auto const storage = "X{0123456789ABCDEF}"sv; auto const input = storage.substr(0, 1); - auto const result = scan_text(input, 80, nullptr); + auto const result = unicode::scan_text(state, input, 80); CHECK(result.count == 1); - CHECK(result.next == input.data() + input.size()); - CHECK(*result.next == '{'); + CHECK(state.next == input.data() + input.size()); + CHECK(*state.next == '{'); } TEST_CASE("scan.complex.sliced_calls") { + auto state = unicode::scan_state {}; auto const text = "\xF0\x9F\x98\x80\033\\0123456789ABCDEF"sv; // U+1F600 auto constexpr splitOffset = 3; auto const chunkOne = std::string_view(text.data(), splitOffset); - auto lastCodepointHint = char32_t { 0 }; - auto utf8DecodeState = unicode::utf8_decoder_state {}; - auto result = scan_text(chunkOne, 80, &lastCodepointHint, &utf8DecodeState); + auto result = unicode::scan_text(state, chunkOne, 80); - REQUIRE(utf8DecodeState.expectedLength == 4); - REQUIRE(utf8DecodeState.currentLength == 3); + REQUIRE(state.utf8.expectedLength == 4); + REQUIRE(state.utf8.currentLength == 3); CHECK(result.count == 0); CHECK(result.start == text.data()); CHECK(result.end == text.data()); - CHECK(result.next == (text.data() + splitOffset)); + CHECK(state.next == (text.data() + splitOffset)); auto const chunkTwo = - std::string_view(result.next, (size_t) std::distance(result.next, text.data() + text.size())); - result = scan_text(chunkTwo, 80, &lastCodepointHint, &utf8DecodeState); + std::string_view(state.next, (size_t) std::distance(state.next, text.data() + text.size())); + result = unicode::scan_text(state, chunkTwo, 80, unicode::null_receiver::get()); - REQUIRE(utf8DecodeState.expectedLength == 0); + REQUIRE(state.utf8.expectedLength == 0); CHECK(result.count == 2); REQUIRE(result.start == text.data()); REQUIRE(result.end == text.data() + 4); - REQUIRE(result.next == text.data() + 4); + REQUIRE(state.next == text.data() + 4); auto const resultingText = string_view(result.start, static_cast<size_t>(std::distance(result.start, result.end))); REQUIRE(resultingText == text.substr(0, 4)); @@ -279,7 +242,8 @@ s += (k % 2) != 0 ? oneSimple : oneComplex; s += ControlCodes; - auto const result = scan_text(s, 80, nullptr); + auto state = unicode::scan_state {}; + auto const result = scan_text(state, s, 80); auto const countSimple = ((i + 1) / 2) * 20; auto const countComplex = (i / 2) * 2; @@ -292,7 +256,7 @@ escape(s))); CHECK(result.count == countSimple + countComplex); - CHECK(result.next == s.data() + s.size() - ControlCodes.size()); + CHECK(state.next == s.data() + s.size() - ControlCodes.size()); } } @@ -308,9 +272,10 @@ s += (k % 2) != 0 ? oneComplex : oneSimple; s += ControlCodes; - auto const result = scan_text(s, 80, nullptr); + auto state = unicode::scan_state {}; + auto const result = unicode::scan_text(state, s, 80); CHECK(result.count == (i / 2) * 20 + ((i + 1) / 2) * 2); - CHECK(result.next == s.data() + s.size() - ControlCodes.size()); + CHECK(state.next == s.data() + s.size() - ControlCodes.size()); } } @@ -320,21 +285,25 @@ auto const modifierVS16 = u8(U"\uFE0F"sv); // // narrow copyright sign - auto const result1 = scan_text(oneComplex, 80, nullptr); + auto state = unicode::scan_state {}; + auto const result1 = unicode::scan_text(state, oneComplex, 80); CHECK(result1.count == 1); - CHECK(result1.next == oneComplex.data() + oneComplex.size()); + CHECK(state.next == oneComplex.data() + oneComplex.size()); // copyright sign in emoji presentation + state = {}; auto const s = oneComplex + modifierVS16; - auto const result = scan_text(s, 80, nullptr); + auto const result = unicode::scan_text(state, s, 80); CHECK(result.count == 2); - CHECK(result.next == s.data() + s.size()); + CHECK(state.next == s.data() + s.size()); - auto const result3 = scan_text(s, 1, nullptr); + state = {}; + auto const result3 = unicode::scan_text(state, s, 1); CHECK(result3.count == 0); - CHECK(result3.next == s.data()); + CHECK(state.next == s.data()); } +#if 0 namespace { @@ -441,3 +410,4 @@ U"A", U"B", U"C", U"D", U"E", U"F" }); // clang-format on } +#endif diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/src/libunicode/script_segmenter_test.cpp new/libunicode-0.4.0/src/libunicode/script_segmenter_test.cpp --- old/libunicode-0.3.0/src/libunicode/script_segmenter_test.cpp 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/src/libunicode/script_segmenter_test.cpp 2023-11-27 21:40:54.000000000 +0100 @@ -13,7 +13,7 @@ */ #include <libunicode/script_segmenter.h> -#include <catch2/catch.hpp> +#include <catch2/catch_test_macros.hpp> #include <string> #include <string_view> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/src/libunicode/support.h new/libunicode-0.4.0/src/libunicode/support.h --- old/libunicode-0.3.0/src/libunicode/support.h 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/src/libunicode/support.h 2023-11-27 21:40:54.000000000 +0100 @@ -37,26 +37,26 @@ class out { public: - constexpr out(std::reference_wrapper<T> _ref) noexcept: ref_ { &_ref.value() } {} - constexpr explicit out(T& _ref) noexcept: ref_ { &_ref } {} + constexpr out(std::reference_wrapper<T> ref) noexcept: _ref { &ref.value() } {} + constexpr explicit out(T& ref) noexcept: _ref { &ref } {} constexpr out(out const&) noexcept = default; constexpr out(out&&) noexcept = default; constexpr out& operator=(out const&) noexcept = default; constexpr out& operator=(out&&) noexcept = default; - constexpr T& get() noexcept { return *ref_; } - constexpr T const& get() const noexcept { return *ref_; } + constexpr T& get() noexcept { return *_ref; } + constexpr T const& get() const noexcept { return *_ref; } - constexpr T& operator*() noexcept { return *ref_; } - constexpr T const& operator*() const noexcept { return *ref_; } + constexpr T& operator*() noexcept { return *_ref; } + constexpr T const& operator*() const noexcept { return *_ref; } - constexpr T* operator->() noexcept { return ref_; } - constexpr T const* operator->() const noexcept { return ref_; } + constexpr T* operator->() noexcept { return _ref; } + constexpr T const* operator->() const noexcept { return _ref; } - constexpr void assign(T _value) { *ref_ = std::move(_value); } + constexpr void assign(T value) { *_ref = std::move(value); } private: - T* ref_; + T* _ref; }; // dynamic array with a fixed capacity. @@ -82,11 +82,11 @@ constexpr size_t size() const noexcept { return size_; } constexpr bool empty() const noexcept { return size_ == 0; } - constexpr bool push_back(T _value) noexcept + constexpr bool push_back(T value) noexcept { if (size_ == N) return false; - values_[size_++] = std::move(_value); + values_[size_++] = std::move(value); return true; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/src/libunicode/test_main.cpp new/libunicode-0.4.0/src/libunicode/test_main.cpp --- old/libunicode-0.3.0/src/libunicode/test_main.cpp 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/src/libunicode/test_main.cpp 2023-11-27 21:40:54.000000000 +0100 @@ -12,7 +12,7 @@ * limitations under the License. */ #define CATCH_CONFIG_RUNNER -#include <catch2/catch.hpp> +#include <catch2/catch_session.hpp> int main(int argc, char const* argv[]) { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/src/libunicode/ucd_private.h new/libunicode-0.4.0/src/libunicode/ucd_private.h --- old/libunicode-0.3.0/src/libunicode/ucd_private.h 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/src/libunicode/ucd_private.h 2023-11-27 21:40:54.000000000 +0100 @@ -19,24 +19,24 @@ namespace unicode { -struct Interval +struct Interval // NOLINT(readability-identifier-naming) { char32_t from; char32_t to; }; template <size_t N> -constexpr bool contains(std::array<Interval, N> const& _ranges, char32_t _codepoint) noexcept +constexpr bool contains(std::array<Interval, N> const& ranges, char32_t codepoint) noexcept { auto a = size_t { 0 }; - auto b = static_cast<size_t>(_ranges.size()) - 1; + auto b = static_cast<size_t>(ranges.size()) - 1; while (a < b) { auto const i = ((b + a) / 2); - auto const& I = _ranges[i]; - if (I.to < _codepoint) + auto const& I = ranges[i]; + if (I.to < codepoint) a = i + 1; - else if (I.from > _codepoint) + else if (I.from > codepoint) { if (i == 0) return false; @@ -45,36 +45,36 @@ else return true; } - return a == b && _ranges[a].from <= _codepoint && _codepoint <= _ranges[a].to; + return a == b && ranges[a].from <= codepoint && codepoint <= ranges[a].to; } template <typename T> -struct Prop +struct Prop // NOLINT(readability-identifier-naming) { Interval interval; T property; }; template <typename T, size_t N> -constexpr std::optional<T> search(std::array<Prop<T>, N> const& _ranges, char32_t _codepoint) +constexpr std::optional<T> search(std::array<Prop<T>, N> const& ranges, char32_t codepoint) { auto a = size_t { 0 }; - auto b = static_cast<size_t>(_ranges.size()) - 1; + auto b = static_cast<size_t>(ranges.size()) - 1; while (a < b) { auto const i = static_cast<size_t>((b + a) / 2); - auto const& I = _ranges[i]; - if (I.interval.to < _codepoint) + auto const& I = ranges[i]; + if (I.interval.to < codepoint) a = i + 1; - else if (I.interval.from > _codepoint) + else if (I.interval.from > codepoint) b = i - 1; else return I.property; } - if (a == b && _ranges[a].interval.from <= _codepoint && _codepoint <= _ranges[a].interval.to) - return _ranges[a].property; + if (a == b && ranges[a].interval.from <= codepoint && codepoint <= ranges[a].interval.to) + return ranges[a].property; return std::nullopt; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/src/libunicode/utf8.cpp new/libunicode-0.4.0/src/libunicode/utf8.cpp --- old/libunicode-0.3.0/src/libunicode/utf8.cpp 1970-01-01 01:00:00.000000000 +0100 +++ new/libunicode-0.4.0/src/libunicode/utf8.cpp 2023-11-27 21:40:54.000000000 +0100 @@ -0,0 +1,80 @@ +/** + * This file is part of the "libunicode" project + * Copyright (c) 2020 Christian Parpart <christian@parpart.family> + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <libunicode/utf8.h> + +namespace unicode +{ + +ConvertResult from_utf8(utf8_decoder_state& state, uint8_t value) noexcept +{ + if (!state.expectedLength) + { + if ((value & 0b1000'0000) == 0) + { + state.currentLength = 1; + return Success { value }; + } + else if ((value & 0b1110'0000) == 0b1100'0000) + { + state.currentLength = 1; + state.expectedLength = 2; + state.character = value & 0b0001'1111; + } + else if ((value & 0b1111'0000) == 0b1110'0000) + { + state.currentLength = 1; + state.expectedLength = 3; + state.character = value & 0b0000'1111; + } + else if ((value & 0b1111'1000) == 0b1111'0000) + { + state.currentLength = 1; + state.expectedLength = 4; + state.character = value & 0b0000'0111; + } + else + { + state.currentLength = 1; + state.expectedLength = 0; + return Invalid {}; + } + } + // clang-format off + else if ((value & 0b1110'0000) == 0b1100'0000 + || (value & 0b1111'0000) == 0b1110'0000 + || (value & 0b1111'1000) == 0b1111'0000) + // clang-format on + { + // We have a new codepoint, but the previous one was incomplete. + state.expectedLength = 0; + // Return Invalid for the current incomplete codepoint, + // but have already started the next codepoint. + from_utf8(state, value); + return { Invalid {} }; + } + else + { + state.character <<= 6; + state.character |= value & 0b0011'1111; + state.currentLength++; + } + + if (state.currentLength < state.expectedLength) + return { Incomplete {} }; + + state.expectedLength = 0; // reset state + return { Success { state.character } }; +} + +} // namespace unicode diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/src/libunicode/utf8.h new/libunicode-0.4.0/src/libunicode/utf8.h --- old/libunicode-0.3.0/src/libunicode/utf8.h 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/src/libunicode/utf8.h 2023-11-27 21:40:54.000000000 +0100 @@ -24,68 +24,68 @@ /// Converts an UTF-32 codepoint into a UTF-8 sequence. /// -/// @param _character UTF-32 character to encode to UTF-8 -/// @param _result target memory location to start writing to (up to 4 chars) +/// @param character UTF-32 character to encode to UTF-8 +/// @param result target memory location to start writing to (up to 4 chars) /// /// @return number of bytes written to. -constexpr inline unsigned to_utf8(char32_t _character, uint8_t* _result) +constexpr inline unsigned to_utf8(char32_t character, uint8_t* result) { - if (_character <= 0x7F) + if (character <= 0x7F) { - _result[0] = static_cast<uint8_t>(_character & 0b0111'1111); + result[0] = static_cast<uint8_t>(character & 0b0111'1111); return 1; } - else if (_character <= 0x07FF) + else if (character <= 0x07FF) { - _result[0] = static_cast<uint8_t>(((_character >> 6) & 0b0001'1111) | 0b1100'0000); - _result[1] = static_cast<uint8_t>(((_character >> 0) & 0b0011'1111) | 0b1000'0000); + result[0] = static_cast<uint8_t>(((character >> 6) & 0b0001'1111) | 0b1100'0000); + result[1] = static_cast<uint8_t>(((character >> 0) & 0b0011'1111) | 0b1000'0000); return 2; } - if (_character <= 0xFFFF) + if (character <= 0xFFFF) { - _result[0] = static_cast<uint8_t>(((_character >> 12) & 0b0000'1111) | 0b1110'0000); - _result[1] = static_cast<uint8_t>(((_character >> 6) & 0b0011'1111) | 0b1000'0000); - _result[2] = static_cast<uint8_t>(((_character >> 0) & 0b0011'1111) | 0b1000'0000); + result[0] = static_cast<uint8_t>(((character >> 12) & 0b0000'1111) | 0b1110'0000); + result[1] = static_cast<uint8_t>(((character >> 6) & 0b0011'1111) | 0b1000'0000); + result[2] = static_cast<uint8_t>(((character >> 0) & 0b0011'1111) | 0b1000'0000); return 3; } else { - _result[0] = static_cast<uint8_t>(((_character >> 18) & 0b0000'0111) | 0b1111'0000); - _result[1] = static_cast<uint8_t>(((_character >> 12) & 0b0011'1111) | 0b1000'0000); - _result[2] = static_cast<uint8_t>(((_character >> 6) & 0b0011'1111) | 0b1000'0000); - _result[3] = static_cast<uint8_t>(((_character >> 0) & 0b0011'1111) | 0b1000'0000); + result[0] = static_cast<uint8_t>(((character >> 18) & 0b0000'0111) | 0b1111'0000); + result[1] = static_cast<uint8_t>(((character >> 12) & 0b0011'1111) | 0b1000'0000); + result[2] = static_cast<uint8_t>(((character >> 6) & 0b0011'1111) | 0b1000'0000); + result[3] = static_cast<uint8_t>(((character >> 0) & 0b0011'1111) | 0b1000'0000); return 4; } } /// Converts a UTF-32 string into an UTF-8 sring. -inline std::string to_utf8(char32_t const* _characters, size_t n) +inline std::string to_utf8(char32_t const* characters, size_t n) { std::string s; s.reserve(n); for (size_t i = 0; i < n; ++i) { uint8_t bytes[4]; - unsigned const len = to_utf8(_characters[i], bytes); + unsigned const len = to_utf8(characters[i], bytes); s.append((char const*) bytes, len); } return s; } -inline std::string to_utf8(char32_t _character) +inline std::string to_utf8(char32_t character) { - return to_utf8(&_character, 1); + return to_utf8(&character, 1); } -inline std::string to_utf8(std::u32string const& _characters) +inline std::string to_utf8(std::u32string const& characters) { - return to_utf8(_characters.data(), _characters.size()); + return to_utf8(characters.data(), characters.size()); } -inline std::string to_utf8(std::u32string_view const& _characters) +inline std::string to_utf8(std::u32string_view const& characters) { - return to_utf8(_characters.data(), _characters.size()); + return to_utf8(characters.data(), characters.size()); } struct utf8_decoder_state @@ -95,70 +95,22 @@ unsigned currentLength = 0; }; -struct Invalid -{ -}; -struct Incomplete -{ -}; -struct Success -{ - char32_t value; -}; +// clang-format off +// NOLINTBEGIN(readability-identifier-naming) +struct Invalid { }; +struct Incomplete { }; +struct Success { char32_t value; }; +// NOLINTEND(readability-identifier-naming) +// clang-format on using ConvertResult = std::variant<Invalid, Incomplete, Success>; /// Progressively decodes a UTF-8 codepoint. -inline ConvertResult from_utf8(utf8_decoder_state& _state, uint8_t _byte) -{ - if (!_state.expectedLength) - { - if ((_byte & 0b1000'0000) == 0) - { - _state.currentLength = 1; - return Success { _byte }; - } - else if ((_byte & 0b1110'0000) == 0b1100'0000) - { - _state.currentLength = 1; - _state.expectedLength = 2; - _state.character = _byte & 0b0001'1111; - } - else if ((_byte & 0b1111'0000) == 0b1110'0000) - { - _state.currentLength = 1; - _state.expectedLength = 3; - _state.character = _byte & 0b0000'1111; - } - else if ((_byte & 0b1111'1000) == 0b1111'0000) - { - _state.currentLength = 1; - _state.expectedLength = 4; - _state.character = _byte & 0b0000'0111; - } - else - { - _state.currentLength = 1; - return Invalid {}; - } - } - else - { - _state.character <<= 6; - _state.character |= _byte & 0b0011'1111; - _state.currentLength++; - } - - if (_state.currentLength < _state.expectedLength) - return { Incomplete {} }; - - _state.expectedLength = 0; // reset state - return { Success { _state.character } }; -} +ConvertResult from_utf8(utf8_decoder_state& state, uint8_t value) noexcept; -inline unsigned from_utf8i(utf8_decoder_state& _state, uint8_t _byte) +inline unsigned from_utf8i(utf8_decoder_state& state, uint8_t value) { - auto const result = from_utf8(_state, _byte); + auto const result = from_utf8(state, value); if (std::holds_alternative<Incomplete>(result)) return static_cast<unsigned>(-1); @@ -169,43 +121,43 @@ return std::get<Success>(result).value; } -inline ConvertResult from_utf8(uint8_t const* _bytes, size_t* _size) +inline ConvertResult from_utf8(uint8_t const* bytes, size_t* size) { auto state = utf8_decoder_state {}; auto result = ConvertResult {}; do - result = from_utf8(state, *_bytes++); + result = from_utf8(state, *bytes++); while (std::holds_alternative<Incomplete>(result)); - if (_size) - *_size = state.currentLength; + if (size) + *size = state.currentLength; return result; } #if 0 // TODO(do that later) __cplusplus > 201703L // C++20 (char8_t) -inline ConvertResult from_utf8(char8_t const* _bytes, size_t* _size) +inline ConvertResult from_utf8(char8_t const* bytes, size_t* size) { - return from_utf8((uint8_t const*)(_bytes), _size); + return from_utf8((uint8_t const*)(bytes), size); } #endif -inline ConvertResult from_utf8(char const* _bytes, size_t* _size) +inline ConvertResult from_utf8(char const* bytes, size_t* size) { - return from_utf8((uint8_t const*) (_bytes), _size); + return from_utf8((uint8_t const*) (bytes), size); } template <typename T = char32_t> -inline std::basic_string<T> from_utf8(std::string_view const& _bytes) +inline std::basic_string<T> from_utf8(std::string_view bytes) { static_assert(sizeof(T) == 4); std::basic_string<T> s; size_t offset = 0; - while (offset < _bytes.size()) + while (offset < bytes.size()) { size_t i {}; - ConvertResult const result = from_utf8(_bytes.data() + offset, &i); + ConvertResult const result = from_utf8(bytes.data() + offset, &i); if (std::holds_alternative<Success>(result)) s += T(std::get<Success>(result).value); offset += i; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/src/libunicode/utf8_grapheme_segmenter_test.cpp new/libunicode-0.4.0/src/libunicode/utf8_grapheme_segmenter_test.cpp --- old/libunicode-0.3.0/src/libunicode/utf8_grapheme_segmenter_test.cpp 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/src/libunicode/utf8_grapheme_segmenter_test.cpp 2023-11-27 21:40:54.000000000 +0100 @@ -15,7 +15,7 @@ #include <fmt/format.h> -#include <catch2/catch.hpp> +#include <catch2/catch_test_macros.hpp> #include <string_view> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/src/libunicode/utf8_test.cpp new/libunicode-0.4.0/src/libunicode/utf8_test.cpp --- old/libunicode-0.3.0/src/libunicode/utf8_test.cpp 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/src/libunicode/utf8_test.cpp 2023-11-27 21:40:54.000000000 +0100 @@ -16,7 +16,7 @@ #include <fmt/format.h> -#include <catch2/catch.hpp> +#include <catch2/catch_test_macros.hpp> #include <array> #include <cassert> @@ -146,6 +146,77 @@ CHECK(a32 == U"HiHo"); } +TEST_CASE("utf8.from_utf8.incomplete.2", "[utf8]") +{ + // Ensure incomplete bytes are consumed and reported as Invalid accordingly. + auto state = utf8_decoder_state {}; + + // We start with an incomplete 2-byte sequence. + auto const r0 = from_utf8(state, 0xC7); + REQUIRE(holds_alternative<Incomplete>(r0)); + + // Continue with another 2-byte sequence, + // while the first one is still incomplete. + auto const r1 = from_utf8(state, 0xC7); + REQUIRE(holds_alternative<Invalid>(r1)); + auto const r2 = from_utf8(state, 0x8E); + REQUIRE(holds_alternative<Success>(r2)); + REQUIRE((unsigned) get<Success>(r2).value == 0x01CE); +} + +TEST_CASE("utf8.from_utf8.incomplete.3", "[utf8]") +{ + // Ensure incomplete bytes are consumed and reported as Invalid accordingly. + auto state = utf8_decoder_state {}; + + // We start with an incomplete 2-byte sequence. + auto const r0 = from_utf8(state, 0xE2); + REQUIRE(holds_alternative<Incomplete>(r0)); + auto const r1 = from_utf8(state, 0x82); + REQUIRE(holds_alternative<Incomplete>(r1)); + + // Continue with another 2-byte sequence, + // while the first one is still incomplete. + auto const r2 = from_utf8(state, 0xE2); + REQUIRE(holds_alternative<Invalid>(r2)); + auto const r3 = from_utf8(state, 0x82); + REQUIRE(holds_alternative<Incomplete>(r3)); + auto const r4 = from_utf8(state, 0xAC); + REQUIRE(holds_alternative<Success>(r4)); + REQUIRE((unsigned) get<Success>(r4).value == 0x20AC); +} + +TEST_CASE("utf8.from_utf8.incomplete.4", "[utf8]") +{ + auto constexpr sequence = "\xF0\x9F\x8D\xA3"sv; + auto constexpr codepoint = 0x1F363; + + auto state = utf8_decoder_state {}; + + // Generate an incomplete multi-byte sequence. + for (size_t i = 0; i < sequence.size() - 1; ++i) + { + CAPTURE(i, unsigned(sequence[i])); + auto const r = from_utf8(state, (uint8_t) sequence[i]); + REQUIRE(holds_alternative<Incomplete>(r)); + } + + // Now fill the multi-byte sequence, but completely. + auto const r0 = from_utf8(state, (uint8_t) sequence[0]); + REQUIRE(holds_alternative<Invalid>(r0)); + + for (size_t i = 1; i < sequence.size() - 1; ++i) + { + CAPTURE(i, unsigned(sequence[i])); + auto const ri = from_utf8(state, (uint8_t) sequence[i]); + REQUIRE(holds_alternative<Incomplete>(ri)); + } + + auto const last = from_utf8(state, (uint8_t) sequence.back()); + REQUIRE(holds_alternative<Success>(last)); + REQUIRE(get<Success>(last).value == codepoint); +} + TEST_CASE("utf8.iter", "[utf8]") { auto constexpr values = string_view { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/src/libunicode/width_test.cpp new/libunicode-0.4.0/src/libunicode/width_test.cpp --- old/libunicode-0.3.0/src/libunicode/width_test.cpp 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/src/libunicode/width_test.cpp 2023-11-27 21:40:54.000000000 +0100 @@ -13,7 +13,7 @@ */ #include <libunicode/width.h> -#include <catch2/catch.hpp> +#include <catch2/catch_test_macros.hpp> TEST_CASE("random test", "[width]") { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/src/libunicode/word_segmenter.h new/libunicode-0.4.0/src/libunicode/word_segmenter.h --- old/libunicode-0.3.0/src/libunicode/word_segmenter.h 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/src/libunicode/word_segmenter.h 2023-11-27 21:40:54.000000000 +0100 @@ -25,64 +25,64 @@ using iterator = char_type const*; using view_type = std::basic_string_view<char_type>; - constexpr word_segmenter(std::basic_string_view<char_type> const& _str): - word_segmenter(_str.data(), _str.data() + _str.size()) + constexpr word_segmenter(std::basic_string_view<char_type> const& str): + word_segmenter(str.data(), str.data() + str.size()) { } constexpr word_segmenter(): word_segmenter({}, {}) {} constexpr bool empty() const noexcept { return size() == 0; } - constexpr std::size_t size() const noexcept { return static_cast<size_t>(right_ - left_); } - constexpr view_type operator*() const noexcept { return view_type(left_, size()); } + constexpr std::size_t size() const noexcept { return static_cast<size_t>(_right - _left); } + constexpr view_type operator*() const noexcept { return view_type(_left, size()); } constexpr word_segmenter& operator++() noexcept { - left_ = right_; - while (right_ != end_) + _left = _right; + while (_right != _end) { - switch (state_) + switch (_state) { case State::NoWord: - if (!isDelimiter(*right_)) + if (!isDelimiter(*_right)) { - state_ = State::Word; + _state = State::Word; return *this; } break; case State::Word: - if (isDelimiter(*right_)) + if (isDelimiter(*_right)) { - state_ = State::NoWord; + _state = State::NoWord; return *this; } break; } - ++right_; + ++_right; } return *this; } - constexpr bool operator==(word_segmenter const& _rhs) const noexcept + constexpr bool operator==(word_segmenter const& rhs) const noexcept { - return left_ == _rhs.left_ && right_ == _rhs.right_; + return _left == rhs._left && _right == rhs._right; } - constexpr bool operator!=(word_segmenter const& _rhs) const noexcept { return !(*this == _rhs); } + constexpr bool operator!=(word_segmenter const& rhs) const noexcept { return !(*this == rhs); } private: - constexpr word_segmenter(iterator _begin, iterator _end): - left_ { _begin }, - right_ { _begin }, - state_ { _begin != _end ? (isDelimiter(*right_) ? State::NoWord : State::Word) : State::NoWord }, - end_ { _end } + constexpr word_segmenter(iterator begin, iterator end): + _left { begin }, + _right { begin }, + _state { begin != end ? (isDelimiter(*_right) ? State::NoWord : State::Word) : State::NoWord }, + _end { end } { ++*this; } - constexpr bool isDelimiter(char_type _char) const noexcept + constexpr bool isDelimiter(char_type character) const noexcept { - switch (_char) + switch (character) { case ' ': case '\r': @@ -100,10 +100,10 @@ NoWord }; - iterator left_; - iterator right_; - State state_; - iterator end_; + iterator _left; + iterator _right; + State _state; + iterator _end; }; } // namespace unicode diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libunicode-0.3.0/src/libunicode/word_segmenter_test.cpp new/libunicode-0.4.0/src/libunicode/word_segmenter_test.cpp --- old/libunicode-0.3.0/src/libunicode/word_segmenter_test.cpp 2023-03-01 08:59:22.000000000 +0100 +++ new/libunicode-0.4.0/src/libunicode/word_segmenter_test.cpp 2023-11-27 21:40:54.000000000 +0100 @@ -13,7 +13,7 @@ */ #include <libunicode/word_segmenter.h> -#include <catch2/catch.hpp> +#include <catch2/catch_test_macros.hpp> using namespace unicode; using namespace std::string_literals; ++++++ libunicode-fix-catch-in-cmake.patch ++++++ diff -ruN libunicode-0.4.0.orig/src/libunicode/CMakeLists.txt libunicode-0.4.0/src/libunicode/CMakeLists.txt --- libunicode-0.4.0.orig/src/libunicode/CMakeLists.txt 2023-11-28 03:40:54.000000000 +0700 +++ libunicode-0.4.0/src/libunicode/CMakeLists.txt 2023-12-21 12:29:29.934925268 +0700 @@ -223,10 +223,10 @@ # supress conversion warnings for Catch2 # https://github.com/catchorg/Catch2/issues/2583 # https://github.com/SFML/SFML/blob/e45628e2ebc5843baa3739781276fa85a54d4653/t... - set_target_properties(Catch2 PROPERTIES COMPILE_OPTIONS "" EXPORT_COMPILE_COMMANDS OFF) - set_target_properties(Catch2WithMain PROPERTIES EXPORT_COMPILE_COMMANDS OFF) - get_target_property(CATCH2_INCLUDE_DIRS Catch2 INTERFACE_INCLUDE_DIRECTORIES) - target_include_directories(Catch2 SYSTEM INTERFACE ${CATCH2_INCLUDE_DIRS}) + # (Catch2 PROPERTIES COMPILE_OPTIONS "" EXPORT_COMPILE_COMMANDS OFF) + # set_target_properties(Catch2WithMain PROPERTIES EXPORT_COMPILE_COMMANDS OFF) + # get_target_property(CATCH2_INCLUDE_DIRS Catch2 INTERFACE_INCLUDE_DIRECTORIES) + # target_include_directories(Catch2 SYSTEM INTERFACE ${CATCH2_INCLUDE_DIRS}) target_link_libraries(unicode_test unicode Catch2::Catch2WithMain fmt::fmt-header-only) add_test(unicode_test unicode_test)
participants (1)
-
Source-Sync