Commit ff69763c authored by Brad King's avatar Brad King
Browse files

String: Add a custom string type

Create a `cm::String` type that holds a view of a string buffer and
optionally shares ownership of the buffer.  Instances can either
borrow longer-lived storage (e.g. static storage of string literals)
or internally own a `std::string` instance.  In the latter case,
share ownership with copies and substrings.  Allocate a new internal
string only on operations that require mutation.

This will allow us to recover string sharing semantics that we
used to get from C++98 std::string copy-on-write implementations.
Such implementations are not allowed by C++11 so code our own in
a custom string type instead.
parent 410a3e4b
......@@ -568,6 +568,8 @@ set(SRCS
cmSiteNameCommand.h
cmSourceGroupCommand.cxx
cmSourceGroupCommand.h
cmString.cxx
cmString.hxx
cmStringReplaceHelper.cxx
cmStringCommand.cxx
cmStringCommand.h
......
/* Distributed under the OSI-approved BSD 3-Clause License. See accompanying
file Copyright.txt or https://cmake.org/licensing for details. */
#define _SCL_SECURE_NO_WARNINGS
#include "cmString.hxx"
#include <memory>
#include <ostream>
#include <stdexcept>
#include <string>
#include <type_traits>
namespace cm {
// Shared empty std::string whose reference String::str() returns
// when the instance views no string (null state).
static std::string const empty_string_;
void String::internally_mutate_to_stable_string()
{
// We assume that only one thread mutates this instance at
// a time even if we point to a shared string buffer refernced
// by other threads.
*this = String(data(), size());
}
std::string const& String::str()
{
  // A null instance views no string; hand out the shared empty string,
  // which is stable for the lifetime of our current value.
  if (data() == nullptr) {
    return empty_string_;
  }

  // When the view covers exactly the whole internal string we can
  // return that string directly.  Otherwise mutate so that we hold a
  // std::string matching the view; it is then stable for the lifetime
  // of our current value.
  bool const viewsWholeString =
    string_ && data() == string_->data() && size() == string_->size();
  if (!viewsWholeString) {
    this->internally_mutate_to_stable_string();
  }
  return *string_;
}
const char* String::c_str()
{
  const char* const start = data();

  // A null instance yields a null pointer.  Otherwise, we always point
  // into a null-terminated string so it is safe to access one past the
  // end of the view; if that byte is the terminator the viewed buffer
  // can be used directly.
  if (start == nullptr || start[size()] == '\0') {
    return start;
  }

  // The view ends in the middle of a longer buffer.  Mutate to hold a
  // std::string of our own so we can get a null terminator.
  this->internally_mutate_to_stable_string();
  return string_->c_str();
}
String& String::insert(size_type index, size_type count, char ch)
{
  // Build the result in a fresh buffer sized for the final length and
  // then adopt it.  The position check on 'index' is the one performed
  // by std::string::insert.
  std::string result;
  result.reserve(size() + count);
  result.assign(data(), size());
  result.insert(index, count, ch);
  *this = std::move(result);
  return *this;
}
String& String::erase(size_type index, size_type count)
{
  size_type const len = size();
  if (index > len) {
    throw std::out_of_range("Index out of range in String::erase");
  }

  // Clamp the number of erased characters to what is available
  // after 'index', and locate the first character that survives.
  size_type const removed = std::min(count, len - index);
  size_type const tailPos = index + removed;

  // Assemble "head + tail" in a new buffer and adopt it.
  std::string kept;
  kept.reserve(len - removed);
  kept.assign(data(), index);
  kept.append(data() + tailPos, len - tailPos);
  *this = std::move(kept);
  return *this;
}
String String::substr(size_type pos, size_type count) const
{
  // A valid position lies in [0, size()].  The substring constructor
  // shares our buffer and clamps 'count' to the available characters.
  if (pos <= size()) {
    return String(*this, pos, count);
  }
  throw std::out_of_range("Index out of range in String::substr");
}
// Private constructor for the 'owned' state: move 's' into a newly
// allocated shared std::string and view that entire string.
// NOTE(review): 'view_' is initialized from 'string_', so this relies
// on 'string_' being declared before 'view_' in the class -- confirm.
String::String(std::string&& s, Private)
: string_(std::make_shared<std::string>(std::move(s)))
, view_(string_->data(), string_->size())
{
}
String::size_type String::copy(char* dest, size_type count,
                               size_type pos) const
{
  // Delegate to the view; the returned value is whatever the view's
  // copy() reports as the number of characters copied.
  size_type const copied = view_.copy(dest, count, pos);
  return copied;
}
std::ostream& operator<<(std::ostream& os, String const& s)
{
  // Write exactly the viewed bytes with an unformatted write; the
  // viewed range need not be null-terminated.
  os.write(s.data(), static_cast<std::streamsize>(s.size()));
  return os;
}
std::string& operator+=(std::string& self, String const& s)
{
return self += s.view();
}
String IntoString<char*>::into_string(const char* s)
{
  // A non-null C string is copied into a std::string, from which the
  // returned String is constructed; a null pointer converts to a
  // null String.
  if (s != nullptr) {
    return std::string(s);
  }
  return String();
}
string_view AsStringView<String>::view(String const& s)
{
  // A String already exposes its view; just forward it.
  string_view const v = s.view();
  return v;
}
} // namespace cm
/* Distributed under the OSI-approved BSD 3-Clause License. See accompanying
file Copyright.txt or https://cmake.org/licensing for details. */
#ifndef cmString_hxx
#define cmString_hxx
#include "cmConfigure.h" // IWYU pragma: keep
#include "cm_string_view.hxx"
#include <algorithm>
#include <functional>
#include <initializer_list>
#include <memory>
#include <ostream>
#include <string>
#include <type_traits>
namespace cm {
class String;
/**
* Trait to convert type T into a String.
* Implementations must derive from 'std::true_type'
* and define an 'into_string' member that accepts
* type T (by value or reference) and returns one of:
*
* - 'std::string' to construct an owned instance.
* - 'cm::string_view' to construct a borrowed or null instance.
* The buffer from which the view is borrowed must outlive
* all copies of the resulting String, e.g. static storage.
* - 'cm::String' for already-constructed instances.
*
* The primary template derives from 'std::false_type', so types
* without a specialization do not implement the trait.
*/
template <typename T>
struct IntoString : std::false_type
{
};
// A reference to T uses the trait of T.
template <typename T>
struct IntoString<T&> : IntoString<T>
{
};
// A const-qualified T uses the trait of T.
template <typename T>
struct IntoString<T const> : IntoString<T>
{
};
// A pointer to const T uses the trait of a pointer to T.
template <typename T>
struct IntoString<T const*> : IntoString<T*>
{
};
// An array of const T uses the trait of an array of T with the same length.
template <typename T, std::string::size_type N>
struct IntoString<T const[N]> : IntoString<T[N]>
{
};
// C string pointers.  The out-of-line definition returns a null String
// for a null pointer and otherwise copies the characters.
template <>
struct IntoString<char*> : std::true_type
{
static String into_string(const char* s);
};
// 'nullptr' converts to a default-constructed view, i.e. the
// "borrowed or null" form described by the trait contract.
template <>
struct IntoString<std::nullptr_t> : std::true_type
{
  static string_view into_string(std::nullptr_t)
  {
    return string_view();
  }
};
// Character arrays (e.g. string literals).  The first N - 1 characters
// are copied, i.e. the final array element is treated as the
// terminator and dropped.
template <std::string::size_type N>
struct IntoString<char[N]> : std::true_type
{
  static std::string into_string(char const (&s)[N])
  {
    std::string copy(s, N - 1);
    return copy;
  }
};
// A std::string is taken by value and passed straight through, so the
// resulting String is an owned instance.
template <>
struct IntoString<std::string> : std::true_type
{
  static std::string into_string(std::string s)
  {
    return s;
  }
};
// A string_view is copied into a std::string, so the resulting String
// is an owned instance rather than a borrow of the viewed buffer.
template <>
struct IntoString<string_view> : std::true_type
{
  static std::string into_string(string_view s)
  {
    return std::string(s);
  }
};
// A single character becomes a one-character owned string.
template <>
struct IntoString<char> : std::true_type
{
  static std::string into_string(char const& c)
  {
    std::string single(1, c);
    return single;
  }
};
/**
* Trait to convert type T into a 'cm::string_view'.
* Implementations must derive from 'std::true_type' and
* define a 'view' member that accepts type T (by reference)
* and returns a 'cm::string_view'.
*
* The primary template derives from 'std::false_type', so types
* without a specialization do not implement the trait.
*/
template <typename T>
struct AsStringView : std::false_type
{
};
// A reference to T uses the trait of T.
template <typename T>
struct AsStringView<T&> : AsStringView<T>
{
};
// A const-qualified T uses the trait of T.
template <typename T>
struct AsStringView<T const> : AsStringView<T>
{
};
// A pointer to const T uses the trait of a pointer to T.
template <typename T>
struct AsStringView<T const*> : AsStringView<T*>
{
};
// An array of const T uses the trait of an array of T with the same length.
template <typename T, std::string::size_type N>
struct AsStringView<T const[N]> : AsStringView<T[N]>
{
};
// C string pointers.  A null pointer yields a null (empty) view
// instead of being handed to the string_view constructor, which
// requires a valid null-terminated string.  This mirrors
// IntoString<char*>, which also accepts a null pointer.
template <>
struct AsStringView<char*> : std::true_type
{
  static string_view view(const char* s)
  {
    return s ? string_view(s) : string_view();
  }
};
// Character arrays (e.g. string literals).  The view covers the first
// N - 1 characters, i.e. the final array element is excluded.
template <std::string::size_type N>
struct AsStringView<char[N]> : std::true_type
{
  static string_view view(char const (&s)[N])
  {
    string_view const v(s, N - 1);
    return v;
  }
};
// View the full contents of a std::string.
template <>
struct AsStringView<std::string> : std::true_type
{
  static string_view view(std::string const& s)
  {
    return string_view(s.data(), s.size());
  }
};
// View a single character in place.  The view points at the referenced
// character itself, so it is only usable while that character exists.
template <>
struct AsStringView<char> : std::true_type
{
  static string_view view(const char& s)
  {
    const char* const where = &s;
    return string_view(where, 1);
  }
};
// A string_view is already a view; return a copy of it.
template <>
struct AsStringView<string_view> : std::true_type
{
  static string_view view(string_view const& s)
  {
    return s;
  }
};
// View a cm::String.  Only declared here because String is still an
// incomplete type at this point; the definition forwards to
// String::view().
template <>
struct AsStringView<String> : std::true_type
{
static string_view view(String const& s);
};
/**
* \class String
*
* A custom string type that holds a view of a string buffer
* and optionally shares ownership of the buffer. Instances
* may have one of the following states:
*
* - null: views and owns nothing.
* Conversion to 'bool' is 'false'.
* 'data()' and 'c_str()' return nullptr.
* 'size()' returns 0.
* 'str()' returns an empty string.
*
* - borrowed: views a string but does not own it. This is used
* to bind to static storage (e.g. string literals) or for
* temporary instances that do not outlive the borrowed buffer.
* Copies and substrings still borrow the original buffer.
* Mutation allocates a new internal string and converts to
* the 'owned' state.
* Conversion to 'bool' is 'true'.
* 'c_str()' may internally mutate to the 'owned' state.
* 'str()' internally mutates to the 'owned' state.
*
* - owned: views an immutable 'std::string' instance owned internally.
* Copies and substrings share ownership of the internal string.
* Mutation allocates a new internal string.
* Conversion to 'bool' is 'true'.
*/
class String
{
// Tag type passed as the last argument to the internal constructors.
// It is declared in the private section, so code outside the class
// cannot name it.
enum class Private
{
};
public:
// Member types mirror those of the underlying string_view.
using traits_type = std::string::traits_type;
using value_type = string_view::value_type;
using pointer = string_view::pointer;
using const_pointer = string_view::const_pointer;
using reference = string_view::reference;
using const_reference = string_view::const_reference;
// Both 'iterator' and 'const_iterator' are the view's const iterator:
// iteration never grants write access to the buffer.
using const_iterator = string_view::const_iterator;
using iterator = string_view::const_iterator;
using const_reverse_iterator = string_view::const_reverse_iterator;
using reverse_iterator = string_view::const_reverse_iterator;
using difference_type = string_view::difference_type;
using size_type = string_view::size_type;
// Same sentinel value as string_view::npos; used as the default
// 'count' of the substring and erase operations.
static size_type const npos = string_view::npos;
/** Construct a null string. */
String() = default;
/** Construct from any type implementing the IntoString trait.
The value returned by 'into_string' is forwarded, together with
the Private tag, to the matching internal constructor. */
template <typename T,
typename = typename std::enable_if<IntoString<T>::value>::type>
String(T&& s)
: String(IntoString<T>::into_string(std::forward<T>(s)), Private())
{
}
/** Construct via std::string initializer list constructor. */
String(std::initializer_list<char> il)
: String(std::string(il))
{
}
/** Construct by copying the specified buffer:
's' characters starting at 'd'. */
String(const char* d, size_type s)
: String(std::string(d, s))
{
}
/** Construct by copying from input iterator range. */
template <typename InputIterator>
String(InputIterator first, InputIterator last)
: String(std::string(first, last))
{
}
/** Construct a string with 'n' copies of character 'c'. */
String(size_type n, char c)
: String(std::string(n, c))
{
}
/** Construct from a substring of another String instance.
    This shares ownership of the other string's buffer
    but views only a substring.  'count' is clamped to the
    number of characters available after 'pos'.
    Throws std::out_of_range if 'pos' is greater than 's.size()',
    matching the std::string substring constructor. */
String(String const& s, size_type pos, size_type count = npos)
  : string_(s.string_)
  // string_view::substr validates 'pos' and clamps 'count'.  Computing
  // 's.size() - pos' by hand would wrap around for pos > size() and
  // produce a view far outside the buffer.
  , view_(s.view_.substr(pos, count))
{
}
/** Construct by moving from another String instance.
The other instance is left as a null string. */
String(String&& s) noexcept
: string_(std::move(s.string_))
, view_(s.view_)
{
// Moving the shared pointer does not reset the source's view,
// so clear it explicitly to leave 's' null.
s.view_ = string_view();
}
/** Construct by copying from another String instance.
This shares ownership of the other string's buffer. */
String(String const&) noexcept = default;
~String() = default;
/** Assign by moving from another String instance.
The other instance is left as a null string. */
String& operator=(String&& s) noexcept
{
string_ = std::move(s.string_);
// Copy the view before resetting the source's view so that 's'
// ends up null.
view_ = s.view_;
s.view_ = string_view();
return *this;
}
/** Assign by copying from another String instance.
This shares ownership of the other string's buffer. */
String& operator=(String const&) noexcept = default;
/** Assign from any type implementing the IntoString trait.
    The new value is fully constructed before the current one
    is replaced. */
template <typename T>
typename // NOLINT(*)
  std::enable_if<IntoString<T>::value, String&>::type
  operator=(T&& s)
{
  String converted(std::forward<T>(s));
  return *this = std::move(converted);
}
/** Assign via std::string initializer list constructor.
    The new value is fully constructed before the current one
    is replaced. */
String& operator=(std::initializer_list<char> il)
{
  String fromList(il);
  return *this = std::move(fromList);
}
/** Return true if the instance is not a null string,
i.e. 'data()' is not nullptr. */
explicit operator bool() const noexcept { return data() != nullptr; }
/** Return a view of the string. */
string_view view() const noexcept { return view_; }
/** Return true if the instance is an empty string or null string. */
bool empty() const noexcept { return view_.empty(); }
/** Return a pointer to the start of the string. */
const char* data() const noexcept { return view_.data(); }
/** Return the length of the string in bytes. */
size_type size() const noexcept { return view_.size(); }
/** Return the length of the view, as reported by its 'length()'. */
size_type length() const noexcept { return view_.length(); }
/** Return the character at the given position.
No bounds checking is performed. */
char operator[](size_type pos) const noexcept { return view_[pos]; }
/** Return the character at the given position.
If the position is out of bounds, throws std::out_of_range. */
char at(size_type pos) const { return view_.at(pos); }
/** Return the first character.  No emptiness check is performed
here; the call forwards to the view's 'front()'. */
char front() const noexcept { return view_.front(); }
/** Return the last character.  No emptiness check is performed
here; the call forwards to the view's 'back()'. */
char back() const noexcept { return view_.back(); }
/** Get a reference to a normal std::string. The reference
is valid until this instance is mutated or destroyed.
Non-const because the instance may internally mutate to hold
a std::string matching the current view. */
std::string const& str();
/** Get a pointer to a C-style null-terminated string
containing the same value as this instance. The pointer
is valid until this instance is mutated, destroyed,
or str() is called.
Non-const because the instance may internally mutate to obtain
a null terminator. Returns nullptr for a null string. */
const char* c_str();
// Iteration over the viewed characters.  All of these forward to the
// view, so every iterator is read-only.
const_iterator begin() const noexcept { return view_.begin(); }
const_iterator end() const noexcept { return view_.end(); }
const_iterator cbegin() const noexcept { return begin(); }
const_iterator cend() const noexcept { return end(); }
// Reverse iteration over the viewed characters.
const_reverse_iterator rbegin() const noexcept { return view_.rbegin(); }
const_reverse_iterator rend() const noexcept { return view_.rend(); }
const_reverse_iterator crbegin() const noexcept { return rbegin(); }
const_reverse_iterator crend() const noexcept { return rend(); }
/** Append to the string using any type that implements the
    AsStringView trait.  The concatenation is built in a new
    buffer before it replaces the current value, so the argument
    is read completely before this instance changes. */
template <typename T>
typename std::enable_if<AsStringView<T>::value, String&>::type operator+=(
  T&& s)
{
  string_view const suffix = AsStringView<T>::view(std::forward<T>(s));
  std::string joined;
  joined.reserve(size() + suffix.size());
  joined.assign(data(), size());
  joined.append(suffix.data(), suffix.size());
  *this = std::move(joined);
  return *this;
}
/** Assign to an empty string.
The new value views a zero-length range of the literal "", whose
data pointer is non-null, so the result is empty but not null. */
void clear() { *this = String(string_view("", 0), Private()); }
/** Insert 'count' copies of 'ch' at position 'index'.
Returns a reference to this instance. */
String& insert(size_type index, size_type count, char ch);
/** Erase 'count' characters starting at position 'index'.
'count' is clamped to the characters available after 'index'.
Throws std::out_of_range if 'index' is greater than 'size()'.
Returns a reference to this instance. */
String& erase(size_type index = 0, size_type count = npos);
/** Append the single character 'ch'.  The result is built in a new
    internal string that replaces the current value. */
void push_back(char ch)
{
  std::string grown;
  grown.reserve(size() + 1);
  grown.assign(data(), size());
  grown.push_back(ch);
  *this = std::move(grown);
}
/** Remove the last character by narrowing the view by one; the
underlying buffer is still shared, not copied.
On an empty string 'size() - 1' wraps around and is clamped by
the substring constructor, leaving the string empty. */
void pop_back() { *this = String(*this, 0, size() - 1); }
/** Replace the 'count' characters starting at position 'pos' with
    the contents of 's' (any type implementing the AsStringView
    trait).  'count' is clamped to the characters available after
    'pos', so 'npos' means "through the end of the string".
    Throws std::out_of_range if 'pos' is greater than 'size()'. */
template <typename T>
typename std::enable_if<AsStringView<T>::value, String&>::type replace(
  size_type pos, size_type count, T&& s)
{
  // string_view::substr validates 'pos' and clamps 'count' the same
  // way std::string::replace does.  Without the clamp 'first + count'
  // would run past end() for count == npos or any oversized count.
  size_type const rcount = view_.substr(pos, count).size();
  const_iterator first = begin() + pos;
  const_iterator last = first + rcount;
  return replace(first, last, std::forward<T>(s));
}
/** Replace the characters in [first, last) with the characters of
    the input range [first2, last2).  'first' and 'last' must be
    iterators into this instance. */
template <typename InputIterator>
String& replace(const_iterator first, const_iterator last,
                InputIterator first2, InputIterator last2)
{
  // Assemble prefix + replacement + suffix in a new buffer.
  std::string rebuilt;
  rebuilt.append(view_.begin(), first);
  rebuilt.append(first2, last2);
  rebuilt.append(last, view_.end());
  *this = std::move(rebuilt);
  return *this;
}
/** Replace the characters in [first, last) with the contents of 's'
    (any type implementing the AsStringView trait).  'first' and
    'last' must be iterators into this instance. */
template <typename T>
typename std::enable_if<AsStringView<T>::value, String&>::type replace(
  const_iterator first, const_iterator last, T&& s)
{
  string_view const middle = AsStringView<T>::view(std::forward<T>(s));

  // Assemble prefix + replacement + suffix in a buffer reserved for
  // the final length.
  std::string rebuilt;
  rebuilt.reserve((first - view_.begin()) + middle.size() +
                  (view_.end() - last));
  rebuilt.append(view_.begin(), first);
  rebuilt.append(middle.data(), middle.size());
  rebuilt.append(last, view_.end());
  *this = std::move(rebuilt);
  return *this;
}
/** Replace the 'count' characters starting at position 'pos' with
    the substring of 's' that starts at 'pos2' and spans at most
    'count2' characters.  The substring is taken with the view's
    'substr()'. */
template <typename T>
typename std::enable_if<AsStringView<T>::value, String&>::type replace(
  size_type pos, size_type count, T&& s, size_type pos2,
  size_type count2 = npos)
{
  string_view const whole = AsStringView<T>::view(std::forward<T>(s));
  string_view const piece = whole.substr(pos2, count2);
  return replace(pos, count, piece);
}
/** Replace the 'count' characters starting at position 'pos' with
    'count2' copies of 'ch'.  'count' is clamped to the characters
    available after 'pos', so 'npos' means "through the end of the
    string".  Throws std::out_of_range if 'pos' is greater than
    'size()'. */
String& replace(size_type pos, size_type count, size_type count2, char ch)
{
  // string_view::substr validates 'pos' and clamps 'count' the same
  // way std::string::replace does.  Without the clamp 'first + count'
  // would run past end() for count == npos or any oversized count.
  size_type const rcount = view_.substr(pos, count).size();
  const_iterator first = begin() + pos;
  const_iterator last = first + rcount;
  return replace(first, last, count2, ch);
}
/** Replace the characters in [first, last) with 'count2' copies
    of 'ch'.  'first' and 'last' must be iterators into this
    instance. */
String& replace(const_iterator first, const_iterator last, size_type count2,
                char ch)
{
  // Assemble prefix + fill + suffix in a buffer reserved for the
  // final length.
  std::string rebuilt;
  rebuilt.reserve((first - view_.begin()) + count2 + (view_.end() - last));
  rebuilt.append(view_.begin(), first);
  rebuilt.append(count2, ch);
  rebuilt.append(last, view_.end());
  *this = std::move(rebuilt);
  return *this;
}
/** Copy up to 'count' characters starting at position 'pos' into
'dest'.  The out-of-line definition forwards to the view's
'copy()' and returns its result. */
size_type copy(char* dest, size_type count, size_type pos = 0) const;
/** Resize to 'count' characters, padding with null characters
    when growing. */
void resize(size_type count)
{
  resize(count, char());
}
/** Resize to 'count' characters.  Shrinking keeps the first 'count'
    characters; growing appends copies of 'ch'.  The result is built
    in a new internal string that replaces the current value. */
void resize(size_type count, char ch)
{
  std::string resized;
  resized.reserve(count);
  // Keep as many existing characters as fit, then pad up to 'count'.
  // When shrinking, the pad step changes nothing.
  resized.assign(data(), std::min(count, size()));
  resized.resize(count, ch);
  *this = std::move(resized);
}
/** Exchange values with 'other'.  Only the shared buffer pointer and
    the view are swapped; no characters are copied.  Marked noexcept
    because neither swap can throw. */
void swap(String& other) noexcept
{
  std::swap(string_, other.string_);
  std::swap(view_, other.view_);
}
/** Return a substring starting at position 'pos' and
consisting of at most 'count' characters.
The result is built with the substring constructor, so it shares
this instance's buffer.
Throws std::out_of_range if 'pos' is greater than 'size()'. */
String substr(size_type pos = 0, size_type count = npos) const;
template <typename T>