// Copyright (c) 2009-2010 Satoshi Nakamoto // Copyright (c) 2009-2020 The Bitcoin Core developers // Copyright (c) 2016-2024 The Hush developers // Distributed under the GPLv3 software license, see the accompanying // file COPYING or https://www.gnu.org/licenses/gpl-3.0.en.html #include "util/strencodings.h" #include "util/string.h" #include #include #include #include #include #include #include using namespace std; static const std::string CHARS_ALPHA_NUM = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; static const std::string SAFE_CHARS[] = { CHARS_ALPHA_NUM + " .,;-_/:?@()", // SAFE_CHARS_DEFAULT CHARS_ALPHA_NUM + " .,;-_?@", // SAFE_CHARS_UA_COMMENT CHARS_ALPHA_NUM + ".-_", // SAFE_CHARS_FILENAME CHARS_ALPHA_NUM + "!*'();:@&=+$,/?#[]-_.~%", // SAFE_CHARS_URI }; std::string SanitizeString(const std::string& str, int rule) { std::string strResult; for (std::string::size_type i = 0; i < str.size(); i++) { if (SAFE_CHARS[rule].find(str[i]) != std::string::npos) strResult.push_back(str[i]); } return strResult; } string SanitizeFilename(const string& str) { /** * safeChars chosen to restrict filename, keeping it simple to avoid cross-platform issues. * http://stackoverflow.com/a/2306003 */ static string safeChars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890"); string strResult; for (std::string::size_type i = 0; i < str.size(); i++) { if (safeChars.find(str[i]) != std::string::npos) strResult.push_back(str[i]); } return strResult; } std::string HexInt(uint32_t val) { std::stringstream ss; ss << std::setfill('0') << std::setw(sizeof(uint32_t) * 2) << std::hex << val; return ss.str(); } uint32_t ParseHexToUInt32(const std::string& str) { std::istringstream converter(str); uint32_t value; converter >> std::hex >> value; return value; } const signed char p_util_hexdigit[256] = { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,1,2,3,4,5,6,7,8,9,-1,-1,-1,-1,-1,-1, -1,0xa,0xb,0xc,0xd,0xe,0xf,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,0xa,0xb,0xc,0xd,0xe,0xf,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, }; signed char HexDigit(char c) { return p_util_hexdigit[(unsigned char)c]; } bool IsHex(const std::string& str) { for(std::string::const_iterator it(str.begin()); it != str.end(); ++it) { if (HexDigit(*it) < 0) return false; } return (str.size() > 0) && (str.size()%2 == 0); } bool IsHexNumber(const std::string& str) { size_t starting_location = 0; if (str.size() > 2 && *str.begin() == '0' && *(str.begin()+1) == 'x') { starting_location = 2; } for (const char c : str.substr(starting_location)) { if (HexDigit(c) < 0) return false; } // Return false for empty string or "0x". return (str.size() > starting_location); } std::vector ParseHex(const char* psz) { // convert hex dump to vector std::vector vch; while (true) { while (IsSpace(*psz)) psz++; signed char c = HexDigit(*psz++); if (c == (signed char)-1) break; unsigned char n = (c << 4); c = HexDigit(*psz++); if (c == (signed char)-1) break; n |= c; vch.push_back(n); } return vch; } std::vector ParseHex(const std::string& str) { return ParseHex(str.c_str()); } void SplitHostPort(std::string in, int &portOut, std::string &hostOut) { size_t colon = in.find_last_of(':'); // if a : is found, and it either follows a [...], or no other : is in the string, treat it as port separator bool fHaveColon = colon != in.npos; bool fBracketed = fHaveColon && (in[0]=='[' && in[colon-1]==']'); // if there is a colon, and in[0]=='[', colon is not 0, so in[colon-1] is safe bool fMultiColon = fHaveColon && (in.find_last_of(':',colon-1) != in.npos); if (fHaveColon && (colon==0 || fBracketed || !fMultiColon)) { int32_t n; if (ParseInt32(in.substr(colon + 1), &n) && n > 0 && n < 0x10000) { in = in.substr(0, colon); portOut = n; } } if (in.size()>0 && in[0] == '[' && in[in.size()-1] == ']') hostOut = in.substr(1, in.size()-2); else hostOut = in; } std::string EncodeBase64(Span input) { static const char *pbase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; std::string str; str.reserve(((input.size() + 2) / 3) * 4); ConvertBits<8, 6, true>([&](int v) { str += pbase64[v]; }, input.begin(), input.end()); while (str.size() % 4) str += '='; return str; } std::string EncodeBase64(const unsigned char* pch, size_t len) { static const char *pbase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; std::string str; str.reserve(((len + 2) / 3) * 4); ConvertBits<8, 6, true>([&](int v) { str += pbase64[v]; }, pch, pch + len); while (str.size() % 4) str += '='; return str; } std::string EncodeBase64(const std::string& str) { return EncodeBase64((const unsigned char*)str.data(), str.size()); } std::vector DecodeBase64(const char* p, bool* pf_invalid) { static const int decode64_table[256] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; const char* e = p; std::vector val; val.reserve(strlen(p)); while (*p != 0) { int x = decode64_table[(unsigned char)*p]; if (x == -1) break; val.push_back(x); ++p; } std::vector ret; ret.reserve((val.size() * 3) / 4); bool valid = ConvertBits<6, 8, false>([&](unsigned char c) { ret.push_back(c); }, val.begin(), val.end()); const char* q = p; while (valid && *p != 0) { if (*p != '=') { valid = false; break; } ++p; } valid = valid && (p - e) % 4 == 0 && p - q < 4; if (pf_invalid) *pf_invalid = !valid; return ret; } std::string DecodeBase64(const std::string& str, bool* pf_invalid) { if (!ValidAsCString(str)) { if (pf_invalid) { *pf_invalid = true; } return {}; } std::vector vchRet = DecodeBase64(str.c_str(), pf_invalid); return std::string((const char*)vchRet.data(), vchRet.size()); } std::string EncodeBase32(Span input, bool pad) { static const char *pbase32 = "abcdefghijklmnopqrstuvwxyz234567"; std::string str; str.reserve(((input.size() + 4) / 5) * 8); ConvertBits<8, 5, true>([&](int v) { str += pbase32[v]; }, input.begin(), input.end()); if (pad) { while (str.size() % 8) { str += '='; } } return str; } std::string EncodeBase32(const std::string& str, bool pad) { return EncodeBase32(MakeUCharSpan(str), pad); } std::vector DecodeBase32(const char* p, bool* pf_invalid) { static const int decode32_table[256] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; const char* e = p; std::vector val; val.reserve(strlen(p)); while (*p != 0) { int x = decode32_table[(unsigned char)*p]; if (x == -1) break; val.push_back(x); ++p; } std::vector ret; ret.reserve((val.size() * 5) / 8); bool valid = ConvertBits<5, 8, false>([&](unsigned char c) { ret.push_back(c); }, val.begin(), val.end()); const char* q = p; while (valid && *p != 0) { if (*p != '=') { valid = false; break; } ++p; } valid = valid && (p - e) % 8 == 0 && p - q < 8; if (pf_invalid) *pf_invalid = !valid; return ret; } std::string DecodeBase32(const std::string& str, bool* pf_invalid) { if (!ValidAsCString(str)) { if (pf_invalid) { *pf_invalid = true; } return {}; } std::vector vchRet = DecodeBase32(str.c_str(), pf_invalid); return std::string((const char*)vchRet.data(), vchRet.size()); } static bool ParsePrechecks(const std::string& str) { if (str.empty()) // No empty string allowed return false; if (str.size() >= 1 && (IsSpace(str[0]) || IsSpace(str[str.size()-1]))) // No padding allowed return false; if (!ValidAsCString(str)) // No embedded NUL characters allowed return false; return true; } bool ParseInt32(const std::string& str, int32_t *out) { if (!ParsePrechecks(str)) return false; char *endp = nullptr; errno = 0; // strtol will not set errno if valid long int n = strtol(str.c_str(), &endp, 10); if(out) *out = (int32_t)n; // Note that strtol returns a *long int*, so even if strtol doesn't report an over/underflow // we still have to check that the returned value is within the range of an *int32_t*. On 64-bit // platforms the size of these types may be different. return endp && *endp == 0 && !errno && n >= std::numeric_limits::min() && n <= std::numeric_limits::max(); } bool ParseInt64(const std::string& str, int64_t *out) { if (!ParsePrechecks(str)) return false; char *endp = nullptr; errno = 0; // strtoll will not set errno if valid long long int n = strtoll(str.c_str(), &endp, 10); if(out) *out = (int64_t)n; // Note that strtoll returns a *long long int*, so even if strtol doesn't report an over/underflow // we still have to check that the returned value is within the range of an *int64_t*. return endp && *endp == 0 && !errno && n >= std::numeric_limits::min() && n <= std::numeric_limits::max(); } bool ParseUInt8(const std::string& str, uint8_t *out) { uint32_t u32; if (!ParseUInt32(str, &u32) || u32 > std::numeric_limits::max()) { return false; } if (out != nullptr) { *out = static_cast(u32); } return true; } bool ParseUInt32(const std::string& str, uint32_t *out) { if (!ParsePrechecks(str)) return false; if (str.size() >= 1 && str[0] == '-') // Reject negative values, unfortunately strtoul accepts these by default if they fit in the range return false; char *endp = nullptr; errno = 0; // strtoul will not set errno if valid unsigned long int n = strtoul(str.c_str(), &endp, 10); if(out) *out = (uint32_t)n; // Note that strtoul returns a *unsigned long int*, so even if it doesn't report an over/underflow // we still have to check that the returned value is within the range of an *uint32_t*. On 64-bit // platforms the size of these types may be different. return endp && *endp == 0 && !errno && n <= std::numeric_limits::max(); } bool ParseUInt64(const std::string& str, uint64_t *out) { if (!ParsePrechecks(str)) return false; if (str.size() >= 1 && str[0] == '-') // Reject negative values, unfortunately strtoull accepts these by default if they fit in the range return false; char *endp = nullptr; errno = 0; // strtoull will not set errno if valid unsigned long long int n = strtoull(str.c_str(), &endp, 10); if(out) *out = (uint64_t)n; // Note that strtoull returns a *unsigned long long int*, so even if it doesn't report an over/underflow // we still have to check that the returned value is within the range of an *uint64_t*. return endp && *endp == 0 && !errno && n <= std::numeric_limits::max(); } bool ParseDouble(const std::string& str, double *out) { if (!ParsePrechecks(str)) return false; if (str.size() >= 2 && str[0] == '0' && str[1] == 'x') // No hexadecimal floats allowed return false; std::istringstream text(str); text.imbue(std::locale::classic()); double result; text >> result; if(out) *out = result; return text.eof() && !text.fail(); } std::string FormatParagraph(const std::string& in, size_t width, size_t indent) { std::stringstream out; size_t ptr = 0; size_t indented = 0; while (ptr < in.size()) { size_t lineend = in.find_first_of('\n', ptr); if (lineend == std::string::npos) { lineend = in.size(); } const size_t linelen = lineend - ptr; const size_t rem_width = width - indented; if (linelen <= rem_width) { out << in.substr(ptr, linelen + 1); ptr = lineend + 1; indented = 0; } else { size_t finalspace = in.find_last_of(" \n", ptr + rem_width); if (finalspace == std::string::npos || finalspace < ptr) { // No place to break; just include the entire word and move on finalspace = in.find_first_of("\n ", ptr); if (finalspace == std::string::npos) { // End of the string, just add it and break out << in.substr(ptr); break; } } out << in.substr(ptr, finalspace - ptr) << "\n"; if (in[finalspace] == '\n') { indented = 0; } else if (indent) { out << std::string(indent, ' '); indented = indent; } ptr = finalspace + 1; } } return out.str(); } std::string i64tostr(int64_t n) { return strprintf("%d", n); } std::string itostr(int n) { return strprintf("%d", n); } int64_t atoi64(const char* psz) { #ifdef _MSC_VER return _atoi64(psz); #else return strtoll(psz, nullptr, 10); #endif } int64_t atoi64(const std::string& str) { #ifdef _MSC_VER return _atoi64(str.c_str()); #else return strtoll(str.c_str(), nullptr, 10); #endif } int atoi(const std::string& str) { return atoi(str.c_str()); } /** Upper bound for mantissa. * 10^18-1 is the largest arbitrary decimal that will fit in a signed 64-bit integer. * Larger integers cannot consist of arbitrary combinations of 0-9: * * 999999999999999999 1^18-1 * 9223372036854775807 (1<<63)-1 (max int64_t) * 9999999999999999999 1^19-1 (would overflow) */ static const int64_t UPPER_BOUND = 1000000000000000000LL - 1LL; /** Helper function for ParseFixedPoint */ static inline bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros) { if(ch == '0') ++mantissa_tzeros; else { for (int i=0; i<=mantissa_tzeros; ++i) { if (mantissa > (UPPER_BOUND / 10LL)) return false; /* overflow */ mantissa *= 10; } mantissa += ch - '0'; mantissa_tzeros = 0; } return true; } bool ParseFixedPoint(const std::string &val, int decimals, int64_t *amount_out) { int64_t mantissa = 0; int64_t exponent = 0; int mantissa_tzeros = 0; bool mantissa_sign = false; bool exponent_sign = false; int ptr = 0; int end = val.size(); int point_ofs = 0; if (ptr < end && val[ptr] == '-') { mantissa_sign = true; ++ptr; } if (ptr < end) { if (val[ptr] == '0') { /* pass single 0 */ ++ptr; } else if (val[ptr] >= '1' && val[ptr] <= '9') { while (ptr < end && IsDigit(val[ptr])) { if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros)) return false; /* overflow */ ++ptr; } } else return false; /* missing expected digit */ } else return false; /* empty string or loose '-' */ if (ptr < end && val[ptr] == '.') { ++ptr; if (ptr < end && IsDigit(val[ptr])) { while (ptr < end && IsDigit(val[ptr])) { if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros)) return false; /* overflow */ ++ptr; ++point_ofs; } } else return false; /* missing expected digit */ } if (ptr < end && (val[ptr] == 'e' || val[ptr] == 'E')) { ++ptr; if (ptr < end && val[ptr] == '+') ++ptr; else if (ptr < end && val[ptr] == '-') { exponent_sign = true; ++ptr; } if (ptr < end && IsDigit(val[ptr])) { while (ptr < end && IsDigit(val[ptr])) { if (exponent > (UPPER_BOUND / 10LL)) return false; /* overflow */ exponent = exponent * 10 + val[ptr] - '0'; ++ptr; } } else return false; /* missing expected digit */ } if (ptr != end) return false; /* trailing garbage */ /* finalize exponent */ if (exponent_sign) exponent = -exponent; exponent = exponent - point_ofs + mantissa_tzeros; /* finalize mantissa */ if (mantissa_sign) mantissa = -mantissa; /* convert to one 64-bit fixed-point value */ exponent += decimals; if (exponent < 0) return false; /* cannot represent values smaller than 10^-decimals */ if (exponent >= 18) return false; /* cannot represent values larger than or equal to 10^(18-decimals) */ for (int i=0; i < exponent; ++i) { if (mantissa > (UPPER_BOUND / 10LL) || mantissa < -(UPPER_BOUND / 10LL)) return false; /* overflow */ mantissa *= 10; } if (mantissa > UPPER_BOUND || mantissa < -UPPER_BOUND) return false; /* overflow */ if (amount_out) *amount_out = mantissa; return true; } std::string ToLower(const std::string& str) { std::string r; for (auto ch : str) r += ToLower((unsigned char)ch); return r; } std::string ToUpper(const std::string& str) { std::string r; for (auto ch : str) r += ToUpper((unsigned char)ch); return r; } std::string Capitalize(std::string str) { if (str.empty()) return str; str[0] = ToUpper(str.front()); return str; } std::string HexStr(const Span s) { std::string rv; static constexpr char hexmap[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; rv.reserve(s.size() * 2); for (uint8_t v: s) { rv.push_back(hexmap[v >> 4]); rv.push_back(hexmap[v & 15]); } return rv; }