Program Listing for File StringUtils.cpp
↰ Return to documentation for file (src/microsoft/shortlist/utils/StringUtils.cpp)
#include "microsoft/shortlist/utils/StringUtils.h"
#include <stdio.h>
#include <algorithm>
#include <cctype>
#include <string>
namespace marian {
namespace quicksand {
#include "microsoft/shortlist/logging/LoggerMacros.h"
// Formats `format` (printf-style) with the arguments held in `args` and returns the result.
//
// A 5000-byte stack buffer is tried first; if the formatted text does not fit, a heap buffer
// of exactly the required size is used instead. The returned string always has the length
// reported by the formatting call, so the stack path and the heap path treat embedded NUL
// characters the same way.
//
// `args` is only read here; the caller remains responsible for calling va_end() on it.
// A null `format`, or any failure reported by the underlying formatting call, is passed to
// LOG_ERROR_AND_THROW (project macro defined in LoggerMacros.h).
std::string StringUtils::VarArgsToString(const char * format, va_list args) {
    if (format == nullptr) {
        LOG_ERROR_AND_THROW("'format' cannot be null in StringUtils::VarArgsToString");
    }

    std::string output;

    // Most of the time the stack buffer (5000 chars) will be sufficient.
    // In cases where this is insufficient, dynamically allocate an appropriately sized buffer
    char buffer[5000];

#ifdef QUICKSAND_WINDOWS_BUILD
    // Work on a copy so that `args` is still untouched if a second pass is needed.
    va_list copy;
    va_copy(copy, args);
    int ret = vsnprintf_s(buffer, sizeof(buffer), _TRUNCATE, format, copy);
    va_end(copy);

    if (ret >= 0) {
        output.assign(buffer, static_cast<std::size_t>(ret));
    }
    else {
        // A negative result means the stack buffer was too small (or formatting failed):
        // ask for the exact size first.
        va_list copy2;
        va_copy(copy2, args);
        int needed_size = _vscprintf(format, copy2);
        va_end(copy2);
        if (needed_size < 0) {
            LOG_ERROR_AND_THROW("A call to vsnprintf_s() failed. This should never happen");
        }
        else {
            // std::string owns the heap buffer, so it is released on every exit path,
            // including when an exception propagates.
            std::string dynamic_buffer(static_cast<std::size_t>(needed_size) + 1, '\0');
            int ret2 = vsnprintf_s(&dynamic_buffer[0], dynamic_buffer.size(), _TRUNCATE, format, args);
            if (ret2 >= 0) {
                dynamic_buffer.resize(static_cast<std::size_t>(ret2));
                output.swap(dynamic_buffer);
            }
            else {
                LOG_ERROR_AND_THROW("A call to vsnprintf_s() failed. This should never happen, "
                    "since we made a call to _vscprintf() to check the dynamic buffer size. The call to _vscprintf() "
                    "returned %d bytes, but apparently that was not enough. This would imply a bug in MSVC's vsnprintf_s implementation.", needed_size);
            }
        }
    }
#else
    // Work on a copy so that `args` is still untouched if a second pass is needed.
    va_list copy;
    va_copy(copy, args);
    int needed_size = vsnprintf(buffer, sizeof(buffer), format, copy);
    va_end(copy);

    if (needed_size < 0) {
        // vsnprintf() reports errors with a negative value; never use it as a length.
        LOG_ERROR_AND_THROW("A call to vsnprintf() failed. Return value: %d.",
            needed_size);
    }
    else if (needed_size < static_cast<int>(sizeof(buffer))) {
        output.assign(buffer, static_cast<std::size_t>(needed_size));
    }
    else {
        // std::string owns the heap buffer, so it is released on every exit path,
        // including when an exception propagates.
        std::string dynamic_buffer(static_cast<std::size_t>(needed_size) + 1, '\0');
        int ret = vsnprintf(&dynamic_buffer[0], dynamic_buffer.size(), format, args);
        if (ret >= 0 && ret < needed_size + 1) {
            dynamic_buffer.resize(static_cast<std::size_t>(ret));
            output.swap(dynamic_buffer);
        }
        else {
            LOG_ERROR_AND_THROW("A call to vsnprintf() failed. Return value: %d.",
                ret);
        }
    }
#endif

    return output;
}
// Splits `input` into lines. A line ends at "\r", "\n" or the pair "\r\n" (the pair counts
// as a single terminator). Empty lines between terminators are kept, but no empty line is
// produced after a trailing terminator. An empty `input` yields an empty vector.
std::vector<std::string> StringUtils::SplitIntoLines(const std::string& input) {
    std::vector<std::string> lines;
    const std::size_t length = input.size();

    std::size_t line_begin = 0;
    std::size_t cursor = 0;
    while (cursor < length) {
        const char ch = input[cursor];
        if (ch != '\r' && ch != '\n') {
            ++cursor;
            continue;
        }

        lines.push_back(input.substr(line_begin, cursor - line_begin));

        // Treat "\r\n" as one terminator by stepping over both characters.
        const bool is_crlf = (ch == '\r') && (cursor + 1 < length) && (input[cursor + 1] == '\n');
        cursor += is_crlf ? 2 : 1;
        line_begin = cursor;
    }

    // Only emit the remainder when it is non-empty (no empty trailing line).
    if (line_begin < length) {
        lines.push_back(input.substr(line_begin));
    }
    return lines;
}
// Returns true when `str` begins with `prefix`. An empty `prefix` matches every string.
bool StringUtils::StartsWith(const std::string& str, const std::string& prefix) {
    const std::size_t prefix_len = prefix.size();
    return prefix_len <= str.size() && str.compare(0, prefix_len, prefix) == 0;
}
// Returns true when `str` ends with `suffix`. An empty `suffix` matches every string.
bool StringUtils::EndsWith(const std::string& str, const std::string& suffix) {
    const std::size_t suffix_len = suffix.size();
    if (suffix_len > str.size()) {
        return false;
    }
    // Compare the last `suffix_len` characters of `str` against `suffix`.
    return str.compare(str.size() - suffix_len, suffix_len, suffix) == 0;
}
// Breaks `input` into entries: the text is first split into lines (SplitIntoLines), each
// line is split on ";", and every resulting piece is passed through CleanupWhitespace
// before being appended to the result, in order of appearance.
std::vector<std::string> StringUtils::SplitFileList(const std::string& input) {
    std::vector<std::string> files;
    const std::vector<std::string> lines = SplitIntoLines(input);
    for (std::size_t line_idx = 0; line_idx < lines.size(); ++line_idx) {
        const std::vector<std::string> entries = Split(lines[line_idx], ";");
        for (std::size_t entry_idx = 0; entry_idx < entries.size(); ++entry_idx) {
            files.push_back(CleanupWhitespace(entries[entry_idx]));
        }
    }
    return files;
}
// Splits `input` at every occurrence of the character `splitter`. Empty pieces (leading,
// trailing, or between adjacent splitters) are kept. An empty `input` yields an empty
// vector; otherwise the result has one more element than there are splitters.
std::vector<std::string> StringUtils::Split(const std::string& input, char splitter) {
    std::vector<std::string> pieces;
    if (input.empty()) {
        return pieces;
    }

    std::size_t piece_begin = 0;
    for (std::size_t hit = input.find(splitter);
         hit != std::string::npos;
         hit = input.find(splitter, piece_begin)) {
        pieces.push_back(input.substr(piece_begin, hit - piece_begin));
        piece_begin = hit + 1;
    }

    // Whatever follows the last splitter (possibly nothing) is the final piece.
    pieces.push_back(input.substr(piece_begin));
    return pieces;
}
// Splits `input` at every occurrence of the string `splitter`. Empty pieces (leading,
// trailing, or between adjacent splitters) are kept. An empty `input` yields an empty
// vector.
//
// An empty `splitter` cannot delimit anything, so `input` is returned as a single piece.
// (Searching for an empty string always succeeds at the current position, which would
// otherwise make the loop below spin forever while growing the result.)
std::vector<std::string> StringUtils::Split(const std::string& input, const std::string& splitter) {
    std::vector<std::string> output;
    if (input.empty()) {
        return output;
    }
    if (splitter.empty()) {
        output.push_back(input);
        return output;
    }

    std::size_t pos = 0;
    while (true) {
        const std::size_t next_pos = input.find(splitter, pos);
        if (next_pos == std::string::npos) {
            // No further splitter: the remainder (possibly empty) is the last piece.
            output.push_back(input.substr(pos));
            break;
        }
        output.push_back(input.substr(pos, next_pos - pos));
        pos = next_pos + splitter.size();
    }
    return output;
}
// Renders the first `length` values of `items` as decimal integers separated by `joiner`.
// Each value is widened to int32_t so it is printed as a number rather than a character.
// A `length` of zero or less produces an empty string.
std::string StringUtils::Join(const std::string& joiner, const uint8_t * items, int32_t length) {
    std::ostringstream joined;
    if (length > 0) {
        joined << static_cast<int32_t>(items[0]);
        for (int32_t idx = 1; idx < length; ++idx) {
            joined << joiner << static_cast<int32_t>(items[idx]);
        }
    }
    return joined.str();
}
// Renders the first `length` values of `items` as signed decimal integers separated by
// `joiner`. Each value is widened to int32_t so it is printed as a number rather than a
// character. A `length` of zero or less produces an empty string.
std::string StringUtils::Join(const std::string& joiner, const int8_t * items, int32_t length) {
    std::ostringstream joined;
    if (length > 0) {
        joined << static_cast<int32_t>(items[0]);
        for (int32_t idx = 1; idx < length; ++idx) {
            joined << joiner << static_cast<int32_t>(items[idx]);
        }
    }
    return joined.str();
}
std::string StringUtils::PrintString(const char * format, ...) {
va_list args;
va_start(args, format);
std::string output = StringUtils::VarArgsToString(format, args);
va_end(args);
return output;
}
// Splits `input` into tokens, where a token is a maximal run of characters for which
// IsWhitespace() is false. Leading, trailing and repeated whitespace never produce empty
// tokens; an empty or all-whitespace `input` yields an empty vector.
std::vector<std::string> StringUtils::WhitespaceTokenize(const std::string& input) {
    std::vector<std::string> tokens;
    const std::size_t length = input.size();

    std::size_t cursor = 0;
    while (cursor < length) {
        // Skip the whitespace run in front of the next token.
        while (cursor < length && IsWhitespace(input[cursor])) {
            ++cursor;
        }
        if (cursor == length) {
            break;
        }

        // Consume the token itself.
        const std::size_t token_begin = cursor;
        while (cursor < length && !IsWhitespace(input[cursor])) {
            ++cursor;
        }
        tokens.push_back(input.substr(token_begin, cursor - token_begin));
    }
    return tokens;
}
// Normalizes whitespace in `input`: leading and trailing whitespace (as classified by
// IsWhitespace()) is removed, and every interior run of whitespace is replaced by a single
// ' ' character. An empty or all-whitespace `input` yields an empty string.
std::string StringUtils::CleanupWhitespace(const std::string& input) {
    std::string cleaned;
    bool gap_pending = false;  // whitespace seen since the last emitted character
    for (std::size_t idx = 0; idx < input.size(); ++idx) {
        const char ch = input[idx];
        if (IsWhitespace(ch)) {
            gap_pending = true;
            continue;
        }
        // Emit the separator lazily so leading and trailing runs never produce a space.
        if (gap_pending && !cleaned.empty()) {
            cleaned.push_back(' ');
        }
        cleaned.push_back(ch);
        gap_pending = false;
    }
    return cleaned;
}
// Returns a copy of `str` in which the five XML special characters are replaced by their
// predefined entities:
//   &  ->  &amp;     "  ->  &quot;     '  ->  &apos;     <  ->  &lt;     >  ->  &gt;
// All other characters are copied through unchanged.
std::string StringUtils::XmlEscape(const std::string& str) {
    std::ostringstream ss;
    for (std::size_t i = 0; i < str.size(); i++) {
        const char c = str[i];
        switch (c) {
        case '&':
            ss << "&amp;";
            break;
        case '"':
            ss << "&quot;";
            break;
        case '\'':
            ss << "&apos;";
            break;
        case '<':
            ss << "&lt;";
            break;
        case '>':
            ss << "&gt;";
            break;
        default:
            ss << c;
            break;
        }
    }
    return ss.str();
}
// Identity overload: returns a copy of `str`.
std::string StringUtils::ToString(const std::string& str) {
    return std::string(str);
}
// Converts a boolean to the text "true" or "false".
std::string StringUtils::ToString(bool obj) {
    if (obj) {
        return "true";
    }
    return "false";
}
// Returns a copy of `str` with every character converted by std::toupper (current C locale).
// The conversion is byte-wise, so multi-byte encodings such as UTF-8 are not case-mapped
// beyond what the locale does for individual bytes.
std::string StringUtils::ToUpper(const std::string& str) {
    std::string output;
    output.reserve(str.size());
    for (char c : str) {
        // std::toupper requires a value representable as unsigned char (or EOF); passing a
        // negative plain char is undefined behavior, so convert through unsigned char first.
        output.push_back(static_cast<char>(std::toupper(static_cast<unsigned char>(c))));
    }
    return output;
}
// Returns a copy of `str` with every character converted by std::tolower (current C locale).
// The conversion is byte-wise, so multi-byte encodings such as UTF-8 are not case-mapped
// beyond what the locale does for individual bytes.
std::string StringUtils::ToLower(const std::string& str) {
    std::string output;
    output.reserve(str.size());
    for (char c : str) {
        // std::tolower requires a value representable as unsigned char (or EOF); passing a
        // negative plain char is undefined behavior, so convert through unsigned char first.
        output.push_back(static_cast<char>(std::tolower(static_cast<unsigned char>(c))));
    }
    return output;
}
} // namespace quicksand
} // namespace marian