You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

124 lines
4.4 KiB
C++

// Copyright 2008, Google Inc. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// 3. Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// This file declares the C++ ExpatHandler interface to the otherwise C expat.
#ifndef KML_BASE_EXPAT_HANDLER_H__
#define KML_BASE_EXPAT_HANDLER_H__
#include "expat.h" // XML_Char
#include "string_util.h" // StringVector
namespace kmlbase {
class Attributes;
// This declares the pure virtual ExpatHandler interface.
class ExpatHandler {
public:
virtual ~ExpatHandler() {}
virtual void StartElement(const string& name,
const StringVector& atts) = 0;
virtual void EndElement(const string& name) = 0;
virtual void CharData(const string&) = 0;
// Namespace handlers with an empty default implementation.
virtual void StartNamespace(const string& prefix,
const string& uri) {}
virtual void EndNamespace(const string& prefix) {}
void set_parser(XML_Parser parser) {
parser_ = parser;
}
XML_Parser get_parser() {
return parser_;
}
private:
XML_Parser parser_;
};
// Bit patterns used by xmlchar_to_utf8 when packing a code point into bytes.
// Low six bits of the value carried by each trailing byte (0011 1111).
const int kBitMask = 0x3F;
// Marker OR-ed into every trailing byte (1000 0000).
const int kByteMask = 0x80;
// Marker OR-ed into the first byte of a two-byte sequence (1100 0000).
const int kMask2Bytes = 0xC0;
// Marker OR-ed into the first byte of a three-byte sequence (1110 0000).
const int kMask3Bytes = 0xE0;
// Appends the UTF-8 encoding of the single XML_Char at *input to *buffer.
// Does nothing if either pointer is NULL.
//
// When XML_Char is one byte wide the value is copied through unchanged.
// Otherwise the value is treated as a UTF-16 code unit and written as a one-,
// two- or three-byte UTF-8 sequence.  Surrogate code units (0xD800-0xDFFF)
// cannot be encoded on their own, so both halves of a surrogate pair are
// dropped; characters outside the first 64K code points are therefore lost.
// The input is a pointer rather than a value so that pair handling could be
// added later without changing the interface.
inline void xmlchar_to_utf8(const XML_Char *input, string* buffer) {
  if (!input || !buffer)
    return;
  const int c = *input;
  // Rely on constant folding and inlining to make this fast when not
  // built with XML_UNICODE; this function should optimize down to an
  // inlined buffer.push_back().
  if (sizeof(XML_Char) == 1 || c < 0x80) {
    buffer->push_back(static_cast<char>(c));
  } else if (c < 0x800) {
    // Two bytes: 110xxxxx 10xxxxxx.
    buffer->push_back(static_cast<char>(kMask2Bytes | (c >> 6)));
    buffer->push_back(static_cast<char>(kByteMask | (c & kBitMask)));
  } else if (c < 0xd800 || c > 0xdfff) {
    // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
    buffer->push_back(static_cast<char>(kMask3Bytes | (c >> 12)));
    buffer->push_back(static_cast<char>(kByteMask | ((c >> 6) & kBitMask)));
    buffer->push_back(static_cast<char>(kByteMask | (c & kBitMask)));
  } else {
    // UTF-16 surrogate, high (0xD800-0xDBFF) or low (0xDC00-0xDFFF).  We
    // 'handle' them by dropping them.  The low half must be dropped too:
    // encoding it as three bytes would produce invalid UTF-8.
  }
}
// Converts the NUL-terminated XML_Char string at `input` to a string by
// passing each element, in order, to xmlchar_to_utf8.  A NULL `input`
// yields an empty string.
inline string xml_char_to_string(const XML_Char *input) {
  string utf8;
  if (!input) {
    return utf8;
  }
  const XML_Char *cursor = input;
  while (*cursor) {
    xmlchar_to_utf8(cursor, &utf8);
    ++cursor;
  }
  return utf8;
}
inline void xml_char_to_string_vec(const XML_Char **input,
kmlbase::StringVector *ovec) {
if (!ovec)
return;
while (input && *input) {
ovec->push_back(xml_char_to_string(*input++));
ovec->push_back(xml_char_to_string(*input++));
}
}
// Converts exactly `length` XML_Char elements starting at `input` to a
// string by passing each one to xmlchar_to_utf8.  Unlike xml_char_to_string
// this does not stop at a NUL element, so the buffer need not be terminated.
// A NULL `input` yields an empty string regardless of `length`.
inline string xml_char_to_string_n(const XML_Char *input, size_t length) {
  string output;
  // Without this guard a NULL input would be incremented into a bogus
  // non-NULL pointer and dereferenced on the second iteration.
  if (!input)
    return output;
  // Each element appends at most three bytes; `length` is a cheap lower bound
  // that saves reallocations for the common all-ASCII case.
  output.reserve(length);
  for (size_t i = 0; i < length; ++i) {
    xmlchar_to_utf8(input + i, &output);
  }
  return output;
}
} // end namespace kmlbase
#endif // KML_BASE_EXPAT_HANDLER_H__