/* libCStreamedXML/src/libCStreamedXML/100_types.h
 *
 *  (c)2006, Laurence Withers. Released under the GNU GPL. See file
 *  COPYING for more information / terms of license.
*/



/*! \brief String buffer.

This type implements a simple, growable string buffer. The string held in \a data should be kept
null-terminated. \a len is the string length (excluding the terminating null) and \a size is the
size of the allocated buffer. The \a data pointer is reallocated as necessary (i.e. when \a len
becomes equal to \a size).

It is never valid for \a data or \a size to be 0; in other words, a buffer always owns some
allocated storage.

\note NOTE(review): because the terminating null also occupies space in the buffer, a correctly
    terminated string implies that \a len is strictly less than \a size. Confirm the exact growth
    trigger against the buffer implementation.

*/
struct csxml_buf {
    /// Pointer to string (null terminated). Never a null pointer.
    char* data;

    /// Length of string (excluding null).
    size_t len;

    /// Size of allocated buffer. Never 0.
    size_t size;
};



/*! \brief String list.

This type implements a dynamic array of strings, each of which is stored in a struct csxml_buf.
\a len is the number of valid (in use) elements in the array and \a size is the number of elements
for which storage has been allocated. The \a data pointer is reallocated as necessary (i.e. when
\a len becomes equal to \a size).

Unlike struct csxml_buf, it is valid for \a data and \a size to be zero (a null \a data pointer
with a \a size of 0).

*/
struct csxml_list {
    /// Array of buffers. All allocated elements (i.e. all \a size of them, not just the first
    /// \a len) are valid and initialised.
    struct csxml_buf* data;

    /// Number of elements in use.
    size_t len;

    /// Number of allocated elements.
    size_t size;
};



/*! \brief Streamed XML parser context.

This structure contains all the details of a Streamed XML parsing operation. It groups together:

- the user-adjustable option \a expandEntities;
- the current position in the input (\a line and \a col);
- the attributes of the element currently being reported (\a elemAttrNames and \a elemAttrVals);
- a user data pointer (\a user);
- the callback function pointers through which errors and parsed data are reported;
- the parser's internal state.

Most fields are internal to the parser and should not be touched; those are briefly documented
here. You may want to change the \a expandEntities option, examine the \a line and \a col
variables, and change the callback functions.

Those members marked Internal: are subject to change (either in semantics, type or existence).

*/
struct csxml {
    /// Option: change "&amp;lt;" to "&lt;", etc. if non-zero (the default). Change to 0 to leave
    /// entities inline.
    int expandEntities;

    /// Current line (from 0).
    int line;
    /// Current column (from 0).
    int col;

    /// List of attribute names for current element. Entry \c i corresponds to entry \c i of
    /// \a elemAttrVals (see the element() callback).
    struct csxml_list elemAttrNames;
    /// List of attribute values for current element. Entry \c i corresponds to entry \c i of
    /// \a elemAttrNames (see the element() callback).
    struct csxml_list elemAttrVals;

    /// User data. Callbacks can reach it through their \a ctx parameter.
    void* user;


    /*! \brief Error callback: Streamed XML is not well formed.

    \param ctx Parsing context.
    \param reason Human-readable description of error.

    This function is called whenever badly-formed Streamed XML is encountered (e.g. if content is
    encountered at stream-level).

    The default implementation prints a formatted message to \a stderr.

    */
    void (*notWellFormed)(const struct csxml* ctx, const char* reason);



    /*! \brief Error callback: out of memory.

    \param ctx Parsing context.
    \param amount Number of bytes we tried to allocate.

    This function is called whenever a dynamic string resizing operation (or similar) fails. It is
    called with the number of bytes the library attempted to allocate.

    The default implementation prints a formatted message to \a stderr.

    \todo When parsing extremely long content sections, we could just call the content() callback
        when memory is low rather than failing altogether.

    */
    void (*outOfMemory)(const struct csxml* ctx, size_t amount);



    /*! \brief Error callback: reference to unknown entity.

    \param ctx Parsing context.
    \param ent Name of the referenced entity.

    If an entity is referenced by name (e.g. "&amp;myEntity;" is encountered) but neither the
    lookup in the default entity list nor the entityRef() callback resolves it, this function is
    called.

    The default implementation prints a formatted message to \a stderr.

    */
    void (*unknownEntity)(const struct csxml* ctx, const char* ent);



    /*! \brief Callback: whitespace.

    \param ctx Parsing context.
    \param ws Pointer to string of whitespace.
    \returns Zero on success, non-zero on error.

    This function is called whenever a block of whitespace is encountered at stream level or
    immediately after some structural element (i.e. on leading whitespace within elements). It is
    generally safe to discard it, unless you need to reproduce the file verbatim. Once content is
    encountered within an element, any further whitespace will be reported through the content()
    callback, up to the next structural element.

    The default implementation discards the data.

    */
    int (*whiteSpace)(const struct csxml* ctx, const char* ws);



    /*! \brief Callback: content.

    \param ctx Parsing context.
    \param content Pointer to string of cdata.
    \returns Zero on success, non-zero on error.

    This function is called whenever textual content (cdata) is encountered inside an element. It
    may be called several times in succession for a single run of content, in which case the data
    from each call should be concatenated. If \a expandEntities is non-zero, then this function
    will also be called with the expanded text from any entities encountered, rather than the
    entities themselves.

    The default implementation discards the data.

    */
    int (*content)(const struct csxml* ctx, const char* content);



    /*! \brief Callback: cdata block.

    \param ctx Parsing context.
    \param cdata Pointer to string of cdata.
    \returns Zero on success, non-zero on error.

    This function is called for all data inside a CDATA marked section. It is useful to
    differentiate between cdata encoded in this manner and normal cdata if you are trying to
    reproduce the original file verbatim.

    The default implementation calls content() with the same data.

    */
    int (*cdata)(const struct csxml* ctx, const char* cdata);



    /*! \brief Callback: stream restart marker.

    \param ctx Parsing context.
    \param marker Pointer to marker string.
    \returns Zero on success, non-zero on error.

    Whenever a stream restart marker is encountered, this function is called with the data found
    inside the marker.

    The default implementation ignores the data (but note that the parser state is always reset,
    regardless of what the callback does).

    */
    int (*streamRestart)(const struct csxml* ctx, const char* marker); 



    /*! \brief Callback: processing instruction.

    \param ctx Parsing context.
    \param target PI target.
    \param data PI data (may be 0).
    \returns Zero on success, non-zero on error.

    This callback is used to report processing instructions (PIs). If the PI has no data, then
    \a data may be 0 (a null pointer), so implementations must check it before use.

    The default implementation ignores the data.

    */
    int (*PI)(const struct csxml* ctx, const char* target, const char* data);



    /*! \brief Callback: comment.

    \param ctx Parsing context.
    \param comment Pointer to comment string data.
    \returns Zero on success, non-zero on error.

    This callback is used to report comments. It is only useful if you wish to reproduce the source
    file verbatim (for instance, you want to alter some data in a file but preserve user comments).

    The default implementation ignores the data.

    */
    int (*comment)(const struct csxml* ctx, const char* comment);



    /*! \brief Callback: element.

    \param ctx Parsing context.
    \param elemName Pointer to element name string.
    \param numAttrs Number of attributes.
    \returns Zero on success, non-zero on error.

    This callback is used whenever an element open tag is encountered. To parse attributes, use the
    \a numAttrs variable to determine how many there are, and then access attribute number \c i
    (counting from 0) with:

    <pre>        // get attribute name
        const char* attrName = ctx->elemAttrNames.data[i].data;

        // get attribute value
        const char* attrValue = ctx->elemAttrVals.data[i].data;</pre>

    In the case of an empty element, the closeTag() callback will be called immediately afterwards.

    The default implementation ignores the data.

    */
    int (*element)(const struct csxml* ctx, const char* elemName, int numAttrs);



    /*! \brief Callback: close tag.

    \param ctx Parsing context.
    \param elemName Element name.
    \returns Zero on success, non-zero on error.

    This function is called whenever an element close tag is encountered. The element name will
    always match the open tag element name (if not, the Streamed XML is not well formed, and an
    error will be signalled before this callback is triggered).

    The default implementation ignores the data.

    */
    int (*closeTag)(const struct csxml* ctx, const char* elemName);



    /*! \brief Callback: entity reference.

    \param ctx Parsing context.
    \param ent Entity name.
    \retval 0 if the entity name does not match any known entity.
    \returns Pointer to null-terminated string data for expanding entity.

    This function is called whenever \a expandEntities is true and an entity is encountered in cdata
    or an attribute value. It is only called if the entity does not match one of the five built-in
    defaults. You only need to provide an implementation if you know about some additional entities.

    The default implementation returns 0 ("no such entity").

    */
    const char* (*entityRef)(const struct csxml* ctx, const char* ent);



    /// Internal: string buffer.
    struct csxml_buf buffer;
    /// Internal: string buffer.
    struct csxml_buf buffer2;
    /// Internal: string buffer.
    struct csxml_buf elemName;
    /// Internal: stack of element names, used to match open/close tags.
    struct csxml_list elemStack;
    /// Internal: state machine state.
    int state;
    /// Internal: for matching strings.
    int xmlCount;
    /// Internal: depth in element tree (0 = stream level).
    int elementDepth;
    /// Internal: for matching against restart marker string.
    int restartCount;
    /// Internal: for stripping windows line endings.
    int skipNextNewline; 
    /// Internal: flag to check if we are currently parsing an attribute while expanding an entity.
    int parsingAttr;
    /// Internal: flag to record if current attribute was quoted with ' or " char.
    int singleQuote;
    /// Internal: used to expand character entities.
    int entityChar;
};

/* options for text editors
kate: replace-trailing-space-save true; space-indent true; tab-width 4;
vim: expandtab:ts=4:sw=4
*/
