00001 /**
00002 * @copyright
00003 * ====================================================================
00004 * Copyright (c) 2000-2004 CollabNet. All rights reserved.
00005 *
00006 * This software is licensed as described in the file COPYING, which
00007 * you should have received as part of this distribution. The terms
00008 * are also available at http://subversion.tigris.org/license-1.html.
00009 * If newer versions of this license are posted there, you may use a
00010 * newer version instead, at your option.
00011 *
00012 * This software consists of voluntary contributions made by many
00013 * individuals. For exact contribution history, see the revision
00014 * history and logs, available at http://subversion.tigris.org/.
00015 * ====================================================================
00016 * @endcopyright
00017 *
00018 * @file svn_utf.h
00019 * @brief UTF-8 conversion routines
00020 */
00021
00022
00023
00024 #ifndef SVN_UTF_H
00025 #define SVN_UTF_H
00026
00027 #include <apr_xlate.h>
00028
00029 #include "svn_error.h"
00030 #include "svn_string.h"
00031
00032 #ifdef __cplusplus
00033 extern "C" {
00034 #endif /* __cplusplus */
00035
00036
00037 /**
00038 * Initialize the UTF-8 encoding/decoding routines, allocating the
00039 * cached translation handles in a subpool of @a pool.
00040 *
00041 * @note Calling this function is optional, though doing so improves the
00042 * performance of the UTF-8 routines. If it is called, no other svn
00043 * function may be in use in other threads while the call is in progress,
00044 * nor while @a pool is being cleared or destroyed.
00045 *
00046 * @since New in 1.1.
00047 */
00048 void svn_utf_initialize (apr_pool_t *pool);
00049
00050 /** Convert the natively-encoded stringbuf @a src to UTF-8: set @a *dest
00051 * to the utf8-encoded stringbuf, which is allocated in @a pool.
00052 */
00053 svn_error_t *svn_utf_stringbuf_to_utf8 (svn_stringbuf_t **dest,
00054 const svn_stringbuf_t *src,
00055 apr_pool_t *pool);
00056
00057
00058 /** Convert the natively-encoded string @a src to UTF-8: set @a *dest to
00059 * the utf8-encoded string, which is allocated in @a pool.
00060 */
00061 svn_error_t *svn_utf_string_to_utf8 (const svn_string_t **dest,
00062 const svn_string_t *src,
00063 apr_pool_t *pool);
00064
00065
00066 /** Convert the natively-encoded C string @a src to UTF-8: set @a *dest
00067 * to the utf8-encoded C string, which is allocated in @a pool.
00068 */
00069 svn_error_t *svn_utf_cstring_to_utf8 (const char **dest,
00070 const char *src,
00071 apr_pool_t *pool);
00072
00073
00074 /** Convert the C string @a src, encoded in the @a frompage charset, to
00075 * UTF-8: set @a *dest to the utf8-encoded C string, allocated in @a pool.
00076 * @a convset_key is the cache key used for the charset converter; if it
00077 * is NULL, the converter is not cached.
00078 */
00079 svn_error_t *svn_utf_cstring_to_utf8_ex (const char **dest,
00080 const char *src,
00081 const char *frompage,
00082 const char *convset_key,
00083 apr_pool_t *pool);
00084
00085
00086 /** Convert the utf8 stringbuf @a src to the native encoding: set
00087 * @a *dest to the natively-encoded stringbuf, allocated in @a pool.
00088 */
00089 svn_error_t *svn_utf_stringbuf_from_utf8 (svn_stringbuf_t **dest,
00090 const svn_stringbuf_t *src,
00091 apr_pool_t *pool);
00092
00093
00094 /** Convert the utf8 string @a src to the native encoding: set @a *dest
00095 * to the natively-encoded string, allocated in @a pool.
00096 */
00097 svn_error_t *svn_utf_string_from_utf8 (const svn_string_t **dest,
00098 const svn_string_t *src,
00099 apr_pool_t *pool);
00100
00101
00102 /** Convert the utf8 C string @a src to the native encoding: set
00103 * @a *dest to the natively-encoded C string, allocated in @a pool.
00104 */
00105 svn_error_t *svn_utf_cstring_from_utf8 (const char **dest,
00106 const char *src,
00107 apr_pool_t *pool);
00108
00109
00110 /** Convert the utf8 C string @a src to the @a topage charset: set
00111 * @a *dest to the @a topage encoded C string, allocated in @a pool.
00112 * @a convset_key is the cache key used for the charset converter; if it
00113 * is NULL, the converter is not cached.
00114 */
00115 svn_error_t *svn_utf_cstring_from_utf8_ex (const char **dest,
00116 const char *src,
00117 const char *topage,
00118 const char *convset_key,
00119 apr_pool_t *pool);
00120
00121
00122 /** Return a fuzzily native-encoded C string made from the utf8 C string
00123 * @a src and allocated in @a pool. A fuzzy recoding keeps every 7-bit
00124 * ASCII character unchanged and substitutes "?\\XXX" for all others,
00125 * where XXX is the unsigned decimal code for that character.
00126 *
00127 * This function cannot fail; it is guaranteed to return something.
00128 * It first recodes @a src as described above, then attempts to convert
00129 * the resulting 7-bit UTF-8 string to the native encoding. If that
00130 * conversion fails, the raw fuzzily recoded string is returned instead;
00131 * it may or may not be meaningful in the client's locale, but is
00132 * (presumably) better than nothing.
00133 *
00134 * ### Notes:
00135 *
00136 * Improvement is possible, even imminent. The original problem was
00137 * that converting a UTF-8 string (say, a log message) into a locale
00138 * that couldn't represent all of its characters produced only a
00139 * static placeholder saying "[unconvertible log message]". Then
00140 * Justin Erenkrantz pointed out that on platforms which didn't support
00141 * conversion at all, "svn log" would still fail completely when it
00142 * encountered unconvertible data.
00143 *
00144 * Now, in both cases, the caller can at least fall back on this
00145 * function, which converts the message as best it can, substituting
00146 * ?\\XXX escape codes for the non-ASCII characters.
00147 *
00148 * Ultimately, some callers may prefer the iconv "//TRANSLIT" option,
00149 * so when we can detect that at configure time, things will change.
00150 * Also, this should (?) be moved to apr/apu eventually.
00151 *
00152 * See http://subversion.tigris.org/issues/show_bug.cgi?id=807 for
00153 * details.
00154 */
00155 const char *svn_utf_cstring_from_utf8_fuzzy (const char *src,
00156 apr_pool_t *pool);
00157
00158
00159 /** Convert the utf8 stringbuf @a src to the native encoding: set
00160 * @a *dest to the natively-encoded C string, allocated in @a pool.
00161 */
00162 svn_error_t *svn_utf_cstring_from_utf8_stringbuf (const char **dest,
00163 const svn_stringbuf_t *src,
00164 apr_pool_t *pool);
00165
00166
00167 /** Convert the utf8 string @a src to the native encoding: set @a *dest
00168 * to the natively-encoded C string, allocated in @a pool.
00169 */
00170 svn_error_t *svn_utf_cstring_from_utf8_string (const char **dest,
00171 const svn_string_t *src,
00172 apr_pool_t *pool);
00173
00174 #ifdef __cplusplus
00175 }
00176 #endif /* __cplusplus */
00177
00178 #endif /* SVN_UTF_H */
Generated by Doxygen 1.2.14, written by Dimitri van Heesch,
© 1997-2002