00001 /** 00002 * @copyright 00003 * ==================================================================== 00004 * Copyright (c) 2000-2004 CollabNet. All rights reserved. 00005 * 00006 * This software is licensed as described in the file COPYING, which 00007 * you should have received as part of this distribution. The terms 00008 * are also available at http://subversion.tigris.org/license-1.html. 00009 * If newer versions of this license are posted there, you may use a 00010 * newer version instead, at your option. 00011 * 00012 * This software consists of voluntary contributions made by many 00013 * individuals. For exact contribution history, see the revision 00014 * history and logs, available at http://subversion.tigris.org/. 00015 * ==================================================================== 00016 * @endcopyright 00017 * 00018 * @file svn_utf.h 00019 * @brief UTF-8 conversion routines 00020 */ 00021 00022 00023 00024 #ifndef SVN_UTF_H 00025 #define SVN_UTF_H 00026 00027 #include <apr_xlate.h> 00028 00029 #include "svn_error.h" 00030 #include "svn_string.h" 00031 00032 #ifdef __cplusplus 00033 extern "C" { 00034 #endif /* __cplusplus */ 00035 00036 00037 /** @since New in 1.1. 00038 * Initialize the UTF-8 encoding/decoding routines. 00039 * Allocate cached translation handles in a subpool of @a pool. 00040 * NOTE: It is optional to call this function, but if it is used, no other 00041 * svn function may be in use in other threads during the call of this 00042 * function or when @a pool is cleared or destroyed. 00043 * Initializing the UTF-8 routines will improve performance. 00044 * NOTE: In svn 1.1.0, this function is a placeholder; we expect to 00045 * fill in the implementation in svn 1.1.1. 00046 */ 00047 void svn_utf_initialize (apr_pool_t *pool); 00048 00049 /** Set @a *dest to a utf8-encoded stringbuf from native stringbuf @a src; 00050 * allocate @a *dest in @a pool. 
00051 */ 00052 svn_error_t *svn_utf_stringbuf_to_utf8 (svn_stringbuf_t **dest, 00053 const svn_stringbuf_t *src, 00054 apr_pool_t *pool); 00055 00056 00057 /** Set @a *dest to a utf8-encoded string from native string @a src; allocate 00058 * @a *dest in @a pool. 00059 */ 00060 svn_error_t *svn_utf_string_to_utf8 (const svn_string_t **dest, 00061 const svn_string_t *src, 00062 apr_pool_t *pool); 00063 00064 00065 /** Set @a *dest to a utf8-encoded C string from native C string @a src; 00066 * allocate @a *dest in @a pool. 00067 */ 00068 svn_error_t *svn_utf_cstring_to_utf8 (const char **dest, 00069 const char *src, 00070 apr_pool_t *pool); 00071 00072 00073 /** Set @a *dest to a utf8-encoded C string from @a frompage C string 00074 * @a src; allocate @a *dest in @a pool. Use @a convset_key as the 00075 * cache key for the charset converter; if it's NULL, don't cache the 00076 * converter. 00077 */ 00078 svn_error_t *svn_utf_cstring_to_utf8_ex (const char **dest, 00079 const char *src, 00080 const char *frompage, 00081 const char *convset_key, 00082 apr_pool_t *pool); 00083 00084 00085 /** Set @a *dest to a natively-encoded stringbuf from utf8 stringbuf @a src; 00086 * allocate @a *dest in @a pool. 00087 */ 00088 svn_error_t *svn_utf_stringbuf_from_utf8 (svn_stringbuf_t **dest, 00089 const svn_stringbuf_t *src, 00090 apr_pool_t *pool); 00091 00092 00093 /** Set @a *dest to a natively-encoded string from utf8 string @a src; 00094 * allocate @a *dest in @a pool. 00095 */ 00096 svn_error_t *svn_utf_string_from_utf8 (const svn_string_t **dest, 00097 const svn_string_t *src, 00098 apr_pool_t *pool); 00099 00100 00101 /** Set @a *dest to a natively-encoded C string from utf8 C string @a src; 00102 * allocate @a *dest in @a pool. 
00103 */ 00104 svn_error_t *svn_utf_cstring_from_utf8 (const char **dest, 00105 const char *src, 00106 apr_pool_t *pool); 00107 00108 00109 /** Set @a *dest to a @a frompage encoded C string from utf8 C string 00110 * @a src; allocate @a *dest in @a pool. Use @a convset_key as the 00111 * cache key for the charset converter; if it's NULL, don't cache the 00112 * converter. 00113 */ 00114 svn_error_t *svn_utf_cstring_from_utf8_ex (const char **dest, 00115 const char *src, 00116 const char *topage, 00117 const char *convset_key, 00118 apr_pool_t *pool); 00119 00120 00121 /** Return a fuzzily native-encoded C string from utf8 C string @a src, 00122 * allocated in @a pool. A fuzzy recoding leaves all 7-bit ascii 00123 * characters the same, and substitutes "?\\XXX" for others, where XXX 00124 * is the unsigned decimal code for that character. 00125 * 00126 * This function cannot error; it is guaranteed to return something. 00127 * First it will recode as described above and then attempt to convert 00128 * the (new) 7-bit UTF-8 string to native encoding. If that fails, it 00129 * will return the raw fuzzily recoded string, which may or may not be 00130 * meaningful in the client's locale, but is (presumably) better than 00131 * nothing. 00132 * 00133 * ### Notes: 00134 * 00135 * Improvement is possible, even imminent. The original problem was 00136 * that if you converted a UTF-8 string (say, a log message) into a 00137 * locale that couldn't represent all the characters, you'd just get a 00138 * static placeholder saying "[unconvertible log message]". Then 00139 * Justin Erenkrantz pointed out how on platforms that didn't support 00140 * conversion at all, "svn log" would still fail completely when it 00141 * encountered unconvertible data. 00142 * 00143 * Now for both cases, the caller can at least fall back on this 00144 * function, which converts the message as best it can, substituting 00145 * ?\\XXX escape codes for the non-ascii characters. 
00146 * 00147 * Ultimately, some callers may prefer the iconv "//TRANSLIT" option, 00148 * so when we can detect that at configure time, things will change. 00149 * Also, this should (?) be moved to apr/apu eventually. 00150 * 00151 * See http://subversion.tigris.org/issues/show_bug.cgi?id=807 for 00152 * details. 00153 */ 00154 const char *svn_utf_cstring_from_utf8_fuzzy (const char *src, 00155 apr_pool_t *pool); 00156 00157 00158 /** Set @a *dest to a natively-encoded C string from utf8 stringbuf @a src; 00159 * allocate @a *dest in @a pool. 00160 */ 00161 svn_error_t *svn_utf_cstring_from_utf8_stringbuf (const char **dest, 00162 const svn_stringbuf_t *src, 00163 apr_pool_t *pool); 00164 00165 00166 /** Set @a *dest to a natively-encoded C string from utf8 string @a src; 00167 * allocate @a *dest in @a pool. 00168 */ 00169 svn_error_t *svn_utf_cstring_from_utf8_string (const char **dest, 00170 const svn_string_t *src, 00171 apr_pool_t *pool); 00172 00173 #ifdef __cplusplus 00174 } 00175 #endif /* __cplusplus */ 00176 00177 #endif /* SVN_XML_H */
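/* 1.3.5 -- NOTE(review): stray version string, presumably left by the documentation generator that produced this listing; confirm and remove. */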