Subversion
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
svn_utf.h
Go to the documentation of this file.
1 /**
2  * @copyright
3  * ====================================================================
4  * Copyright (c) 2000-2004, 2008 CollabNet. All rights reserved.
5  *
6  * This software is licensed as described in the file COPYING, which
7  * you should have received as part of this distribution. The terms
8  * are also available at http://subversion.tigris.org/license-1.html.
9  * If newer versions of this license are posted there, you may use a
10  * newer version instead, at your option.
11  *
12  * This software consists of voluntary contributions made by many
13  * individuals. For exact contribution history, see the revision
14  * history and logs, available at http://subversion.tigris.org/.
15  * ====================================================================
16  * @endcopyright
17  *
18  * @file svn_utf.h
19  * @brief UTF-8 conversion routines
20  * Whenever a conversion routine cannot convert to or from UTF-8, the
21  * error returned has code @c APR_EINVAL.
22  */
23 
24 
25 
26 #ifndef SVN_UTF_H
27 #define SVN_UTF_H
28 
29 #include <apr_pools.h>
30 #include <apr_xlate.h> /* for APR_*_CHARSET */
31 
32 #include "svn_types.h"
33 #include "svn_string.h"
34 
35 #ifdef __cplusplus
36 extern "C" {
37 #endif /* __cplusplus */
38 
/* Subversion-prefixed aliases for the APR_LOCALE_CHARSET and
 * APR_DEFAULT_CHARSET macros (the APR_*_CHARSET names that
 * <apr_xlate.h> is included for). */
39 #define SVN_APR_LOCALE_CHARSET APR_LOCALE_CHARSET
40 #define SVN_APR_DEFAULT_CHARSET APR_DEFAULT_CHARSET
41 
42 /**
43  * Initialize the UTF-8 encoding/decoding routines.
44  * Allocate cached translation handles in a subpool of @a pool.
45  *
46  * @note Calling this function is optional, but if it is called, no other
47  * svn function may be in use in other threads while this function runs
48  * or while @a pool is being cleared or destroyed.
49  * Initializing the UTF-8 routines will improve performance.
50  *
51  * @since New in 1.1.
52  */
53 void
54 svn_utf_initialize(apr_pool_t *pool);
55 
56 /** Set @a *dest to a utf8-encoded stringbuf from native stringbuf @a src;
57  * allocate @a *dest in @a pool.
58  */
59 svn_error_t *
60 svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest,
61  const svn_stringbuf_t *src,
62  apr_pool_t *pool);
63 
64 
65 /** Set @a *dest to a utf8-encoded string from native string @a src; allocate
66  * @a *dest in @a pool.
67  */
68 svn_error_t *
69 svn_utf_string_to_utf8(const svn_string_t **dest,
70  const svn_string_t *src,
71  apr_pool_t *pool);
72 
73 
74 /** Set @a *dest to a utf8-encoded C string from native C string @a src;
75  * allocate @a *dest in @a pool.
76  */
77 svn_error_t *
78 svn_utf_cstring_to_utf8(const char **dest,
79  const char *src,
80  apr_pool_t *pool);
81 
82 
83 /** Set @a *dest to a utf8 encoded C string from @a frompage encoded C
84  * string @a src; allocate @a *dest in @a pool.
85  *
86  * @since New in 1.4.
87  */
88 svn_error_t *
89 svn_utf_cstring_to_utf8_ex2(const char **dest,
90  const char *src,
91  const char *frompage,
92  apr_pool_t *pool);
93 
94 
95 /** Like svn_utf_cstring_to_utf8_ex2(), but takes an extra @a convset_key
96  * argument, which is ignored.
97  *
98  * @deprecated Provided for backward compatibility with the 1.3 API.
99  */
101 svn_error_t *
102 svn_utf_cstring_to_utf8_ex(const char **dest,
103  const char *src,
104  const char *frompage,
105  const char *convset_key,
106  apr_pool_t *pool);
107 
108 
109 /** Set @a *dest to a natively-encoded stringbuf from utf8 stringbuf @a src;
110  * allocate @a *dest in @a pool.
111  */
112 svn_error_t *
113 svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest,
114  const svn_stringbuf_t *src,
115  apr_pool_t *pool);
116 
117 
118 /** Set @a *dest to a natively-encoded string from utf8 string @a src;
119  * allocate @a *dest in @a pool.
120  */
121 svn_error_t *
122 svn_utf_string_from_utf8(const svn_string_t **dest,
123  const svn_string_t *src,
124  apr_pool_t *pool);
125 
126 
127 /** Convert utf8 C string @a src to the native encoding and store the
128  * result, allocated in @a pool, in @a *dest.
129  */
130 svn_error_t *
131 svn_utf_cstring_from_utf8(const char **dest,
132  const char *src,
133  apr_pool_t *pool);
134 
135 
136 /** Convert utf8 encoded C string @a src to the @a topage encoding and
137  * store the result, allocated in @a pool, in @a *dest.
138  *
139  * @since New in 1.4.
140  */
141 svn_error_t *
142 svn_utf_cstring_from_utf8_ex2(const char **dest,
143  const char *src,
144  const char *topage,
145  apr_pool_t *pool);
146 
147 
148 /** Like svn_utf_cstring_from_utf8_ex2(), but takes an extra @a convset_key
149  * argument, which is ignored.
150  *
151  * @deprecated Provided for backward compatibility with the 1.3 API.
152  */
154 svn_error_t *
155 svn_utf_cstring_from_utf8_ex(const char **dest,
156  const char *src,
157  const char *topage,
158  const char *convset_key,
159  apr_pool_t *pool);
160 
161 
162 /** Return a fuzzily native-encoded C string from utf8 C string @a src,
163  * allocated in @a pool. A fuzzy recoding leaves all 7-bit ASCII
164  * characters unchanged and substitutes "?\\XXX" for all others, where
165  * XXX is the unsigned decimal code for that character.
166  *
167  * This function cannot fail; it is guaranteed to return something.
168  * It first recodes @a src as described above and then attempts to convert
169  * the (new) 7-bit UTF-8 string to native encoding. If that fails, it
170  * returns the raw fuzzily recoded string, which may or may not be
171  * meaningful in the client's locale, but is (presumably) better than
172  * nothing.
173  *
174  * ### Notes:
175  *
176  * Improvement is possible, even imminent. The original problem was
177  * that if you converted a UTF-8 string (say, a log message) into a
178  * locale that couldn't represent all the characters, you'd just get a
179  * static placeholder saying "[unconvertible log message]". Then
180  * Justin Erenkrantz pointed out that on platforms that didn't support
181  * conversion at all, "svn log" would still fail completely when it
182  * encountered unconvertible data.
183  *
184  * Now, in both cases, the caller can at least fall back on this
185  * function, which converts the message as best it can, substituting
186  * "?\\XXX" escape codes for the non-ASCII characters.
187  *
188  * Ultimately, some callers may prefer the iconv "//TRANSLIT" option,
189  * so when we can detect that at configure time, things will change.
190  * Also, this should (?) be moved to apr/apu eventually.
191  *
192  * See http://subversion.tigris.org/issues/show_bug.cgi?id=807 for
193  * details.
194  */
195 const char *
196 svn_utf_cstring_from_utf8_fuzzy(const char *src,
197  apr_pool_t *pool);
198 
199 
200 /** Convert utf8 stringbuf @a src to a natively-encoded C string and store
201  * the result, allocated in @a pool, in @a *dest.
202  */
203 svn_error_t *
204 svn_utf_cstring_from_utf8_stringbuf(const char **dest,
205  const svn_stringbuf_t *src,
206  apr_pool_t *pool);
207 
208 
209 /** Convert utf8 string @a src to a natively-encoded C string and store
210  * the result, allocated in @a pool, in @a *dest.
211  */
212 svn_error_t *
213 svn_utf_cstring_from_utf8_string(const char **dest,
214  const svn_string_t *src,
215  apr_pool_t *pool);
216 
217 #ifdef __cplusplus
218 }
219 #endif /* __cplusplus */
220 
221 #endif /* SVN_UTF_H */