00001 /**
00002 * @copyright
00003 * ====================================================================
00004 * Copyright (c) 2000-2004 CollabNet. All rights reserved.
00005 *
00006 * This software is licensed as described in the file COPYING, which
00007 * you should have received as part of this distribution. The terms
00008 * are also available at http://subversion.tigris.org/license-1.html.
00009 * If newer versions of this license are posted there, you may use a
00010 * newer version instead, at your option.
00011 *
00012 * This software consists of voluntary contributions made by many
00013 * individuals. For exact contribution history, see the revision
00014 * history and logs, available at http://subversion.tigris.org/.
00015 * ====================================================================
00016 * @endcopyright
00017 *
00018 * @file svn_path.h
00019 * @brief A path manipulation library
00020 *
00021 * All incoming and outgoing paths are non-null and in UTF-8, unless
00022 * otherwise documented.
00023 *
00024 * No result path ever ends with a separator, no matter whether the
00025 * path is a file or directory, because we always canonicalize() it.
00026 *
00027 * All paths passed to the @c svn_path_xxx functions, with the exception of
00028 * the @c svn_path_canonicalize and @c svn_path_internal_style functions, must
00029 * be in canonical form.
00030 */
00031
00032 #ifndef SVN_PATH_H
00033 #define SVN_PATH_H
00034
00035
00036 #include <apr_pools.h>
00037 #include <apr_tables.h>
00038
00039 #include "svn_string.h"
00040 #include "svn_error.h"
00041
00042
00043 #ifdef __cplusplus
00044 extern "C" {
00045 #endif /* __cplusplus */
00046
00047
00048
00049 /** Convert @a path from the local style to the canonical internal style. */
00050 const char *svn_path_internal_style (const char *path, apr_pool_t *pool);
00051
00052 /** Convert @a path from the canonical internal style to the local style. */
00053 const char *svn_path_local_style (const char *path, apr_pool_t *pool);
00054
00055
00056 /** Join a base path (@a base) with a component (@a component), allocated in
00057 * @a pool.
00058 *
00059 * If either @a base or @a component is the empty path, then the other
00060 * argument will be copied and returned. If both are the empty path the
00061 * empty path is returned.
00062 *
00063 * If the @a component is an absolute path, then it is copied and returned.
00064 * Otherwise, exactly one slash character ('/') is used to join the components,
00065 * accounting for any trailing slash in @a base.
00066 *
00067 * Note that the contents of @a base are not examined, so it is possible to
00068 * use this function for constructing URLs, or for relative URLs or
00069 * repository paths.
00070 *
00071 * This function is NOT appropriate for native (local) file
00072 * paths. Only for "internal" canonicalized paths, since it uses '/'
00073 * for the separator. Further, an absolute path (for @a component) is
00074 * based on a leading '/' character. Thus, an "absolute URI" for the
00075 * @a component won't be detected. An absolute URI can only be used
00076 * for the base.
00077 */
00078 char *svn_path_join (const char *base,
00079 const char *component,
00080 apr_pool_t *pool);
00081
00082 /** Join multiple components onto a @a base path, allocated in @a pool. The
00083 * components are terminated by a @c NULL.
00084 *
00085 * If any component is the empty string, it will be ignored.
00086 *
00087 * If any component is an absolute path, then it resets the base and
00088 * further components will be appended to it.
00089 *
00090 * See @c svn_path_join() for further notes about joining paths.
00091 */
00092 char *svn_path_join_many (apr_pool_t *pool, const char *base, ...);
00093
00094
00095 /** Get the basename of the specified canonicalized @a path. The
00096 * basename is defined as the last component of the path (ignoring any
00097 * trailing slashes). If the @a path is root ("/"), then that is
00098 * returned. Otherwise, the returned value will have no slashes in
00099 * it.
00100 *
00101 * Example: svn_path_basename("/foo/bar") -> "bar"
00102 *
00103 * The returned basename will be allocated in @a pool.
00104 *
00105 * Note: if an empty string is passed, then an empty string will be returned.
00106 */
00107 char *svn_path_basename (const char *path, apr_pool_t *pool);
00108
00109 /** Get the dirname of the specified canonicalized @a path, defined as
00110 * the path with its basename removed.
00111 *
00112 * Special case: if @a path is root ("/"), it is returned
00113 * unchanged.
00114 *
00115 * The returned dirname will be allocated in @a pool.
00116 */
00117 char *svn_path_dirname (const char *path, apr_pool_t *pool);
00118
00119 /** Return the number of components in the canonicalized @a path. */
00120 apr_size_t
00121 svn_path_component_count (const char *path);
00122
00123 /** Add a @a component (a null-terminated C-string) to the
00124 * canonicalized @a path. @a component is allowed to contain
00125 * directory separators.
00126 *
00127 * If @a path is non-empty, append the appropriate directory separator
00128 * character, and then @a component. If @a path is empty, simply set it to
00129 * @a component; don't add any separator character.
00130 *
00131 * If the result ends in a separator character, then remove the separator.
00132 */
00133 void svn_path_add_component (svn_stringbuf_t *path,
00134 const char *component);
00135
00136 /** Remove one component off the end of the canonicalized @a path. */
00137 void svn_path_remove_component (svn_stringbuf_t *path);
00138
00139 /** Remove @a n components off the end of the canonicalized @a path.
00140 * Equivalent to calling @c svn_path_remove_component @a n times. */
00141 void svn_path_remove_components (svn_stringbuf_t *path, apr_size_t n);
00142
00143 /** Divide the canonicalized @a path into @a *dirpath and @a
00144 * *base_name, allocated in @a pool.
00145 *
00146 * If @a dirpath or @a base_name is null, then don't set that one.
00147 *
00148 * Either @a dirpath or @a base_name may be @a path's own address, but they
00149 * may not both be the same address, or the results are undefined.
00150 *
00151 * If @a path has two or more components, the separator between @a dirpath
00152 * and @a base_name is not included in either of the new names.
00153 *
00154 * examples:
00155 * - <pre>"/foo/bar/baz" ==> "/foo/bar" and "baz"</pre>
00156 * - <pre>"/bar" ==> "/" and "bar"</pre>
00157 * - <pre>"/" ==> "/" and "/"</pre>
00158 * - <pre>"bar" ==> "" and "bar"</pre>
00159 * - <pre>"" ==> "" and ""</pre>
00160 */
00161 void svn_path_split (const char *path,
00162 const char **dirpath,
00163 const char **base_name,
00164 apr_pool_t *pool);
00165
00166
00167 /** Return non-zero iff @a path is empty ("") or represents the current
00168 * directory -- that is, if prepending it as a component to an existing
00169 * path would result in no meaningful change.
00170 */
00171 int svn_path_is_empty (const char *path);
00172
00173
00174 /** Return a new path (or URL) like @a path, but transformed such that
00175 * some types of path specification redundancies are removed.
00176 *
00177 * This involves collapsing redundant "/./" elements, removing
00178 * multiple adjacent separator characters, removing trailing
00179 * separator characters, and possibly other semantically inoperative
00180 * transformations.
00181 *
00182 * The returned path may be statically allocated, equal to @a path, or
00183 * allocated from @a pool.
00184 */
00185 const char *svn_path_canonicalize (const char *path, apr_pool_t *pool);
00186
00187
00188 /** Return an integer greater than, equal to, or less than 0, according
00189 * as @a path1 is greater than, equal to, or less than @a path2.
00190 */
00191 int svn_path_compare_paths (const char *path1, const char *path2);
00192
00193
00194 /** Return the longest common path shared by two canonicalized paths,
00195 * @a path1 and @a path2. If there's no common ancestor, return the
00196 * empty path.
00197 *
00198 * @a path1 and @a path2 may be URLs. In order for two URLs to have
00199 * a common ancestor, they must (a) have the same protocol (since two URLs
00200 * with the same path but different protocols may point at completely
00201 * different resources), and (b) share a common ancestor in their path
00202 * component, i.e. 'protocol://' is not a sufficient ancestor.
00203 */
00204 char *svn_path_get_longest_ancestor (const char *path1,
00205 const char *path2,
00206 apr_pool_t *pool);
00207
00208 /** Convert @a relative canonicalized path to an absolute path and
00209 * return the results in @a *pabsolute, allocated in @a pool.
00210 *
00211 * @a relative may be a URL, in which case no attempt is made to convert it,
00212 * and a copy of the URL is returned.
00213 */
00214 svn_error_t *
00215 svn_path_get_absolute (const char **pabsolute,
00216 const char *relative,
00217 apr_pool_t *pool);
00218
00219 /** Return the path part of the canonicalized @a path in @a
00220 * *pdirectory, and the file part in @a *pfile. If @a path is a
00221 * directory, set @a *pdirectory to @a path, and @a *pfile to the
00222 * empty string. If @a path does not exist it is treated as if it is
00223 * a file, since directories do not normally vanish.
00224 */
00225 svn_error_t *
00226 svn_path_split_if_file(const char *path,
00227 const char **pdirectory,
00228 const char **pfile,
00229 apr_pool_t *pool);
00230
00231 /** Find the common prefix of the canonicalized paths in @a targets
00232 * (an array of @c const @c char @c *'s), and remove redundant paths if @a
00233 * remove_redundancies is true. If there are multiple targets, then
00234 *
00235 * - Set @a *pcommon to the absolute path of the path or URL common to
00236 * all of the targets. If the targets have no common prefix, or
00237 * are a mix of URLs and local paths, set @a *pcommon to the
00238 * empty string.
00239 *
00240 * - If @a pcondensed_targets is non-null, set @a *pcondensed_targets
00241 * to an array of targets relative to @a *pcommon, and if
00242 * @a remove_redundancies is true, omit any paths/URLs that are
00243 * descendants of another path/URL in @a targets. If @a *pcommon
00244 * is empty, @a *pcondensed_targets will contain full URLs and/or
00245 * absolute paths; redundancies can still be removed (from both URLs
00246 * and paths). If @a pcondensed_targets is null, leave it alone.
00247 *
00248 * Else if there is exactly one target, then
00249 *
00250 * - Set @a *pcommon to that target, and
00251 *
00252 * - If @a pcondensed_targets is non-null, set @a *pcondensed_targets
00253 * to an array containing zero elements. Else if
00254 * @a pcondensed_targets is null, leave it alone.
00255 *
00256 * If there are no items in @a targets, set @a *pcommon and (if
00257 * applicable) @a *pcondensed_targets to @c NULL.
00258 *
00259 * NOTE: There is no guarantee that @a *pcommon is within a working
00260 * copy. */
00261 svn_error_t *
00262 svn_path_condense_targets (const char **pcommon,
00263 apr_array_header_t **pcondensed_targets,
00264 const apr_array_header_t *targets,
00265 svn_boolean_t remove_redundancies,
00266 apr_pool_t *pool);
00267
00268
00269 /** Copy a list of canonicalized @a targets, one at a time, into @a
00270 * pcondensed_targets, omitting any targets that are found earlier in
00271 * the list, or whose ancestor is found earlier in the list. Ordering
00272 * of targets in the original list is preserved in the condensed list
00273 * of targets. Use @a pool for any allocations.
00274 *
00275 * How does this differ in functionality from @c svn_path_condense_targets?
00276 *
00277 * Here's the short version:
00278 *
00279 * 1. Disclaimer: if you wish to debate the following, talk to Karl. :-)
00280 * Order matters for updates because a multi-arg update is not
00281 * atomic, and CVS users are used to, when doing 'cvs up targetA
00282 * targetB' seeing targetA get updated, then targetB. I think the
00283 * idea is that if you're in a time-sensitive or flaky-network
00284 * situation, a user can say, "I really *need* to update
00285 * wc/A/D/G/tau, but I might as well update my whole working copy if
00286 * I can." So that user will do 'svn up wc/A/D/G/tau wc', and if
00287 * something dies in the middle of the 'wc' update, at least the
00288 * user has 'tau' up-to-date.
00289 *
00290 * 2. Also, we have this notion of an anchor and a target for updates
00291 * (the anchor is where the update editor is rooted, the target is
00292 * the actual thing we want to update). I needed a function that
00293 * would NOT screw with my input paths so that I could tell the
00294 * difference between someone being in A/D and saying 'svn up G' and
00295 * being in A/D/G and saying 'svn up .' -- believe it or not, these
00296 * two things don't mean the same thing. @c svn_path_condense_targets
00297 * plays with absolute paths (which is fine, so does
00298 * @c svn_path_remove_redundancies), but the difference is that it
00299 * actually tweaks those targets to be relative to the "grandfather
00300 * path" common to all the targets. Updates don't require a
00301 * "grandfather path" at all, and even if it did, the whole
00302 * conversion to an absolute path drops the crucial difference
00303 * between saying "i'm in foo, update bar" and "i'm in foo/bar,
00304 * update '.'"
00305 */
00306 svn_error_t *
00307 svn_path_remove_redundancies (apr_array_header_t **pcondensed_targets,
00308 const apr_array_header_t *targets,
00309 apr_pool_t *pool);
00310
00311
00312 /** Decompose the canonicalized @a path into an array of <tt>const
00313 * char *</tt> components, allocated in @a pool. If @a path is
00314 * absolute, the first component will be a lone dir separator (the
00315 * root directory).
00316 */
00317 apr_array_header_t *svn_path_decompose (const char *path,
00318 apr_pool_t *pool);
00319
00320
00321 /** Test that @a name is a single path component, that is:
00322 * - not @c NULL or empty.
00323 * - not a `/'-separated directory path
00324 * - not `..'
00325 */
00326 svn_boolean_t svn_path_is_single_path_component (const char *name);
00327
00328
00329 /**
00330 * @since New in 1.1.
00331 *
00332 * Test to see if a backpath, i.e. '..', is present in @a path.
00333 * If not, return @c FALSE.
00334 * If so, return @c TRUE.
00335 */
00336 svn_boolean_t svn_path_is_backpath_present (const char *path);
00337
00338
00339 /** Test if @a path2 is a child of @a path1.
00340 * If not, return @c NULL.
00341 * If so, return a copy of the remainder path, allocated in @a pool.
00342 * (The remainder is the component which, added to @a path1, yields
00343 * @a path2. The remainder does not begin with a dir separator.)
00344 *
00345 * Both paths must be in canonical form, and must either be absolute,
00346 * or contain no ".." components.
00347 *
00348 * ### todo: the ".." restriction is unfortunate, and would ideally
00349 * be lifted by making the implementation smarter. But this is not
00350 * trivial: if the path is "../foo", how do you know whether or not
00351 * the current directory is named "foo" in its parent?
00352 */
00353 const char *svn_path_is_child (const char *path1,
00354 const char *path2,
00355 apr_pool_t *pool);
00356
00357 /**
00358 * @since New in 1.2.
00359 *
00360 * Check whether @a path is a valid Subversion path.
00361 *
00362 * A valid Subversion pathname is a UTF-8 string without control
00363 * characters. "Valid" means Subversion can store the pathname in
00364 * a repository. There may be other, OS-specific, limitations on
00365 * what paths can be represented in a working copy.
00366 *
00367 * ASSUMPTION: @a path is a valid UTF-8 string. This function does
00368 * not check UTF-8 validity.
00369 *
00370 * Return @c SVN_NO_ERROR if valid and @c SVN_ERR_FS_PATH_SYNTAX if
00371 * invalid.
00372 */
00373 svn_error_t *svn_path_check_valid (const char *path, apr_pool_t *pool);
00374
00375
00376 /** URI/URL stuff
00377 *
00378 * @defgroup svn_path_uri_stuff URI/URL stuff
00379 * @{
00380 */
00381
00382 /** Return @c TRUE iff @a path looks like a valid URL, @c FALSE otherwise. */
00383 svn_boolean_t svn_path_is_url (const char *path);
00384
00385 /** Return @c TRUE iff @a path is URI-safe, @c FALSE otherwise. */
00386 svn_boolean_t svn_path_is_uri_safe (const char *path);
00387
00388 /** Return a URI-encoded copy of @a path, allocated in @a pool. */
00389 const char *svn_path_uri_encode (const char *path, apr_pool_t *pool);
00390
00391 /** Return a URI-decoded copy of @a path, allocated in @a pool. */
00392 const char *svn_path_uri_decode (const char *path, apr_pool_t *pool);
00393
00394 /** Extend @a url by a single @a component, URI-encoding that @a component
00395 * before adding it to the @a url. Return the new @a url, allocated in
00396 * @a pool. Notes: if @a component is @c NULL, just return a copy of @a url
00397 * allocated in @a pool; if @a component is already URI-encoded, calling
00398 * code should just use <tt>svn_path_join (url, component, pool)</tt>. @a url
00399 * does not need to be a canonical path, it may have trailing '/'.
00400 */
00401 const char *svn_path_url_add_component (const char *url,
00402 const char *component,
00403 apr_pool_t *pool);
00404
00405 /**
00406 * @since New in 1.1.
00407 *
00408 * Convert @a iri (Internationalized URI) to a URI.
00409 * The return value may be the same as @a iri if it was already
00410 * a URI. Else, allocate the return value in @a pool. */
00411 const char *svn_path_uri_from_iri (const char *iri,
00412 apr_pool_t *pool);
00413
00414 /**
00415 * @since New in 1.1.
00416 *
00417 * URI-encode certain characters in @a uri that are not valid in a URI, but
00418 * don't have any special meaning in @a uri at their positions. If no
00419 * characters need escaping, just return @a uri.
00420 *
00421 * NOTE: Currently, this function escapes <, >, ", space, {, }, |, \, ^, and `.
00422 * This may be extended in the future to do context-dependent escaping.
00423 */
00424 const char *svn_path_uri_autoescape (const char *uri,
00425 apr_pool_t *pool);
00426
00427 /** @} */
00428
00429 /** Charset conversion stuff
00430 *
00431 * @defgroup svn_path_charset_stuff Charset conversion stuff
00432 * @{
00433 */
00434
00435 /** Convert @a path_utf8 from UTF-8 to the internal encoding used by APR. */
00436 svn_error_t *svn_path_cstring_from_utf8 (const char **path_apr,
00437 const char *path_utf8,
00438 apr_pool_t *pool);
00439
00440 /** Convert @a path_apr from the internal encoding used by APR to UTF-8. */
00441 svn_error_t *svn_path_cstring_to_utf8 (const char **path_utf8,
00442 const char *path_apr,
00443 apr_pool_t *pool);
00444
00445
00446 /** @} */
00447
00448 #ifdef __cplusplus
00449 }
00450 #endif /* __cplusplus */
00451
00452
00453 #endif /* SVN_PATH_H */
1.2.14 written by Dimitri van Heesch,
© 1997-2002