module String:sig
..end
Strings, substrings, string sets and maps.
A string s
of length l
is a zero-based indexed sequence of l
bytes. An index i
of s
is an integer in the range
[0
;l-1
], it represents the i
th byte of s
which can be
accessed using the string indexing operator s.[i]
.
Important. OCaml's string
s became immutable since 4.02.
Whenever possible compile your code with the -safe-string
option. This module does not expose any mutable operation on
strings and assumes strings are immutable. See the
porting guide.
type t =
string
The type for strings. Finite sequences of immutable bytes.
val empty : string
empty
is an empty string.
val v : len:int -> (int -> char) -> string
v len f
is a string s
of length len
with s.[i] = f
i
for all indices i
of s
. f
is invoked
in increasing index order.
Raises Invalid_argument
if len
is not in the range [0
;
Sys.max_string_length
].
val length : string -> int
length s
is the number of bytes in s
.
val get : string -> int -> char
get s i
is the byte of s
at index i
. This is
equivalent to the s.[i]
notation.
Raises Invalid_argument
if i
is not an index of s
.
val get_byte : string -> int -> int
get_byte s i
is Char.to_int (get s i)
val head : ?rev:bool -> string -> char option
head s
is Some (get s h)
with h = 0
if rev = false
(default) or
h = length s - 1
if rev = true
. None
is returned if s
is
empty.
val get_head : ?rev:bool -> string -> char
get_head s
is like Astring.String.head
but raises
Invalid_argument
if s
is empty.
val hash : string -> int
hash s
is Hashtbl.hash
s
.
val append : string -> string -> string
append s s'
appends s'
to s
. This is equivalent to
s ^ s'
.
Raises Invalid_argument
if the result is longer than
Sys.max_string_length
.
val concat : ?sep:string -> string list -> string
concat ~sep ss
concatenates the list of strings ss
, separating
consecutive elements in the list ss
with sep
(defaults to
Astring.String.empty
).
Raises Invalid_argument
if the result is longer than
Sys.max_string_length
.
val is_empty : string -> bool
is_empty s
is length s = 0
.
val is_prefix : affix:string -> string -> bool
is_prefix ~affix s
is true
iff affix.[i] = s.[i]
for
all indices i
of affix
.
val is_infix : affix:string -> string -> bool
is_infix ~affix s
is true
iff there exists an index j
in s
such
that for all indices i
of affix
we have affix.[i] = s.[j + i]
.
val is_suffix : affix:string -> string -> bool
is_suffix ~affix s
is true iff affix.[n - i] = s.[m - i]
for all
indices i
of affix
with n = String.length affix - 1
and m =
String.length s - 1
.
val for_all : (char -> bool) -> string -> bool
for_all p s
is true
iff for all indices i
of s
, p s.[i]
= true
.
val exists : (char -> bool) -> string -> bool
exists p s
is true
iff there exists an index i
of s
with
p s.[i] = true
.
val equal : string -> string -> bool
equal s s'
is s = s'
.
val compare : string -> string -> int
compare s s'
is Stdlib.compare s s'
, it compares the
byte sequences of s
and s'
in lexicographical order.
Tip. These functions extract substrings as new strings. Using substrings may be less wasteful and more flexible.
val with_range : ?first:int -> ?len:int -> string -> string
with_range ~first ~len s
are the consecutive bytes of s
whose
indices exist in the range [first
;first + len - 1
].
first
defaults to 0
and len
to max_int
. Note that
first
can be any integer and len
any non-negative integer.
Raises Invalid_argument
if len
is negative.
val with_index_range : ?first:int -> ?last:int -> string -> string
with_index_range ~first ~last s
are the consecutive bytes of
s
whose indices exist in the range [first
;last
].
first
defaults to 0
and last
to String.length s - 1
.
Note that both first
and last
can be any integer. If
first > last
the interval is empty and the empty string
is returned.
val trim : ?drop:(char -> bool) -> string -> string
trim ~drop s
is s
with prefix and suffix bytes satisfying
drop
in s
removed. drop
defaults to Astring.Char.Ascii.is_white
.
val span : ?rev:bool ->
?min:int -> ?max:int -> ?sat:(char -> bool) -> string -> string * string
span ~rev ~min ~max ~sat s
is (l, r)
where:
If rev
is false
(default), l
is at least min
and at most max
consecutive sat
satisfying initial bytes of
s
or Astring.String.empty
if there are no such bytes. r
are the remaining
bytes of s
.
If rev
is true
, r
is at least min
and at most max
consecutive sat
satisfying final bytes of s
or Astring.String.empty
if there are no such bytes. l
are the remaining
bytes of s
.
If max
is unspecified the span is unlimited. If min
is unspecified it defaults to 0
. If min > max
the condition
can't be satisfied and the left or right span, depending on rev
, is
always empty. sat
defaults to (fun _ -> true)
.
The invariant l ^ r = s
holds.
Raises Invalid_argument
if max
or min
is negative.
val take : ?rev:bool -> ?min:int -> ?max:int -> ?sat:(char -> bool) -> string -> string
take ~rev ~min ~max ~sat s
is the matching span of Astring.String.span
without
the remaining one. In other words:
(if rev then snd else fst) @@ span ~rev ~min ~max ~sat s
val drop : ?rev:bool -> ?min:int -> ?max:int -> ?sat:(char -> bool) -> string -> string
drop ~rev ~min ~max ~sat s
is the remaining span of Astring.String.span
without
the matching span. In other words:
(if rev then fst else snd) @@ span ~rev ~min ~max ~sat s
val cut : ?rev:bool -> sep:string -> string -> (string * string) option
cut ~sep s
is either the pair Some (l,r)
of the two
(possibly empty) substrings of s
that are delimited by the
first match of the non empty separator string sep
or None
if
sep
can't be matched in s
. Matching starts from the
beginning of s
(rev
is false
, default) or the end (rev
is true
).
The invariant l ^ sep ^ r = s
holds.
Raises Invalid_argument
if sep
is the empty string.
val cuts : ?rev:bool -> ?empty:bool -> sep:string -> string -> string list
cuts ~sep s
is the list of all substrings of s
that are
delimited by matches of the non empty separator string
sep
. Empty substrings are omitted in the list if empty
is
false
(defaults to true
).
Matching separators in s
starts from the beginning of s
(rev
is false
, default) or the end (rev
is true
). Once
one is found, the separator is skipped and matching starts
again, that is separator matches can't overlap. If there is no
separator match in s
, the list [s]
is returned.
The following invariants hold:
concat ~sep (cuts ~empty:true ~sep s) = s
cuts ~empty:true ~sep s <> []
Raises Invalid_argument
if sep
is the empty string.
val fields : ?empty:bool -> ?is_sep:(char -> bool) -> string -> string list
fields ~empty ~is_sep s
is the list of (possibly empty)
substrings that are delimited by bytes for which is_sep
is
true
. Empty substrings are omitted in the list if empty
is
false
(defaults to true
). is_sep
defaults to
Astring.Char.Ascii.is_white
.
type
sub
The type for substrings.
val sub : ?start:int -> ?stop:int -> string -> sub
sub
is Astring.String.Sub.v
.
val sub_with_range : ?first:int -> ?len:int -> string -> sub
sub_with_range
is like Astring.String.with_range
but returns a substring
value. If first
is smaller than 0
the empty string at the start
of s
is returned. If first
is greater than the last index of s
the empty string at the end of s
is returned.
val sub_with_index_range : ?first:int -> ?last:int -> string -> sub
sub_with_index_range
is like Astring.String.with_index_range
but returns
a substring value. If first
and last
are smaller than 0
the empty string at the start of s
is returned. If first
is greater than the last index of s
the empty string at
the end of s
is returned. If first > last
and first
is an
index of s
the empty string at first
is returned.
module Sub:sig
..end
Substrings.
val find : ?rev:bool -> ?start:int -> (char -> bool) -> string -> int option
find ~rev ~start sat s
is:
rev
is false
(default). The smallest index i
, if any,
greater or equal to start
such that sat s.[i]
is true
.
start
defaults to 0
.
rev
is true
. The greatest index i
, if any, smaller or equal
to start
such that sat s.[i]
is true
.
start
defaults to String.length s - 1
.
Note that start
can be any integer.
val find_sub : ?rev:bool -> ?start:int -> sub:string -> string -> int option
find_sub ~rev ~start ~sub s
is:
rev
is false
(default). The smallest index i
, if any,
greater or equal to start
such that sub
can be found starting
at i
in s
that is s.[i] = sub.[0]
, s.[i+1] = sub.[1]
, ...
start
defaults to 0
.
rev
is true
. The greatest index i
, if any, smaller
or equal to start
such that sub
can be found starting at
i
in s
that is s.[i] = sub.[0]
, s.[i+1] = sub.[1]
, ...
start
defaults to String.length s - 1
.
Note that start
can be any integer.
val filter : (char -> bool) -> string -> string
filter sat s
is the string made of the bytes of s
that satisfy sat
,
in the same order.
val filter_map : (char -> char option) -> string -> string
filter_map f s
is the string made of the bytes of s
as mapped by
f
, in the same order.
val map : (char -> char) -> string -> string
map f s
is s'
with s'.[i] = f s.[i]
for all indices i
of s
. f
is invoked in increasing index order.
val mapi : (int -> char -> char) -> string -> string
mapi f s
is s'
with s'.[i] = f i s.[i]
for all indices i
of s
. f
is invoked in increasing index order.
val fold_left : ('a -> char -> 'a) -> 'a -> string -> 'a
fold_left f acc s
is
f (
...(f (f acc s.[0]) s.[1])
...) s.[m]
with m = String.length s - 1
.
val fold_right : (char -> 'a -> 'a) -> string -> 'a -> 'a
fold_right f s acc
is
f s.[0] (f s.[1] (
...(f s.[m] acc) )
...)
with m = String.length s - 1
.
val iter : (char -> unit) -> string -> unit
iter f s
is f s.[0]; f s.[1];
...
f s.[m]
with m = String.length s - 1
.
val iteri : (int -> char -> unit) -> string -> unit
iteri f s
is f 0 s.[0]; f 1 s.[1];
...
f m s.[m]
with m = String.length s - 1
.
val uniquify : string list -> string list
uniquify ss
is ss
without duplicates, the list order is
preserved.
module Ascii:sig
..end
US-ASCII string support.
val pp : Stdlib.Format.formatter -> string -> unit
pp ppf s
prints s
's bytes on ppf
.
val dump : Stdlib.Format.formatter -> string -> unit
dump ppf s
prints s
as a syntactically valid OCaml string on
ppf
using Astring.String.Ascii.escape_string
.
type
set
The type for string sets.
module Set:sig
..end
String sets.
module Map:sig
..end
String maps.
type 'a
map = 'a Map.t
The type for maps from strings to values of type 'a.
val of_char : char -> string
of_char c
is a string that contains the byte c
.
val to_char : string -> char option
to_char s
is the single byte in s
or None
if there is no byte
or more than one in s
.
val of_bool : bool -> string
of_bool b
is a string representation for b
. Relies on
Stdlib.string_of_bool
.
val to_bool : string -> bool option
to_bool s
is a bool
from s
, if any. Relies on
Stdlib.bool_of_string
.
val of_int : int -> string
of_int i
is a string representation for i
. Relies on
Stdlib.string_of_int
.
val to_int : string -> int option
to_int s
is an int
from s
, if any. Relies on
Stdlib.int_of_string
.
val of_nativeint : nativeint -> string
of_nativeint i
is a string representation for i
. Relies on
Nativeint.to_string
.
val to_nativeint : string -> nativeint option
to_nativeint s
is a nativeint
from s
, if any. Relies on
Nativeint.of_string
.
val of_int32 : int32 -> string
of_int32 i
is a string representation for i
. Relies on
Int32.to_string
.
val to_int32 : string -> int32 option
to_int32 s
is an int32
from s
, if any. Relies on
Int32.of_string
.
val of_int64 : int64 -> string
of_int64 i
is a string representation for i
. Relies on
Int64.to_string
.
val to_int64 : string -> int64 option
to_int64 s
is an int64
from s
, if any. Relies on
Int64.of_string
.
val of_float : float -> string
of_float f
is a string representation for f
. Relies on
Stdlib.string_of_float
.
val to_float : string -> float option
to_float s
is a float
from s
, if any. Relies
on Stdlib.float_of_string
.