#include <stddef.h>
#include <string.h>
#include <limits.h>
#include "cbl/assert.h"
#include "cbl/memory.h"
#include "text.h"
Data Structures | |
struct | text_save_t |
struct | chunk |
Defines | |
#define | IDX(i, len) (((i) <= 0)? (i) + (len): (i) - 1) |
#define | ISATEND(s, n) ((s).str+(s).len == current->avail && (n) <= current->limit-current->avail) |
#define | EQUAL(s, i, t) (memcmp(&(s).str[i], (t).str, (t).len) == 0) |
#define | SWAP(i, j) |
Functions | |
int() | text_pos (text_t s, int i) |
normalizes a text position. | |
text_t() | text_box (const char *str, int len) |
boxes a null-terminated string to construct a text. | |
text_t() | text_sub (text_t s, int i, int j) |
constructs a sub-text of a text. | |
text_t() | text_put (const char *str) |
constructs a text from a null-terminated string. | |
text_t() | text_gen (const char str[], int size) |
constructs a text from an array of characters. | |
char *() | text_get (char *str, int size, text_t s) |
converts a text to a C string. | |
text_t() | text_dup (text_t s, int n) |
constructs a text by duplicating another text. | |
text_t() | text_cat (text_t s1, text_t s2) |
constructs a text by concatenating two texts. | |
text_t() | text_reverse (text_t s) |
constructs a text by reversing a text. | |
text_t() | text_map (text_t s, const text_t *from, const text_t *to) |
constructs a text by converting a text based on a specified mapping. | |
int() | text_cmp (text_t s1, text_t s2) |
compares two texts. | |
text_save_t *() | text_save (void) |
saves the current top of the text space. | |
void() | text_restore (text_save_t **save) |
restores a saved state of the text space. | |
int() | text_chr (text_t s, int i, int j, int c) |
finds the first occurrence of a character in a text. | |
int() | text_rchr (text_t s, int i, int j, int c) |
finds the last occurrence of a character in a text. | |
int() | text_upto (text_t s, int i, int j, text_t set) |
finds the first occurrence of any character from a set in a text. | |
int() | text_rupto (text_t s, int i, int j, text_t set) |
finds the last occurrence of any character from a set in a text. | |
int() | text_find (text_t s, int i, int j, text_t str) |
finds the first occurrence of a text in a text. | |
int() | text_rfind (text_t s, int i, int j, text_t str) |
finds the last occurrence of a text in a text. | |
int() | text_any (text_t s, int i, text_t set) |
checks if a character of a specified position matches any character from a set. | |
int() | text_many (text_t s, int i, int j, text_t set) |
finds the end of a span consisting of characters from a set. | |
int() | text_rmany (text_t s, int i, int j, text_t set) |
finds the start of a span consisting of characters from a set. | |
int() | text_match (text_t s, int i, int j, text_t str) |
checks if a text starts with another text. | |
int() | text_rmatch (text_t s, int i, int j, text_t str) |
checks if a text ends with another text. | |
Variables | |
const text_t | text_ucase = { 26, "ABCDEFGHIJKLMNOPQRSTUVWXYZ" } |
const text_t | text_lcase = { 26, "abcdefghijklmnopqrstuvwxyz" } |
const text_t | text_digits = { 10, "0123456789" } |
const text_t | text_null = { 0, "" } |
#define SWAP | ( | i, | |||
j | ) |
Value:
do { \
    int t = i; \
    i = j; \
    j = t; \
} while(0)
checks if a character of a specified position matches any character from a set.
text_any() checks if a character of a specified position by i
in a text s
matches any character from a set set
. i
specifies the left position of a character. If it matches, text_any() returns the right positive position of the character or 0 otherwise. For example, given the following text:
1 2 3 4 5 6 7 (positive positions) c a c a o s -6 -5 -4 -3 -2 -1 0 (non-positive positions)
text_any(t, 2, text_box("ca", 2)) gives 3 because a
matches. If the set containing characters to find is empty, text_any() always fails and returns 0.
Note that giving to i
the last position (7 or 0 in the example text) makes text_any() fail and return 0; that does not cause the assertion to fail since it is a valid position.
Possible exceptions: assert_exceptfail
Unchecked errors: invalid text given for s
or set
[in] | s | text in which character is to be found |
[in] | i | left position of character to match |
[in] | set | set text containing characters to find |
text_t() text_box | ( | const char * | str, | |
int | len | |||
) |
boxes a null-terminated string to construct a text.
text_box() "boxes" a constant string or a string whose storage is already allocated properly by a user. Unlike text_put(), text_box() does not copy a given string, and the length of the text is given by the user. text_box() is useful especially when constructing a text representation for a string literal, for example text_box("sample", 6).
Note, in the above example, that the terminating null character is excluded by the length given to text_box(). If a user gives 7 for the length, the resulting text includes a null character, which constructs a different text from what the above call makes.
An empty text whose length is 0 is allowed. It can be constructed simply as text_box("", 0),
and a predefined empty text, text_null
is also provided for convenience.
Possible exceptions: assert_exceptfail
Unchecked errors: invalid string or length given for str
or len
[in] | str | string to box for text representation |
[in] | len | length of string to box |
constructs a text by concatenating two texts.
text_cat() constructs a new text by concatenating s2
to s1
.
Possible exceptions: assert_exceptfail, memory_exceptfail
Unchecked errors: invalid text given for s1
or s2
Note that text_cat() does not modify s1 by appending s2 to it; it constructs a new text, which means only the returned text has the concatenated result.
[in] | s1 | text to which another text is to be concatenated |
[in] | s2 | text to concatenate |
int() text_chr | ( | text_t | s, | |
int | i, | |||
int | j, | |||
int | c | |||
) |
finds the first occurrence of a character in a text.
text_chr() finds the first occurrence of a character c
in the specified range of a text s
. The range is specified by i
and j
. If found, text_chr() returns the left position of the found character. It returns 0 otherwise. For example, given the following text:
1 2 3 4 5 6 7 (positive positions) e v e n t s -6 -5 -4 -3 -2 -1 0 (non-positive positions)
text_chr(t, -6, 5, 'e') gives 1 while text_chr(t, -6, 5, 's') gives 0.
Possible exceptions: assert_exceptfail
Unchecked errors: invalid text given for s
[in] | s | text in which character is to be found |
[in] | i | range specified |
[in] | j | range specified |
[in] | c | character to find |
compares two texts.
text_cmp() compares two texts as strcmp() does strings except that a null character is not treated specially by the former.
Possible exceptions: assert_exceptfail
Unchecked errors: invalid text given for s1
or s2
[in] | s1 | text to compare |
[in] | s2 | text to compare |
negative | s1 compares less than s2 | |
0 | s1 compares equal to s2 | |
positive | s1 compares larger than s2 |
constructs a text by duplicating another text.
text_dup() takes a text and constructs a text that duplicates the original text n
times. For example, the call text_dup(text_box("sample", 6), 3)
constructs as the result the text: samplesamplesample
Possible exceptions: assert_exceptfail, memory_exceptfail
Unchecked errors: invalid text given for s
[in] | s | text to duplicate |
[in] | n | number of duplication |
finds the first occurrence of a text in a text.
text_find() finds the first occurrence of a text str
in the specified range of a text s
. The range is specified by i
and j
. If found, text_find() returns the left position of the character starting the found text. It returns 0 otherwise. For example, given the following text:
1 2 3 4 5 6 7 (positive positions) c a c a o s -6 -5 -4 -3 -2 -1 0 (non-positive positions)
text_find(t, 6, -6, text_box("ca", 2)) gives 1. If str
is empty, text_find() always succeeds and returns the left positive position of the specified range.
Possible exceptions: assert_exceptfail
Unchecked errors: invalid text given for s
or str
[in] | s | text in which another text is to be found |
[in] | i | range specified |
[in] | j | range specified |
[in] | str | text to find |
text_t() text_gen | ( | const char | str[], | |
int | size | |||
) |
constructs a text from an array of characters.
text_gen() copies size
characters from str
to the text space and returns a text representing the copied characters. The terminating null character is considered an ordinary character if any. Because it always copies given characters, the storage for the original array can be safely released after a text for it has been generated.
text_gen() is useful when a caller wants to construct a text that embodies the terminating null character while allocating storage for it. text_put() allocates storage but always excludes the null character, and text_box() can make the resulting text embody the null character but allocates no storage. text_gen() is added to fill the gap.
Possible exceptions: assert_exceptfail, memory_exceptfail
Unchecked errors: invalid string given for str
, invalid size given for size
[in] | str | array of characters to copy for text representation |
[in] | size | number of characters to copy |
char*() text_get | ( | char * | str, | |
int | size, | |||
text_t | s | |||
) |
converts a text to a C string.
text_get() is used when converting a text to a C string that is null-terminated. There are two ways to provide a buffer into which the resulting C string is to be written. If str
is not a null pointer, text_get() assumes that a user provides the buffer whose size is size
, and tries to write the conversion result to it. If its specified size is not enough to contain the result, it raises an exception due to assertion failure. If str
is a null pointer, size
is ignored and text_get() allocates a proper buffer to contain the resulting string. The Text Library never deallocates the buffer allocated by text_get(), thus a user has to set it free when it is no longer necessary.
Possible exceptions: assert_exceptfail, memory_exceptfail
Unchecked errors: invalid text given for s
, invalid buffer or size given for str
or size
[out] | str | buffer into which converted string to be written |
[in] | size | size of given buffer |
[in] | s | text to convert to C string |
finds the end of a span consisting of characters from a set.
If the specified range of a text s
starts with a character from a set set
, text_many() returns the right positive position ending a span consisting of characters from the set. The range is specified by i
and j
. It returns 0 otherwise. For example, given the following text:
1 2 3 4 5 6 7 (positive positions) c a c a o s -6 -5 -4 -3 -2 -1 0 (non-positive positions)
text_many(t, 2, 6, text_box("ca", 2)) gives 5. If the set containing characters to find is empty, text_many() always fails and returns 0.
Since text_many() checks whether the range starts with a character from a given set, text_many() is often called after text_upto().
The original code in the book is modified to form a more compact form.
Possible exceptions: assert_exceptfail
Unchecked errors: invalid text given for s
or set
[in] | s | text in which character to be found |
[in] | i | range specified |
[in] | j | range specified |
[in] | set | set text containing characters to find |
constructs a text by converting a text based on a specified mapping.
text_map() converts a text based on a mapping that is described by two pointers to texts. Both pointers describing a mapping should be either null pointers or non-null pointers; it is not allowed for only one of them to be a null pointer.
When they are non-null, they should point to texts whose lengths are equal. text_map() takes a text and copies it, converting any occurrence of characters in a text pointed to by from
to corresponding characters in a text pointed by to
, where the corresponding characters are determined by their positions in a text. Other characters are copied unchanged.
Once a mapping is set by calling text_map() with non-null text pointers, text_map() can be called with null pointers for from
and to
, in which case the latest mapping is used for conversion. Calling with null pointers is highly recommended whenever possible, since constructing a mapping table from two texts costs time.
For example, after the following call:
result = text_map(t, &text_ucase, &text_lcase);
result
is a text copied from t
converting any uppercase letters in it to corresponding lowercase letters.
Possible exceptions: assert_exceptfail, memory_exceptfail
Unchecked errors: invalid text given for s
, from
or to
[in] | s | text to convert |
[in] | from | pointer to text describing mapping |
[in] | to | pointer to text describing mapping |
checks if a text starts with another text.
If the specified range of a text s
starts with a text str
, text_match() returns the right positive position ending the matched text. The range is specified by i
and j
. It returns 0 otherwise. For example, given the following text:
1 2 3 4 5 6 7 (positive positions) c a c a o s -6 -5 -4 -3 -2 -1 0 (non-positive positions)
text_match(t, 3, 7, text_box("ca", 2)) gives 5. If str
is empty, text_match() always succeeds and returns the left positive position of the specified range.
Possible exceptions: assert_exceptfail
Unchecked errors: invalid text given for s
or str
[in] | s | text in which another text to be found |
[in] | i | range specified |
[in] | j | range specified |
[in] | str | text to find |
int() text_pos | ( | text_t | s, | |
int | i | |||
) |
normalizes a text position.
A text position may be negative and it is often necessary to normalize it into the positive range. text_pos() takes a text position and adjusts it to the positive range. For example, given a text:
1 2 3 4 5 (positive positions) t e s t -4 -3 -2 -1 0 (non-positive positions) 0 1 2 3 (array indices)
both text_pos(t, 2) and text_pos(t, -3) give 2.
Possible exceptions: assert_exceptfail
Unchecked errors: invalid text given for s
[in] | s | text for which position is to be normalized |
[in] | i | position to normalize |
text_t() text_put | ( | const char * | str | ) |
constructs a text from a null-terminated string.
text_put() copies a null-terminated string to the text space and returns a text representing the copied string. The resulting text does not contain the terminating null character. Because it always copies a given string, the storage for the original string can be safely released after a text for it has been generated.
Possible exceptions: assert_exceptfail, memory_exceptfail
Unchecked errors: invalid string given for str
[in] | str | null terminated string to copy for text representation |
int() text_rchr | ( | text_t | s, | |
int | i, | |||
int | j, | |||
int | c | |||
) |
finds the last occurrence of a character in a text.
text_rchr() finds the last occurrence of a character c
in the specified range of a text s
. The range is specified by i
and j
. If found, text_rchr() returns the left position of the found character. It returns 0 otherwise. For example, given the following text:
1 2 3 4 5 6 7 (positive positions) e v e n t s -6 -5 -4 -3 -2 -1 0 (non-positive positions)
text_rchr(t, -6, 5, 'e') gives 3 while text_rchr(t, -6, 5, 's') gives 0. The "r" in its name stands for "right" since what it does can be seen as scanning a given text from the right end.
Possible exceptions: assert_exceptfail
Unchecked errors: invalid text given for s
[in] | s | text in which character is to be found |
[in] | i | range specified |
[in] | j | range specified |
[in] | c | character to find |
void() text_restore | ( | text_save_t ** | save | ) |
restores a saved state of the text space.
text_restore() restores the text space to a state returned by text_save(). As explained in text_save(), any text and state generated after saving the state to be reverted are invalidated, thus they should not be used. See text_save() for more details.
Possible exceptions: assert_exceptfail
Unchecked errors: invalid saved state given for save
[in] | save | pointer to saved state of text space |
constructs a text by reversing a text.
text_reverse() constructs a text by reversing a given text.
Possible exceptions: assert_exceptfail, memory_exceptfail
Unchecked errors: invalid text given for s
[in] | s | text to reverse |
finds the last occurrence of a text in a text.
text_rfind() finds the last occurrence of a text str
in the specified range of a text s
. The range is specified by i
and j
. If found, text_rfind() returns the left position of the character starting the found text. It returns 0 otherwise. For example, given the following text:
1 2 3 4 5 6 7 (positive positions) c a c a o s -6 -5 -4 -3 -2 -1 0 (non-positive positions)
text_rfind(t, -6, 6, text_box("ca", 2)) gives 3. If str
is empty, text_rfind() always succeeds and returns the right positive position of the specified range.
The "r" in its name stands for "right" since what it does can be seen as scanning a given text from the right end.
Possible exceptions: assert_exceptfail
Unchecked errors: invalid text given for s
or str
[in] | s | text in which another text is to be found |
[in] | i | range specified |
[in] | j | range specified |
[in] | str | text to find |
finds the start of a span consisting of characters from a set.
If the specified range of a text s
ends with a character from a set set
, text_rmany() returns the left positive position starting a span consisting of characters from the set. The range is specified by i
and j
. It returns 0 otherwise. For example, given the following text:
1 2 3 4 5 6 7 (positive positions) c a c a o s -6 -5 -4 -3 -2 -1 0 (non-positive positions)
text_rmany(t, 3, 7, text_box("aos", 3)) gives 4. The "r" in its name stands for "right" since what it does can be seen as scanning a given text from the right end. If the set containing characters to find is empty, text_rmany() always fails and returns 0.
Since text_rmany() checks whether the range ends with a character from a given set, text_rmany() is often called after text_rupto().
Possible exceptions: assert_exceptfail
Unchecked errors: invalid text given for s
or set
[in] | s | text in which character to be found |
[in] | i | range specified |
[in] | j | range specified |
[in] | set | set text containing characters to find |
checks if a text ends with another text.
If the specified range of a text s
ends with a text str
, text_rmatch() returns the left positive position starting the matched text. The range is specified by i
and j
. It returns 0 otherwise. For example, given the following text:
1 2 3 4 5 6 7 (positive positions) c a c a o s -6 -5 -4 -3 -2 -1 0 (non-positive positions)
text_rmatch(t, 3, 7, text_box("os", 2)) gives 5. If str
is empty, text_rmatch() always succeeds and returns the right positive position of the specified range.
The "r" in its name stands for "right" since what it does can be seen as scanning a given text from the right end.
Possible exceptions: assert_exceptfail
Unchecked errors: invalid text given for s
or str
[in] | s | text in which another text to be found |
[in] | i | range specified |
[in] | j | range specified |
[in] | str | text to find |
finds the last occurrence of any character from a set in a text.
text_rupto() finds the last occurrence of any character from a set set
in the specified range of a text s
. The range is specified by i
and j
. If found, text_rupto() returns the left position of the found character. It returns 0 otherwise. For example, given the following text:
1 2 3 4 5 6 7 (positive positions) e v e n t s -6 -5 -4 -3 -2 -1 0 (non-positive positions)
text_rupto(t, -6, 5, text_box("escape", 6)) gives 3. If the set containing characters to find is empty, text_rupto() always fails and returns 0.
The "r" in its name stands for "right" since what it does can be seen as scanning a given text from the right end.
Possible exceptions: assert_exceptfail
Unchecked errors: invalid text given for s
or set
[in] | s | text in which character is to be found |
[in] | i | range specified |
[in] | j | range specified |
[in] | set | set text containing characters to find |
text_save_t*() text_save | ( | void | ) |
saves the current top of the text space.
text_save() saves the current state of the text space and returns it. The text space that provides storage for texts can be seen as a stack, and storage allocated by text_*() (except that allocated by text_get()) can be seen as piled up on the stack; thus any storage used by the Text Library after a call to text_save() can be set free by calling text_restore() with the saved state. After text_restore(), any text constructed after the text_save() call is invalidated and should not be used. In addition, other saved states, if any, are also invalidated if the text space is restored to a previous state using a state saved before they were generated. For example, after the following code:
h = text_save(); ... g = text_save(); ... text_restore(&h);
calling text_restore() with g
makes the program behave in an unpredictable way since the last call to text_restore() with h
invalidates g
.
Possible exceptions: memory_exceptfail
Unchecked errors: none
constructs a sub-text of a text.
text_sub() constructs a sub-text from characters between two specified positions in a text. Positions in a text are specified as in the Doubly-Linked List Library:
1 2 3 4 5 6 7 (positive positions) s a m p l e -6 -5 -4 -3 -2 -1 0 (non-positive positions)
Given the above text, a sub-string amp
can be specified as [2:5], [2:-2], [-5:5] or [-5:-2]. Furthermore, the order in which the positions are given does not matter, which means [5:2] indicates the same sequence of characters as [2:5]. In conclusion, the following calls to text_sub() give the same sub-text.
Since a user is not allowed to modify the resulting text and it need not end with a null character, text_sub() does not have to allocate storage for the result.
Possible exceptions: assert_exceptfail
Unchecked errors: invalid text given for s
[in] | s | text from which sub-text to be constructed |
[in] | i | position for sub-text |
[in] | j | position for sub-text |
finds the first occurrence of any character from a set in a text.
text_upto() finds the first occurrence of any character from a set set
in the specified range of a text s
. The range is specified by i
and j
. If found, text_upto() returns the left position of the found character. It returns 0 otherwise. For example, given the following text:
1 2 3 4 5 6 7 (positive positions) e v e n t s -6 -5 -4 -3 -2 -1 0 (non-positive positions)
text_upto(t, -6, 5, text_box("vwxyz", 5)) gives 2. If the set containing characters to find is empty, text_upto() always fails and returns 0.
Possible exceptions: assert_exceptfail
Unchecked errors: invalid text given for s
or set
[in] | s | text in which character is to be found |
[in] | i | range specified |
[in] | j | range specified |
[in] | set | set text containing characters to find |