Prompt Format
class
ruprompts.prompt_format.PromptFormat
(template: str, compiled_template: Optional[str] = None, tokenizer: Optional[transformers.tokenization_utils_base.PreTrainedTokenizerBase] = None)
ruprompts.prompt_format.PromptFormat
(template: str, compiled_template: Optional[str] = None, tokenizer: Optional[transformers.tokenization_utils_base.PreTrainedTokenizerBase] = None)
Arranges trainable tokens and dataset fields.
Format patterns:
- Repeated tokens:
- Pattern:
<P*{int}>
- Example:
<P*3>
- Compiled example:
<P><P><P>
- Pattern:
- Initialization from phrase:
- Pattern:
<P>{str}</P>
- Example:
<P>Two tokens</P>
- Compiled example:
<P><P>
, prompt provider is initialized with embeddings of tokens "Two" and "tokens"
- Pattern:
Examples:
>>> PromptFormat("<P*20>{text}<P*10>")
>>> PromptFormat("<P>Passage:</P>{passage}<P>\nQuestion:</P>{question}<P>\nAnswer:</P>")
See also:
BasePromptFormat.__call__
Parameters:
Name | Type | Description | Default |
---|---|---|---|
template |
str
|
See format patterns. |
required |
compiled_template |
Optional[str]
|
Compiled template. |
None |
tokenizer |
Optional[transformers.tokenization_utils_base.PreTrainedTokenizerBase]
|
Tokenizer to process the |
None |
class
ruprompts.prompt_format.PromptFormatSafe
(*args, **kwargs)
ruprompts.prompt_format.PromptFormatSafe
(*args, **kwargs)
class
ruprompts.prompt_format.BasePromptFormat
ruprompts.prompt_format.BasePromptFormat
Base class for all prompt formats.
property
readonly
prompt_length
: int
prompt_length
: int
Count of prompt tokens.
special
__call__
(items: Union[Dict[str, Any], List[Dict[str, Any]]] = None, return_ranges: bool = False, **kwargs) -> Union[str, Tuple[str, Dict[str, slice]], List[str], Tuple[List[str], List[Dict[str, slice]]]]
__call__
(items: Union[Dict[str, Any], List[Dict[str, Any]]] = None, return_ranges: bool = False, **kwargs) -> Union[str, Tuple[str, Dict[str, slice]], List[str], Tuple[List[str], List[Dict[str, slice]]]]
Applies prompt format to either one or multiple items.
Takes either one item or a list of them, where an item is a dictionary with string keys.
Each item is then formatted into a single string, where the keys are inserted
the same way as in the format string. If return_ranges=True, also returns a dict of slices
that gives, for the value of each key in the item, its start and end positions in the resulting string.
Examples:
>>> f = PromptFormat("<P>{text}<P>")
>>> item = {"text": "one two three", "other": "value"}
>>> s, r = f(item, return_ranges=True)
>>> assert s == "<P>one two three<P>"
>>> assert s[r["text"]] == item["text"]
>>> f(text="one two three", return_ranges=True)
>>> f([{"text": "a"}, {"text": "b"}], return_ranges=True)
Parameters:
Name | Type | Description | Default |
---|---|---|---|
items |
Union[Dict[str, Any], List[Dict[str, Any]]]
|
Item or list of items. |
None |
return_ranges |
bool
|
Whether to return ranges. |
False |
**kwargs |
Can be used instead of items (see examples). |
{} |
Returns:
Type | Description | Condition |
---|---|---|
str |
formatted string | items is a Dict[str, Any] and return_ranges=False |
Tuple[str, Dict[str, slice]] |
formatted string and ranges | items is a Dict[str, Any] and return_ranges=True |
List[str] |
list of formatted strings | items is a List[Dict[str, Any]] and return_ranges=False |
Tuple[List[str], List[Dict[str, slice]]] |
list of formatted strings and ranges | items is a List[Dict[str, Any]] and return_ranges=True |
Source code in ruprompts/prompt_format.py
def __call__(
    self,
    items: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
    return_ranges: bool = False,
    **kwargs,
) -> Union[
    str, Tuple[str, Dict[str, slice]], List[str], Tuple[List[str], List[Dict[str, slice]]]
]:
    """Applies prompt format to either one or multiple items.

    Takes either one item or a list of them, where an item is a dictionary with string keys.
    Each item is formatted into a single string, where the keys are inserted
    the same way as in the format string. If `return_ranges=True`, also returns a dict of slices
    that gives, for the value of each key in the item, its start and end positions
    in the resulting string.

    Examples:
        >>> f = PromptFormat("<P>{text}<P>")
        >>> item = {"text": "one two three", "other": "value"}
        >>> s, r = f(item, return_ranges=True)
        >>> assert s == "<P>one two three<P>"
        >>> assert s[r["text"]] == item["text"]
        >>> f(text="one two three", return_ranges=True)
        >>> f([{"text": "a"}, {"text": "b"}], return_ranges=True)

    Args:
        items: Item or list of items.
        return_ranges: Whether to return ranges.
        **kwargs: Can be used instead of `items` (see examples).

    # Returns:
    | Type                                       | Description                          | Condition                                                     |
    | ------------------------------------------ | ------------------------------------ | ------------------------------------------------------------- |
    | `str`                                      | formatted string                     | `items` is a `Dict[str, Any]` and `return_ranges=False`       |
    | `Tuple[str, Dict[str, slice]]`             | formatted string and ranges          | `items` is a `Dict[str, Any]` and `return_ranges=True`        |
    | `List[str]`                                | list of formatted strings            | `items` is a `List[Dict[str, Any]]` and `return_ranges=False` |
    | `Tuple[List[str], List[Dict[str, slice]]]` | list of formatted strings and ranges | `items` is a `List[Dict[str, Any]]` and `return_ranges=True`  |
    """
    # Keyword arguments stand in for the item only when no positional item is given.
    payload = kwargs if items is None else items
    # A list is treated as a batch; anything else is handled as a single item.
    formatter = self.batch_format if isinstance(payload, list) else self.format
    return formatter(payload, return_ranges)
as_dict
() -> Dict[str, Any]
as_dict
() -> Dict[str, Any]
Serializes the prompt object as dict.
Returns such a dict d
that running __init__(**d)
results in an identical object.
Source code in ruprompts/prompt_format.py
@abc.abstractmethod
def as_dict(self) -> Dict[str, Any]:
    """Serializes the prompt format into a plain dictionary.

    The returned dict `d` must be such that calling `__init__(**d)`
    produces an object identical to this one.
    """
batch_format
(items: List[Dict[str, Any]], return_ranges: bool = False) -> Union[List[str], Tuple[List[str], List[Dict[str, slice]]]]
batch_format
(items: List[Dict[str, Any]], return_ranges: bool = False) -> Union[List[str], Tuple[List[str], List[Dict[str, slice]]]]
Formats a list of items into strings and possibly returns ranges.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
items |
List[Dict[str, Any]]
|
Items to be formatted. |
required |
return_ranges |
bool
|
Whether to return ranges. |
False |
Returns:
Type | Description |
---|---|
Union[List[str], Tuple[List[str], List[Dict[str, slice]]]]
|
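Union[List[str], Tuple[List[str], List[Dict[str, slice]]]]: Returns List[str] when return_ranges=False and Tuple[List[str], List[Dict[str, slice]]] when return_ranges=True |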
Source code in ruprompts/prompt_format.py
def batch_format(
    self, items: List[Dict[str, Any]], return_ranges: bool = False
) -> Union[List[str], Tuple[List[str], List[Dict[str, slice]]]]:
    """Formats a list of items into strings and possibly returns ranges.

    Each item is passed to `self.format` with the same `return_ranges` flag.
    An empty `items` list yields `[]`, or `([], [])` when `return_ranges=True`.

    Args:
        items: Items to be formatted.
        return_ranges: Whether to return ranges.

    Returns:
        Union[List[str], Tuple[List[str], List[Dict[str, slice]]]]: Returns `List[str]` when `return_ranges=False`
            and `Tuple[List[str], List[Dict[str, slice]]]` when `return_ranges=True`
    """
    if not return_ranges:
        return [self.format(item, return_ranges=return_ranges) for item in items]

    # Collect strings and ranges in two parallel lists instead of transposing
    # with zip(*...): unpacking the transposed result fails on an empty batch.
    strings: List[str] = []
    ranges: List[Dict[str, slice]] = []
    for item in items:
        text, item_ranges = self.format(item, return_ranges=return_ranges)
        strings.append(text)
        ranges.append(item_ranges)
    return strings, ranges
format
(item: Dict[str, Any], return_ranges: bool = False) -> Union[str, Tuple[str, Dict[str, slice]]]
format
(item: Dict[str, Any], return_ranges: bool = False) -> Union[str, Tuple[str, Dict[str, slice]]]
Formats one item into a string and possibly returns ranges.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
item |
Dict[str, Any]
|
Item to be formatted. |
required |
return_ranges |
bool
|
Whether to return ranges. |
False |
Returns:
Type | Description |
---|---|
Union[str, Tuple[str, Dict[str, slice]]]
|
Union[str, Tuple[str, Dict[str, slice]]]: Returns str when return_ranges=False and Tuple[str, Dict[str, slice]] when return_ranges=True |
Source code in ruprompts/prompt_format.py
@abc.abstractmethod
def format(
    self, item: Dict[str, Any], return_ranges: bool = False
) -> Union[str, Tuple[str, Dict[str, slice]]]:
    """Formats a single item into a string, optionally together with ranges.

    Args:
        item: Item to be formatted.
        return_ranges: Whether to return ranges.

    Returns:
        Union[str, Tuple[str, Dict[str, slice]]]: Returns `str` when `return_ranges=False`
            and `Tuple[str, Dict[str, slice]]` when `return_ranges=True`
    """