More robust headers and extra options to rename headers

This commit is contained in:
Jürgen Edelbluth 2022-08-17 11:26:23 +02:00
parent b2f4f8b245
commit f40e3e2a66
Signed by: jed
GPG Key ID: 6DEAEDD5CDB646DF
10 changed files with 378 additions and 16 deletions

117
README.md
View File

@ -38,15 +38,16 @@ poetry add --dev pytest-csv-params
Simply decorate your test method with `@csv_params` and the following parameters:
| Parameter | Type | Description | Example |
|--------------|--------------------------|----------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------|
| `data_file` | `str` | The CSV file to use, relative or absolute path | `"/var/testdata/test1.csv"` |
| `base_dir` | `str` (optional) | Directory to look up relative CSV files (see `data_file`); overrides the command line argument | `join(dirname(__file__), "assets")` |
| `id_col` | `str` (optional) | Column name of the CSV that contains test case IDs | `"ID#"` |
| `dialect` | `csv.Dialect` (optional) | CSV Dialect definition (see [Python CSV Documentation](https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters)) | `csv.excel_tab` |
| `data_casts` | `dict` (optional) | Cast Methods for the CSV Data (see "Data Casting" below) | `{ "a": int, "b": float }` |
| Parameter | Type | Description | Example |
|------------------|--------------------------|----------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------|
| `data_file` | `str` | The CSV file to use, relative or absolute path | `"/var/testdata/test1.csv"` |
| `base_dir` | `str` (optional) | Directory to look up relative CSV files (see `data_file`); overrides the command line argument | `join(dirname(__file__), "assets")` |
| `id_col` | `str` (optional) | Column name of the CSV that contains test case IDs | `"ID#"` |
| `dialect` | `csv.Dialect` (optional) | CSV Dialect definition (see [Python CSV Documentation](https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters)) | `csv.excel_tab` |
| `data_casts` | `dict` (optional) | Cast Methods for the CSV Data (see "Data Casting" below) | `{ "a": int, "b": float }` |
| `header_renames` | `dict` (optional) | Replace headers from the CSV file, so that they can be used as parameters for the test function (since 0.3.0) | `{ "Annual Amount of Bananas": "banana_count", "Cherry export price": "cherry_export_price" }` |
### CSV File Lookup Order
## CSV File Lookup Order
CSV files are looked up following these rules:
@ -57,13 +58,13 @@ CSV files are looked up following this rules:
- If the command line argument is set, the file is looked up there
- If the command line argument is not set, the file is looked up in the current working directory
### Data Casting
## Data Casting
When data is read from CSV, it is always parsed as `str`. If you need it in another format, you can set a method that should be called with the value.
These methods can also be lambdas, and are also good for further transformations.
#### Data Casting Example
### Data Casting Example
```python
from pytest_csv_params.decorator import csv_params
@ -83,7 +84,7 @@ def test_something(col_x, col_y):
...
```
### CSV Format
## CSV Format
The default CSV format is:
@ -94,7 +95,7 @@ The default CSV format is:
**The first line must contain the column names. Column names must match the parameters of the test method (except for an ID column that is configured as such -- see `id_col` decorator parameter).**
#### Example CSV
### Example CSV
```text
"ID#", "part_a", "part_b", "expected_result"
@ -103,7 +104,21 @@ The default CSV format is:
"third", 10, 11, 21
```
### Usage Example
### Headers
The header line is very important, as it maps the values to parameters of the test function. The plugin supports you with that. The following rules apply:
- Every character that is not valid in a variable name is replaced by an underscore (`_`)
- If the character at the start is not a letter or an underscore, it is replaced by an underscore (`_`)
- If the name is still invalid then, because it's a keyword or a builtin name, an exception is raised (`CsvHeaderNameInvalid`)
If you don't want to change your CSV file, you can use the `header_renames` parameter to the decorator to rename headers as needed.
Headers must be unique, and an Exception is raised if not (`CsvHeaderNameInvalid`).
The header handling was heavily improved in Version 0.3.0.
## Usage Example
This example uses the CSV example from above.
@ -133,8 +148,84 @@ def test_texts(text_a, text_b, text_c):
assert f"{text_a}:{text_b}" == text_c
```
### More complex example
This example features nearly all things the plugin has to offer. You find this example also in the test cases, see `tests/test_complex_example.py`.
The CSV file (`tests/assets/example.csv`):
```text
"Test ID","Bananas shipped","Single Banana Weight","Apples shipped","Single Apple Weight","Container Size"
"Order-7","1503","0.5","2545","0.25","1500"
"Order-15","101","0.55","1474","0.33","550"
```
The Test (`tests/test_complex_example.py`):
```python
from math import ceil
from os.path import join, dirname
from pytest_csv_params.decorator import csv_params
@csv_params(
data_file="example.csv",
base_dir=join(dirname(__file__), "assets"),
id_col="Test ID",
header_renames={
"Bananas shipped": "bananas_shipped",
"Single Banana Weight": "banana_weight",
"Apples shipped": "apples_shipped",
"Single Apple Weight": "apple_weight",
"Container Size": "container_size",
},
data_casts={
"bananas_shipped": int,
"banana_weight": float,
"apples_shipped": int,
"apple_weight": float,
"container_size": int,
},
)
def test_container_size_is_big_enough(
bananas_shipped: int, banana_weight: float, apples_shipped: int, apple_weight: float, container_size: int
) -> None:
"""
This is just an example test case for the documentation.
"""
gross_weight = (banana_weight * bananas_shipped) + (apple_weight * apples_shipped)
assert ceil(gross_weight) <= container_size
```
If you decide not to rename the columns, the test would look like this:
```python
# Without `header_renames`, the parameter names are the CSV headers with every
# invalid character (here: the spaces) replaced by an underscore. The case of
# the letters is kept, so "Bananas shipped" becomes `Bananas_shipped`.
@csv_params(
    data_file="example.csv",
    base_dir=join(dirname(__file__), "assets"),
    id_col="Test ID",
    data_casts={
        "Bananas_shipped": int,
        "Single_Banana_Weight": float,
        "Apples_shipped": int,
        "Single_Apple_Weight": float,
        "Container_Size": int,
    },
)
def test_container_size_is_big_enough(
    Bananas_shipped: int, Single_Banana_Weight: float, Apples_shipped: int, Single_Apple_Weight: float, Container_Size: int
) -> None:
    ...
```
## Breaking Changes
### Version 0.3.0
- Column header names that are reserved keywords or builtin names are no longer accepted. You should have been in trouble already if you used them, so nothing should go wrong with this change and existing tests.
### Version 0.2.0
- The parameter order for `pytest_csv_params.decorator.csv_params` changed to allow the shorthand usage with only a `data_file` as positional argument. If you used keyword arguments only (like the docs recommend), you will not run into trouble.

View File

@ -9,13 +9,15 @@ import pytest
from _pytest.mark import MarkDecorator
from _ptcsvp.plugin import BASE_DIR_KEY, Plugin
from _ptcsvp.varname import make_name_valid
from pytest_csv_params.dialect import CsvParamsDefaultDialect
from pytest_csv_params.exception import (
CsvHeaderNameInvalid,
CsvParamsDataFileInaccessible,
CsvParamsDataFileInvalid,
CsvParamsDataFileNotFound,
)
from pytest_csv_params.types import BaseDir, CsvDialect, DataCasts, DataFile, IdColName
from pytest_csv_params.types import BaseDir, CsvDialect, DataCasts, DataFile, HeaderRenames, IdColName
class TestCaseParameters(TypedDict):
@ -53,12 +55,28 @@ def read_csv(base_dir: BaseDir, data_file: DataFile, dialect: CsvDialect) -> Lis
return csv_lines
def add_parametrization(
def clean_headers(current_headers: List[str], replacing: HeaderRenames) -> List[str]:
    """
    Clean the CSV file headers

    Every header that appears as a key in ``replacing`` is swapped for the mapped
    name first. After that, each header is passed through ``make_name_valid``.
    The list handed in is not modified; a new list is returned.

    :param current_headers: header names as read from the CSV file
    :param replacing: mapping from original header to replacement name, or ``None``
    :returns: the cleaned header names, in the original order
    :raises CsvHeaderNameInvalid: if the cleaned names are not unique; it is also
        raised by ``make_name_valid`` for names that cannot be made valid
    """
    renames = replacing if replacing is not None else {}
    cleaned_headers = []
    for header in current_headers:
        replacement = renames.get(header)
        # A header that is mapped to ``None`` is treated like one without a mapping
        cleaned_headers.append(make_name_valid(header if replacement is None else replacement))
    if len(cleaned_headers) != len(set(cleaned_headers)):
        raise CsvHeaderNameInvalid("Header names are not unique")
    return cleaned_headers
def add_parametrization( # pylint: disable=too-many-arguments
data_file: DataFile,
base_dir: BaseDir = None,
id_col: IdColName = None,
data_casts: DataCasts = None,
dialect: CsvDialect = CsvParamsDefaultDialect,
header_renames: HeaderRenames = None,
) -> MarkDecorator:
"""
Get data from the files and add things to the tests
@ -78,6 +96,7 @@ def add_parametrization(
raise CsvParamsDataFileInvalid(f"Cannot find ID column '{id_col}'") from err
if len(headers) == 0:
raise CsvParamsDataFileInvalid("File seems only to have IDs") from None
headers = clean_headers(headers, header_renames)
data: List[TestCaseParameters] = []
for data_line in csv_lines:
line = list(map(str, data_line))

44
_ptcsvp/varname.py Normal file
View File

@ -0,0 +1,44 @@
"""
Test if a variable name is valid
"""
import builtins
import keyword
import re
from string import ascii_letters, digits
from pytest_csv_params.exception import CsvHeaderNameInvalid
VALID_CHARS = ascii_letters + digits
VARIABLE_NAME = re.compile(r"^[a-zA-Z_][A-Za-z0-9_]{0,1023}$")

# Characters that may be kept as they are: the underscore is legal everywhere,
# digits are legal everywhere but at the start of a name.
_FIRST_CHARS = frozenset(ascii_letters + "_")
_NAME_CHARS = frozenset(VALID_CHARS + "_")


def is_valid_name(name: str) -> bool:
    """
    Checks if the variable name is valid

    A name is rejected when it is a keyword, a soft keyword (on interpreters
    whose ``keyword`` module offers ``issoftkeyword``), the name of a builtin,
    or when it does not match ``VARIABLE_NAME``.

    :param name: the name to check
    :returns: ``True`` if the name is accepted, ``False`` otherwise
    """
    if keyword.iskeyword(name):
        return False
    is_soft_keyword = getattr(keyword, "issoftkeyword", None)
    if is_soft_keyword is not None and is_soft_keyword(name):
        return False
    if getattr(builtins, name, None) is not None:
        return False
    # fullmatch instead of match: "$" also matches right before a trailing
    # newline, so match() would accept names like "abc\n"
    return VARIABLE_NAME.fullmatch(name) is not None


def make_name_valid(name: str, replacement_char: str = "_") -> str:
    """
    Make a name valid

    Every character that is not an ASCII letter, a digit or an underscore is
    replaced by ``replacement_char``. The same happens to a first character
    that is not an ASCII letter or an underscore. The result is checked with
    ``is_valid_name``.

    :param name: the name to fix
    :param replacement_char: text that is inserted in place of an invalid character
    :returns: the fixed name
    :raises CsvHeaderNameInvalid: if the fixed name is still not valid; this
        includes an empty name
    """
    # Build the result in a single pass, so that a replacement text of any
    # length cannot shift the positions of the characters still to be checked
    fixed_name = "".join(
        character if character in (_FIRST_CHARS if index == 0 else _NAME_CHARS) else replacement_char
        for index, character in enumerate(name)
    )
    if not is_valid_name(fixed_name):
        raise CsvHeaderNameInvalid(f"'{fixed_name}' is not a valid variable name")
    return fixed_name

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "pytest-csv-params"
version = "0.2.2"
version = "0.3.0"
description = "Pytest plugin for Test Case Parametrization with CSV files"
authors = ["Juergen Edelbluth <csv_params@jued.de>"]
license = "MIT"

View File

@ -19,3 +19,9 @@ class CsvParamsDataFileInvalid(ValueError):
"""
CSV Data is somehow invalid
"""
class CsvHeaderNameInvalid(ValueError):
    """
    A CSV header name is not a valid variable name, or the header names are not unique
    """

View File

@ -14,3 +14,6 @@ IdColName = Optional[str]
DataFile = str
CsvDialect = Type[csv.Dialect]
# Maps a header as it is written in the CSV file to the name that replaces it
HeaderRenamesDict = Dict[str, str]
# ``None`` means that no header is renamed
HeaderRenames = Optional[HeaderRenamesDict]

3
tests/assets/example.csv Normal file
View File

@ -0,0 +1,3 @@
"Test ID","Bananas shipped","Single Banana Weight","Apples shipped","Single Apple Weight","Container Size"
"Order-7","1503","0.5","2545","0.25","1500"
"Order-15","101","0.55","1474","0.33","550"
1 Test ID Bananas shipped Single Banana Weight Apples shipped Single Apple Weight Container Size
2 Order-7 1503 0.5 2545 0.25 1500
3 Order-15 101 0.55 1474 0.33 550

View File

@ -0,0 +1,62 @@
"""
Test cleaning the headers
"""
from typing import List, Optional, Type
import pytest
from _ptcsvp.parametrize import clean_headers
from pytest_csv_params.exception import CsvHeaderNameInvalid
from pytest_csv_params.types import HeaderRenames
@pytest.mark.parametrize(
    ["current_headers", "replacing", "expect_exception", "expect_message", "expect_result"],
    [
        (
            ["alpha 3", "_beta_5", "Ga Mm A"],
            None,
            None,
            None,
            ["alpha_3", "_beta_5", "Ga_Mm_A"],
        ),
        (
            ["abcd" * 300, "_be:ta_ :23", "Ra -/2"],
            None,
            CsvHeaderNameInvalid,
            f"'{'abcd' * 300}' is not a valid variable name",
            None,
        ),
        (
            ["next_var", "_be:ta_ :23", "Ra -/2"],
            None,
            None,
            None,
            ["next_var", "_be_ta___23", "Ra___2"],
        ),
        (
            ["next_var", "_be:ta_ :23", "Ra -/2", "Ra___2"],
            None,
            CsvHeaderNameInvalid,
            "Header names are not unique",
            None,
        ),
        # Renamed headers are taken as given, all others are still cleaned
        (
            ["Annual Amount", "Ra -/2"],
            {"Annual Amount": "amount"},
            None,
            None,
            ["amount", "Ra___2"],
        ),
        # A rename must not introduce a duplicate
        (
            ["alpha", "beta"],
            {"alpha": "beta"},
            CsvHeaderNameInvalid,
            "Header names are not unique",
            None,
        ),
    ],
)
def test_header_cleaning(
    current_headers: List[str],
    replacing: HeaderRenames,
    expect_exception: Optional[Type[ValueError]],
    expect_message: Optional[str],
    expect_result: Optional[List[str]],
) -> None:
    """
    Test Header Cleaning

    Each case either names the exception type and message that
    ``clean_headers`` must raise, or the list of headers it must return.
    """
    if expect_exception is not None:
        with pytest.raises(expect_exception) as raised_error:
            clean_headers(current_headers, replacing)
        # Checked after the block: statements behind the raising call would never run
        assert raised_error.value.args[0] == expect_message
    else:
        result = clean_headers(current_headers, replacing)
        assert result == expect_result

View File

@ -0,0 +1,37 @@
"""
Example Test Case from the documentation
"""
from math import ceil
from os.path import dirname, join
from pytest_csv_params.decorator import csv_params
# The renamed headers are the names used by `data_casts` and by the test parameters
@csv_params(
    data_file="example.csv",
    base_dir=join(dirname(__file__), "assets"),
    id_col="Test ID",
    header_renames={
        "Bananas shipped": "bananas_shipped",
        "Single Banana Weight": "banana_weight",
        "Apples shipped": "apples_shipped",
        "Single Apple Weight": "apple_weight",
        "Container Size": "container_size",
    },
    data_casts={
        "bananas_shipped": int,
        "banana_weight": float,
        "apples_shipped": int,
        "apple_weight": float,
        "container_size": int,
    },
)
def test_container_size_is_big_enough(
    bananas_shipped: int, banana_weight: float, apples_shipped: int, apple_weight: float, container_size: int
) -> None:
    """
    This is just an example test case for the documentation.

    It checks that the rounded-up gross weight of the shipped bananas and
    apples does not exceed the container size.
    """
    gross_weight = (banana_weight * bananas_shipped) + (apple_weight * apples_shipped)
    assert ceil(gross_weight) <= container_size

97
tests/test_varname.py Normal file
View File

@ -0,0 +1,97 @@
"""
Test the varname handling
"""
import sys
from typing import Optional
import pytest
from _ptcsvp.varname import is_valid_name, make_name_valid
from pytest_csv_params.exception import CsvHeaderNameInvalid
@pytest.mark.parametrize(
    ("var_name", "is_valid"),
    [
        ("var_name", True),
        ("varname", True),
        ("_varname", True),
        ("_varName", True),
        ("VarName", True),
        ("VarName_", True),
        ("Var1Name_", True),
        ("Var1Name_0", True),
        ("0_var_name", False),
        ("var name", False),
        ("Var Name", False),
        ("Var-Name", False),
        ("Var.Name", False),
        ("Var:Name", False),
        (":VarName", False),
        (".VarName", False),
        (";VarName", False),
        ("VarName+", False),
        ("VarName#", False),
        ("VarNäme", False),
        ("VarNÖme", False),
        ("Varßn_ame", False),
        ("def", False),
        ("next", False),
        ("if", False),
    ],
)
def test_is_valid_name(var_name: str, is_valid: bool) -> None:
    """
    The verdict of ``is_valid_name`` must equal the expectation for the name
    """
    verdict = is_valid_name(var_name)
    assert verdict == is_valid
@pytest.mark.parametrize(
    ("var_name", "valid_var_name", "raises_error"),
    [
        ("var_name", "var_name", False),
        ("4var_name", "_var_name", False),
        ("44_var_name", "_4_var_name", False),
        ("varName", "varName", False),
        ("var Name", "var_Name", False),
        (" varName", "_varName", False),
        (":varName", "_varName", False),
        ("var-name", "var_name", False),
        ("a" * 1025, None, True),
        ("abcd" * 300, None, True),
        ("continue", None, True),
        ("float", None, True),
        ("str", None, True),
    ],
)
def test_make_name_valid(var_name: str, valid_var_name: Optional[str], raises_error: bool) -> None:
    """
    A name is either turned into the expected valid name, or rejected with an error
    """
    if not raises_error:
        fixed = make_name_valid(var_name)
        assert fixed == valid_var_name
        assert is_valid_name(fixed)
        return
    expected_message = f"'{var_name}' is not a valid variable name"
    with pytest.raises(CsvHeaderNameInvalid) as raised_error:
        make_name_valid(var_name)
    assert raised_error.value.args[0] == expected_message
@pytest.mark.skipif(sys.version_info < (3, 10), reason="Only relevant for python 3.10 and above")
@pytest.mark.parametrize(
    ("name",),
    [
        ("match",),
        ("_",),
    ],
)
def test_310_names(name: str) -> None:
    """
    On 3.10 and above, the names that are special there must be rejected
    """
    accepted = is_valid_name(name)
    assert not accepted