io
delimited

delimited

Mojo module 🡭

delimited

Working with simple delimited text.

Example

Compile-time known fields:

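TODO: the listing below combines several examples (compile-time known fields, dynamic fields, and a dict-style reader/writer); they should be separate examples, but the doc parser can’t seem to handle that here.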

from collections.string import StringSlice
from testing import assert_equal

from extramojo.bstr.bstr import SplitIterator
from extramojo.cli.parser import ParsedOpts
from extramojo.io.buffered import (
    BufferedReader,
    BufferedWriter,
)
from extramojo.io.delimited import (
    DelimReader,
    FromDelimited,
    ToDelimited,
    DelimWriter,
)

# #########################################
# Example with compile-time known fields.
# #########################################

@value
struct SerDerStruct(ToDelimited, FromDelimited):
    """A two-column record (`index`, `name`) that can be written to and read
    back from delimited text."""

    var index: Int
    var name: String

    fn write_header(read self, mut writer: DelimWriter) raises:
        """Write the column names in the same order as the record fields."""
        writer.write_record("index", "name")

    fn write_to_delimited(read self, mut writer: DelimWriter) raises:
        """Write this value's `index` and `name` as a single record."""
        writer.write_record(self.index, self.name)

    @staticmethod
    fn from_delimited(
        mut data: SplitIterator,
        read header_values: Optional[List[String]] = None,
    ) raises -> Self:
        """Build a value from the next two fields yielded by `data`.

        `header_values` is accepted but not read by this implementation.
        """
        # First field: the integer index, parsed from the field's bytes.
        var parsed_index = Int(StringSlice(unsafe_from_utf8=data.__next__()))
        # Second field: the name. The bytes are appended to an empty String
        # because the String constructor expected a nul terminated byte span.
        var parsed_name = String()
        parsed_name.write_bytes(data.__next__())
        return Self(parsed_index, parsed_name)


fn test_delim_reader_writer(file: String) raises:
    """Round-trip 1000 `SerDerStruct` rows through the file at path `file`.

    The rows are serialized with a tab delimiter and `write_header=True`, then
    read back with `has_header=True` and compared field by field.
    """
    # Build the rows we expect to get back.
    var expected = List[SerDerStruct]()
    for n in range(1000):
        var label = String("MyNameIs" + String(n))
        expected.append(SerDerStruct(n, label^))

    # Write phase.
    var tsv_writer = DelimWriter(
        BufferedWriter(open(String(file), "w")),
        delim="	",
        write_header=True,
    )
    for row in expected:
        tsv_writer.serialize(row)
    tsv_writer.flush()
    tsv_writer.close()

    # Read phase: the reader is consumed by the loop.
    var tsv_reader = DelimReader[SerDerStruct](
        BufferedReader(open(String(file), "r")),
        delim=ord("	"),
        has_header=True,
    )
    var seen = 0
    for parsed in tsv_reader^:
        assert_equal(expected[seen].index, parsed.index)
        assert_equal(expected[seen].name, parsed.name)
        seen += 1

    # Every written row must have been read back.
    assert_equal(seen, len(expected))

# #########################################
# Example with dynamic fields.
# #########################################

@value
struct Score[
    truth_lengths_origin: ImmutableOrigin,
    truth_names_origin: ImmutableOrigin,
](ToDelimited):
    """One assembly's scores, written as a row whose score columns are only
    known at runtime.

    The truth names (used for the header) and truth lengths (used for each
    `score/length` cell) are not owned: the struct holds pointers to lists
    that live elsewhere.

    Parameters:
        truth_lengths_origin: Origin of the referenced list of truth lengths.
        truth_names_origin: Origin of the referenced list of truth names.
    """

    var assembly_name: String
    var assembly_length: Int
    var scores: List[Int32]
    var truth_lengths: Pointer[List[Int], truth_lengths_origin]
    var truth_names: Pointer[List[String], truth_names_origin]

    fn __init__(
        out self,
        owned assembly_name: String,
        assembly_length: Int,
        owned scores: List[Int32],
        ref [truth_lengths_origin]truth_lengths: List[Int],
        ref [truth_names_origin]truth_names: List[String],
    ):
        """Take ownership of the name and scores; keep pointers to the two
        truth lists.

        Args:
            assembly_name: Name written in the first column.
            assembly_length: Length written in the second column.
            scores: One score per truth entry.
            truth_lengths: List indexed in parallel with `scores`.
            truth_names: List used as the score column names in the header.
        """
        self.assembly_name = assembly_name^
        self.assembly_length = assembly_length
        self.scores = scores^
        self.truth_lengths = Pointer(to=truth_lengths)
        self.truth_names = Pointer(to=truth_names)

    fn write_to_delimited(read self, mut writer: DelimWriter) raises:
        """Write the name, the length, and one `score/truth_length` field per
        score.

        Raises:
            If there are more scores than truth lengths, since each score is
            paired with the truth length at the same index.
        """
        var num_scores = len(self.scores)
        # Validate before writing anything so a bad row is never half-written
        # and `truth_lengths[][i]` below can never index out of bounds.
        if num_scores > len(self.truth_lengths[]):
            raise Error("Score has more scores than truth lengths")

        writer.write_field(self.assembly_name, is_last=False)
        # With no scores the length is the final field, so flag it as last;
        # otherwise no field in this row would ever be written with
        # `is_last=True`.
        writer.write_field(self.assembly_length, is_last=num_scores == 0)
        for i in range(num_scores):
            writer.write_field(
                String("{}/{}").format(self.scores[i], self.truth_lengths[][i]),
                is_last=i == num_scores - 1,
            )

    fn write_header(read self, mut writer: DelimWriter) raises:
        """Write the two fixed column names followed by one column per truth
        name."""
        var num_names = len(self.truth_names[])
        writer.write_field("assembly_name", is_last=False)
        # Same reasoning as in `write_to_delimited`: with no truth names this
        # is the final header field.
        writer.write_field("assembly_length", is_last=num_names == 0)
        for i in range(num_names):
            writer.write_field(
                self.truth_names[][i], is_last=i == num_names - 1
            )

fn run_check_scores(opts: ParsedOpts) raises:
    """Serialize four example `Score` rows to `/tmp/out.tsv`.

    The rows all point at the same `truth_lengths` and `truth_names` lists.
    `opts` is not read by this example.
    """
    # Shared data that every row points at.
    var truth_lengths = List(125, 2000, 1234)
    var truth_names = List(String("A"), String("B"), String("C"))
    var tsv_path = "/tmp/out.tsv"

    var rows = List(
        Score(String("Assembly1"), 100, List[Int32](1, 2, 3), truth_lengths, truth_names),
        Score(String("Assembly2"), 100, List[Int32](100, 2, 3), truth_lengths, truth_names),
        Score(String("Assembly3"), 100, List[Int32](1, 100, 3), truth_lengths, truth_names),
        Score(String("Assembly4"), 100, List[Int32](1, 2, 100), truth_lengths, truth_names)
    )

    var tsv_writer = DelimWriter(
        BufferedWriter(open(tsv_path, "w")),
        delim="	",
        write_header=True,
    )

    # `Score` is parameterized on the origins of the two shared lists, so the
    # concrete type is spelled out for `serialize`.
    for row in rows:
        tsv_writer.serialize[
            Score[__origin_of(truth_lengths), __origin_of(truth_names)]
        ](row)

    tsv_writer.flush()
    tsv_writer.close()

# #########################################
# Example similar to dictreader/dictwriter.
# #########################################

@value
struct ThinWrapper(ToDelimited, FromDelimited):
    """A dict-backed record: keys are the column names, values are the cells.

    Similar in spirit to a dictreader/dictwriter: the set of columns comes
    from the header rather than from fields declared on the struct.
    """

    var stuff: Dict[String, Int]

    fn write_to_delimited(read self, mut writer: DelimWriter) raises:
        """Write every value in `stuff` as one field, flagging the final one
        as last."""
        var seen = 1
        for value in self.stuff.values():  # Relying on stable iteration order
            writer.write_field(value, is_last=seen == len(self.stuff))
            seen += 1

    fn write_header(read self, mut writer: DelimWriter) raises:
        """Write every key in `stuff` as one header field, flagging the final
        one as last."""
        var seen = 1
        for header in self.stuff.keys():  # Relying on stable iteration order
            writer.write_field(header, is_last=seen == len(self.stuff))
            seen += 1

    @staticmethod
    fn from_delimited(
        mut data: SplitIterator,
        read header_values: Optional[List[String]] = None,
    ) raises -> Self:
        """Build a record by pairing each header name with the next field of
        `data`, parsed as an `Int`.

        Raises:
            If `header_values` is empty (`None`): the column names are the
            dict keys, so a record cannot be built without them.
        """
        # The parameter defaults to None; fail with a clear message instead of
        # unwrapping an empty Optional.
        if not header_values:
            raise Error(
                "ThinWrapper.from_delimited requires header values; read with"
                " has_header=True"
            )
        var result = Dict[String, Int]()
        for header in header_values.value():
            result[header] = Int(
                StringSlice(unsafe_from_utf8=data.__next__())
            )
        return Self(result)


fn test_delim_reader_writer_dicts(file: String) raises:
    """Round-trip 1000 `ThinWrapper` rows through the file at path `file`.

    Each row maps the columns `a` through `e` to the row number. The rows are
    serialized with a tab delimiter and `write_header=True`, read back with
    `has_header=True`, and compared column by column.
    """
    var columns = List(
        String("a"), String("b"), String("c"), String("d"), String("e")
    )

    # Build the rows we expect to get back.
    var expected = List[ThinWrapper]()
    for n in range(1000):
        var cells = Dict[String, Int]()
        for column in columns:
            cells[column] = n
        expected.append(ThinWrapper(cells))

    # Write phase.
    var tsv_writer = DelimWriter(
        BufferedWriter(open(String(file), "w")),
        delim="	",
        write_header=True,
    )
    for row in expected:
        tsv_writer.serialize(row)
    tsv_writer.flush()
    tsv_writer.close()

    # Read phase: the reader is consumed by the loop.
    var tsv_reader = DelimReader[ThinWrapper](
        BufferedReader(open(String(file), "r")),
        delim=ord("	"),
        has_header=True,
    )
    var seen = 0
    for parsed in tsv_reader^:
        for column in columns:
            assert_equal(expected[seen].stuff[column], parsed.stuff[column])
        seen += 1

    # Every written row must have been read back.
    assert_equal(seen, len(expected))
    print("Successful delim_writer")

Structs

Traits