delimited
Mojo module 🡭
delimited
Working with simple delimited text.
Example
Compile-time known fields:
TODO: this should be two different examples, but the doc parser can’t seem to handle that for this example.
from collections.string import StringSlice
from testing import assert_equal
from extramojo.bstr.bstr import SplitIterator
from extramojo.cli.parser import ParsedOpts
from extramojo.io.buffered import (
BufferedReader,
BufferedWriter,
)
from extramojo.io.delimited import (
DelimReader,
FromDelimited,
ToDelimited,
DelimWriter,
)
# #########################################
# Example with compile-time known fields.
# #########################################
@value
struct SerDerStruct(ToDelimited, FromDelimited):
    """A record with two compile-time known columns: `index` and `name`."""

    var index: Int
    """Numeric column, written first."""
    var name: String
    """Text column, written second."""

    fn write_to_delimited(read self, mut writer: DelimWriter) raises:
        """Emit `index` and `name` as a single record.

        Args:
            writer: The delimited writer that receives the record.
        """
        writer.write_record(self.index, self.name)

    fn write_header(read self, mut writer: DelimWriter) raises:
        """Emit the column names `index` and `name` as a single record.

        Args:
            writer: The delimited writer that receives the header.
        """
        writer.write_record("index", "name")

    @staticmethod
    fn from_delimited(
        mut data: SplitIterator,
        read header_values: Optional[List[String]] = None,
    ) raises -> Self:
        """Build a record from the next two fields yielded by `data`.

        Args:
            data: Field iterator; exactly two fields are consumed.
            header_values: Not used by this type, the column layout is fixed.

        Returns:
            A record holding the parsed `index` and the copied `name`.
        """
        var parsed_index = Int(StringSlice(unsafe_from_utf8=data.__next__()))
        # The bytes are appended to an empty String because the String
        # constructor expected a nul terminated byte span.
        var parsed_name = String()
        parsed_name.write_bytes(data.__next__())
        return Self(parsed_index, parsed_name)
fn test_delim_reader_writer(file: String) raises:
    """Round-trip 1000 `SerDerStruct` records through `file`.

    The records are serialized with a header, read back with `DelimReader`,
    and compared field by field against what was written.

    Args:
        file: Path of the file that is written and then re-read.
    """
    var expected = List[SerDerStruct]()
    for i in range(1000):
        expected.append(SerDerStruct(i, String("MyNameIs" + String(i))))

    # Write phase.
    var writer = DelimWriter(
        BufferedWriter(open(String(file), "w")),
        delim=" ",
        write_header=True,
    )
    for record in expected:
        writer.serialize(record)
    writer.flush()
    writer.close()

    # Read phase: the reader takes the delimiter as a byte value.
    var reader = DelimReader[SerDerStruct](
        BufferedReader(open(String(file), "r")),
        delim=ord(" "),
        has_header=True,
    )
    var n_read = 0
    for record in reader^:
        assert_equal(expected[n_read].index, record.index)
        assert_equal(expected[n_read].name, record.name)
        n_read += 1
    # Every written record must have been read back.
    assert_equal(n_read, len(expected))
# #########################################
# Example with dynamic fields.
# #########################################
@value
struct Score[
    truth_lengths_origin: ImmutableOrigin,
    truth_names_origin: ImmutableOrigin,
](ToDelimited):
    """A record whose number of columns is only known at run time.

    Two fixed columns (`assembly_name`, `assembly_length`) are followed by one
    column per entry of `scores`. The header uses one column per entry of
    `truth_names`. The truth lists are referenced through pointers, not copied.

    Parameters:
        truth_lengths_origin: Origin of the referenced `truth_lengths` list.
        truth_names_origin: Origin of the referenced `truth_names` list.
    """

    var assembly_name: String
    var assembly_length: Int
    var scores: List[Int32]
    var truth_lengths: Pointer[List[Int], truth_lengths_origin]
    var truth_names: Pointer[List[String], truth_names_origin]

    fn __init__(
        out self,
        owned assembly_name: String,
        assembly_length: Int,
        owned scores: List[Int32],
        ref [truth_lengths_origin]truth_lengths: List[Int],
        ref [truth_names_origin]truth_names: List[String],
    ):
        """Take ownership of the name and scores; keep pointers to the truths.

        Args:
            assembly_name: Value of the first column.
            assembly_length: Value of the second column.
            scores: One score per dynamic column.
            truth_lengths: Denominators printed next to each score.
            truth_names: Header names of the dynamic columns.
        """
        self.assembly_name = assembly_name^
        self.assembly_length = assembly_length
        self.scores = scores^
        self.truth_lengths = Pointer(to=truth_lengths)
        self.truth_names = Pointer(to=truth_names)

    fn write_to_delimited(read self, mut writer: DelimWriter) raises:
        """Write the two fixed fields, then one `score/truth_length` per score.

        Args:
            writer: The delimited writer that receives the fields.
        """
        var n_scores = len(self.scores)
        writer.write_field(self.assembly_name, is_last=False)
        # With no scores this is the final field of the record, so it must
        # carry `is_last`; otherwise no field would ever be flagged as last
        # (presumably leaving the record unterminated - confirm against
        # DelimWriter.write_field).
        writer.write_field(self.assembly_length, is_last=n_scores == 0)
        for i in range(n_scores):
            # NOTE(review): indexes `truth_lengths` by score position, so the
            # caller must supply at least as many truth lengths as scores.
            writer.write_field(
                String("{}/{}").format(self.scores[i], self.truth_lengths[][i]),
                is_last=i == n_scores - 1,
            )

    fn write_header(read self, mut writer: DelimWriter) raises:
        """Write the two fixed column names, then every truth name.

        Args:
            writer: The delimited writer that receives the header fields.
        """
        var n_names = len(self.truth_names[])
        writer.write_field("assembly_name", is_last=False)
        # Same reasoning as in `write_to_delimited`: flag this field as the
        # last one when there are no dynamic columns to follow.
        writer.write_field("assembly_length", is_last=n_names == 0)
        for i in range(n_names):
            writer.write_field(self.truth_names[][i], is_last=i == n_names - 1)
fn run_check_scores(opts: ParsedOpts) raises:
    """Write four example `Score` records, with a header, to `/tmp/out.tsv`.

    Args:
        opts: Parsed command line options; not read by this example.
    """
    var truth_names = List(String("A"), String("B"), String("C"))
    var truth_lengths = List(125, 2000, 1234)
    var output_scores_tsv = "/tmp/out.tsv"
    # Every Score refers to the same two truth lists declared above.
    var scores = List(
        Score(String("Assembly1"), 100, List[Int32](1, 2, 3), truth_lengths, truth_names),
        Score(String("Assembly2"), 100, List[Int32](100, 2, 3), truth_lengths, truth_names),
        Score(String("Assembly3"), 100, List[Int32](1, 100, 3), truth_lengths, truth_names),
        Score(String("Assembly4"), 100, List[Int32](1, 2, 100), truth_lengths, truth_names)
    )
    # The output is a .tsv file, so fields are separated by a tab rather than
    # by a space.
    var out_writer = DelimWriter(
        BufferedWriter(open(output_scores_tsv, "w")),
        delim="\t",
        write_header=True,
    )
    for score in scores:
        # The record type is spelled out explicitly, including both origins.
        out_writer.serialize[
            Score[__origin_of(truth_lengths), __origin_of(truth_names)]
        ](score)
    out_writer.flush()
    out_writer.close()
# #########################################
# Example similar to dictreader/dictwriter.
# #########################################
@value
struct ThinWrapper(ToDelimited, FromDelimited):
    """A dictionary-backed record, similar to a dictreader/dictwriter row.

    The dictionary keys are the column names and the values are the fields.
    """

    var stuff: Dict[String, Int]
    """Column name to value mapping for one record."""

    fn write_to_delimited(read self, mut writer: DelimWriter) raises:
        """Write every dictionary value as one field.

        Args:
            writer: The delimited writer that receives the fields.
        """
        # 1-based position of the current value; the field whose position
        # equals the dictionary size is flagged as the last one.
        var seen = 1
        for value in self.stuff.values():  # Relying on stable iteration order
            writer.write_field(value, is_last=seen == len(self.stuff))
            seen += 1

    fn write_header(read self, mut writer: DelimWriter) raises:
        """Write every dictionary key as one header field.

        Args:
            writer: The delimited writer that receives the header fields.
        """
        var seen = 1
        for header in self.stuff.keys():  # Relying on stable iteration order
            writer.write_field(header, is_last=seen == len(self.stuff))
            seen += 1

    @staticmethod
    fn from_delimited(
        mut data: SplitIterator,
        read header_values: Optional[List[String]] = None,
    ) raises -> Self:
        """Build a record by pairing each header name with the next field.

        Args:
            data: Field iterator; one field is consumed per header name.
            header_values: Column names, required for this type.

        Returns:
            A wrapper whose dictionary maps each header name to its parsed Int.

        Raises:
            If `header_values` is empty (no header is available), or if a
            field cannot be parsed as an Int.
        """
        # Without column names there is nothing to key the values by. Fail
        # with a clear error instead of unwrapping an empty Optional.
        if not header_values:
            raise Error(
                "ThinWrapper.from_delimited requires header values; read the"
                " data with has_header=True"
            )
        var result = Dict[String, Int]()
        for header in header_values.value():
            result[header] = Int(
                StringSlice(unsafe_from_utf8=data.__next__())
            )
        return Self(result)
fn test_delim_reader_writer_dicts(file: String) raises:
    """Round-trip 1000 `ThinWrapper` records through `file`.

    Each record maps the columns a to e to the record's index. The records
    are written with a header, read back, and compared column by column.

    Args:
        file: Path of the file that is written and then re-read.
    """
    var expected = List[ThinWrapper]()
    var columns = List(
        String("a"), String("b"), String("c"), String("d"), String("e")
    )
    for i in range(1000):
        var row = Dict[String, Int]()
        for column in columns:
            row[column] = i
        expected.append(ThinWrapper(row))

    # Write phase.
    var writer = DelimWriter(
        BufferedWriter(open(String(file), "w")),
        delim=" ",
        write_header=True,
    )
    for record in expected:
        writer.serialize(record)
    writer.flush()
    writer.close()

    # Read phase: the reader takes the delimiter as a byte value.
    var reader = DelimReader[ThinWrapper](
        BufferedReader(open(String(file), "r")),
        delim=ord(" "),
        has_header=True,
    )
    var n_read = 0
    for record in reader^:
        for column in columns:
            assert_equal(expected[n_read].stuff[column], record.stuff[column])
        n_read += 1
    # Every written record must have been read back.
    assert_equal(n_read, len(expected))
    print("Successful delim_writer")
Structs
DelimReader
: Read delimited data that is delimited by a single byte.
DelimWriter
: Write delimited data.
Traits
FromDelimited
: Create an instance of `Self` from the iterator over `Span[UInt8]` bytes.
ToDelimited