一个简单的需求:读入一个目录内的所有文本文件,每个文件的每一行定义一个二维点,例如`x=1,y=2`;最终得到所有点的一个列表。这里不使用serde或者现成的parser库,这么简单的格式直接手写就行了。
先来一个糙快猛的版本。其中用了一个当时还是nightly feature的`str_split_once`(`str::split_once`已在Rust 1.52稳定,新版工具链上无需再开启该feature),不过这不重要。
// NOTE: `str::split_once` was nightly-only (`#![feature(str_split_once)]`) when this was written.
// It has been stable since Rust 1.52, so the feature gate is no longer needed.

/// A two-dimensional point, parsed from a line such as `x=1,y=2`.
#[derive(Debug)]
struct Point {
    x: i32,
    y: i32,
}

/// In-memory stand-in for a directory: a list of `(file name, file contents)` pairs.
#[derive(Debug)]
struct Directory(Vec<(&'static str, &'static str)>);

/// Parses every line of `text` into a [`Point`] (quick-and-dirty version).
///
/// Each line is split on `,` into `key=value` fields; keys and values are trimmed.
/// `x` and `y` are parsed as `i32`; any other key is silently ignored.
///
/// # Panics
///
/// Deliberately unhandled for now (each spot is marked `// todo`): panics when a field has
/// no `=`, when the value of `x`/`y` is not a valid `i32`, or when a line lacks `x` or `y`.
fn parse_file(text: &str) -> Vec<Point> {
    let mut points = Vec::new();
    for line in text.lines() {
        let mut x = None;
        let mut y = None;
        for field in line.split(',') {
            let (k, v) = field.split_once('=').unwrap(); // todo
            match k.trim() {
                "x" => { x = Some(v.trim().parse().unwrap()); }, // todo
                "y" => { y = Some(v.trim().parse().unwrap()); }, // todo
                _ => {}, // todo
            }
        }
        points.push(Point { x: x.unwrap(), y: y.unwrap() }); // todo
    }
    points
}

/// Parses one hard-coded "file" and prints its name together with the parsed points.
fn main() {
    let d = Directory(vec![
        ("file1", "x=3,y=4\nx=1,y=2")
    ]);
    for (filename, text) in &d.0 {
        println!("{} {:?}", filename, parse_file(text));
    }
}
上面的代码里留下了5个todo。想成为一段健壮的程序,它们全部都要处理。这里使用了thiserror库来定义错误类型(并借助fehler的`#[throws]`和`throw!`简化返回值的书写),要换成snafu或者手写也是完全没问题的。代码行数一下就翻倍了,不过看起来是那么的正确。
#![feature(str_split_once)] use std::num::ParseIntError; use thiserror::Error; use fehler::{throws, throw}; use self::ParseFileError::*; #[derive(Error, Debug)] enum ParseFileError { #[error("no equal sign found: `{field}` in line `{line}`")] NoEqualSign { field: String, line: String }, #[error("parse x as i32 failed: `{field}` in line `{line}`")] XParseFailed { source: ParseIntError, field: String, line: String }, #[error("parse y as i32 failed: `{field}` in line `{line}`")] YParseFailed { source: ParseIntError, field: String, line: String }, #[error("unknown key: `{field}` in line `{line}`")] UnknownField { field: String, line: String}, #[error("x not found: `{line}`")] XNotFound { line: String }, #[error("y not found: `{line}`")] YNotFound { line: String }, } #[derive(Debug)] struct Point { x: i32, y: i32, } #[derive(Debug)] struct Directory(Vec<(&'static str, &'static str)>); #[throws(ParseFileError)] fn parse_file(text: &str) -> Vec<Point> { let mut points = Vec::new(); for line in text.lines() { let mut x = None; let mut y = None; for field in line.split(',') { let (k, v) = field.split_once('=').ok_or_else(|| NoEqualSign { field: field.into(), line: line.into(), })?; match k.trim() { "x" => { x = Some(v.trim().parse().map_err(|e| { XParseFailed { source: e, field: field.into(), line: line.into(), } })?); }, "y" => { y = Some(v.trim().parse().map_err(|e| { YParseFailed { source: e, field: field.into(), line: line.into(), } })?); }, _ => { throw!(UnknownField { field: field.into(), line: line.into(), }) }, } } points.push(Point { x: x.ok_or_else(|| XNotFound { line: line.into() })?, y: y.ok_or_else(|| YNotFound { line: line.into() })?, }); } points } fn main() { let d = Directory(vec![ ("file1", "x=3,y=4\nx=1,y=2") ]); for (filename, text) in &d.0 { println!("{} {:?}", filename, parse_file(text)); } }
需求方说,你的程序的错误报告做得非常好,不过如果能够同时报告所有错误,那就完美了。于是我们又给ParseFileError加上了List的variant:
#![feature(str_split_once)] use std::num::ParseIntError; use thiserror::Error; use fehler::{throws, throw}; use self::ParseFileError::*; #[derive(Error, Debug)] enum ParseFileError { #[error("no equal sign found: `{field}` in line `{line}`")] NoEqualSign { field: String, line: String }, #[error("parse x as i32 failed: `{field}` in line `{line}`")] XParseFailed { source: ParseIntError, field: String, line: String }, #[error("parse y as i32 failed: `{field}` in line `{line}`")] YParseFailed { source: ParseIntError, field: String, line: String }, #[error("unknown key: `{field}` in line `{line}`")] UnknownField { field: String, line: String}, #[error("x not found: `{line}`")] XNotFound { line: String }, #[error("y not found: `{line}`")] YNotFound { line: String }, #[error("multiple errors")] List(Vec<ParseFileError>), } #[derive(Debug)] struct Point { x: i32, y: i32, } #[derive(Debug)] struct Directory(Vec<(&'static str, &'static str)>); #[throws(ParseFileError)] fn parse_file(text: &str) -> Vec<Point> { let mut points = Vec::new(); let mut error_list = Vec::new(); for line in text.lines() { let mut line_error_list = Vec::new(); let mut x = None; let mut y = None; for field in line.split(',') { let (k, v) = match field.split_once('=') { Some((k, v)) => (k, v), None => { line_error_list.push(NoEqualSign { field: field.into(), line: line.into(), }); continue; // 后面的解析都没有意义了,直接continue。如果parse_line是一个单独的函数,这里相当于无视收集错误可以直接用?返回。懒得展示了…… } }; match k.trim() { "x" => { // 写起来会很麻烦…… match v.trim().parse() { Ok(xx) => x = Some(xx), Err(e) => { line_error_list.push(XParseFailed { source: e, field: field.into(), line: line.into(), }); }, } }, "y" => { // 或者可以这样“简洁”地写 y = v.trim().parse().map_err(|e| { line_error_list.push(YParseFailed { source: e, field: field.into(), line: line.into(), }); () // 可省略 }).ok(); }, _ => { line_error_list.push(UnknownField { field: field.into(), line: line.into(), }); }, } } // 可能需求会是如果此行没有找到等号,就不用报告找不到x/y的错误了 // 或者如果解析x时出错,也不用报告找不到x的错误 // 
这里就不处理那么多了…… if x.is_none() { line_error_list.push(XNotFound { line: line.into(), }); } if y.is_none() { line_error_list.push(YNotFound { line: line.into(), }); } if line_error_list.is_empty() { points.push(Point { x: x.unwrap(), y: y.unwrap(), }); } else { error_list.extend(line_error_list); } } if error_list.is_empty() { points } else { throw!(List(error_list)); } } fn main() { let d = Directory(vec![ ("file1", "x=3,y=4t\nx=1,y=2,z=3") ]); for (filename, text) in &d.0 { println!("{} {:?}", filename, parse_file(text)); } }
甲方爸爸又来了,先是惯例地夸赞了一番,然后说,某些数据里除了x、y之外,还可能有z、w等等字段,它们直接忽略即可,就不用报错了。不过呢,如果能够输出一句警告,说明哪个文件的哪一行有这些字段,那就能让这个完美的工具更完美了。
@@ -3,7 +3,7 @@
 use std::num::ParseIntError;
 use thiserror::Error;
-use fehler::{throws, throw};
+use w_result::WResult;
 use self::ParseFileError::*;
@@ -35,10 +35,10 @@ struct Point {
 #[derive(Debug)]
 struct Directory(Vec<(&'static str, &'static str)>);
-#[throws(ParseFileError)]
-fn parse_file(text: &str) -> Vec<Point> {
+fn parse_file(text: &str) -> WResult<Vec<Point>, ParseFileError, ParseFileError> {
     let mut points = Vec::new();
     let mut error_list = Vec::new();
+    let mut warning_list = Vec::new();
     for line in text.lines() {
         let mut line_error_list = Vec::new();
         let mut x = None;
@@ -80,7 +80,7 @@ fn parse_file(text: &str) -> Vec<Point> {
                     }).ok();
                 },
                 _ => {
-                    line_error_list.push(UnknownField {
+                    warning_list.push(UnknownField {
                         field: field.into(),
                         line: line.into(),
                     });
@@ -113,18 +113,18 @@ fn parse_file(text: &str) -> Vec<Point> {
     }
     if error_list.is_empty() {
-        points
+        WResult::WOk(points, warning_list)
     } else {
-        throw!(List(error_list));
+        WResult::WErr(List(error_list))
     }
 }
这次的改动比较小,主要就是引入WResult。继放弃`?`运算符之后,fehler也被放弃了。
现在的代码已经非常别扭了。如果一开始就知道有这些需求,应该怎么设计呢?我们已经用过了标准库里的Result,第三方的WResult,试试自己定义的MyResult。下面是经过了几次整理的最终代码,自我感觉思路应该是对了。命名等比较随意就不要追究了。
#![feature(str_split_once)] #![feature(try_trait)] #[macro_use] extern crate shrinkwraprs; use std::num::ParseIntError; use std::fmt::{self, Debug}; use std::error::Error as _; use std::ops::Try; use thiserror::Error; use self::ParseLineError::*; use self::MyResult::{MyOk, MyErr}; use self::MyError::{Collected, Fatal}; #[derive(Debug)] struct Point { x: i32, y: i32, } #[derive(Debug)] struct Directory(Vec<(&'static str, &'static str)>); #[derive(Debug, Shrinkwrap)] #[shrinkwrap(mutable)] struct Warnings<E>(Vec<E>); impl<E> Warnings<E> { fn new() -> Self { Self(Vec::new()) } } impl<E: Debug> From<Warnings<E>> for MyError<E> { fn from(warn: Warnings<E>) -> Self { Collected(warn.0) } } #[derive(Debug, Error)] enum MyError<E: Debug> { Collected(Vec<E>), Fatal(E), } impl<E: Debug> fmt::Display for MyError<E> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{:?}", self) } } #[must_use] #[derive(Debug)] enum MyResult<T, E: Debug> { MyOk { ok: T, warn: Warnings<E>, }, MyErr(MyError<E>), } impl<T, E: Debug> Try for MyResult<T, E> { type Ok = T; type Error = E; fn into_result(self) -> Result<T, E> { unimplemented!() } fn from_ok(v: T) -> Self { MyOk { ok: v, warn: Warnings::new(), } } fn from_error(v: E) -> Self { MyErr(Fatal(v)) } } impl<T, E: Debug> MyResult<T, E> { fn drain_warning(self, f: &dyn Fn(E)) -> MyResult<T, E> { match self { MyOk {ok, mut warn} => { for w in warn.drain(..) 
{ f(w); } MyOk { ok, warn: Warnings::new(), } }, MyErr(e) => MyErr(e), } } } #[derive(Error, Debug)] enum ParseLineError { #[error("no equal sign found: `{field}`")] NoEqualSign { field: String }, #[error("parse x as i32 failed: `{field}`")] XParseFailed { source: ParseIntError, field: String }, #[error("parse y as i32 failed: `{field}`")] YParseFailed { source: ParseIntError, field: String }, #[error("unknown key: `{field}`")] UnknownField { field: String}, #[error("x not found")] XNotFound, #[error("y not found")] YNotFound, } fn ok_wrapping<T, E: Debug>(ok: impl FnOnce() -> T, warning_list: Warnings<E>, error_list: Vec<E>) -> MyResult<T, E> { if error_list.is_empty() { MyOk { ok: ok(), warn: warning_list, } } else { MyErr(Collected(error_list)) } } fn parse_line(line: &str) -> MyResult<Point, ParseLineError> { let mut warning_list = Warnings::new(); let mut error_list = Vec::new(); let mut x = None; let mut y = None; for field in line.split(',') { let (k, v) = match field.split_once('=') { Some((k, v)) => (k, v), None => { return MyErr(Fatal(NoEqualSign { field: field.into(), })); // 为MyResult实现了Try,也可以使用?:Err(NoEqualSign { field: field.into(), })? 
} }; match k.trim() { "x" => { // 写起来会很麻烦…… match v.trim().parse() { Ok(xx) => x = Some(xx), Err(e) => { error_list.push(XParseFailed { source: e, field: field.into(), }); }, } }, "y" => { // 或者可以这样“简洁”地写 y = v.trim().parse().map_err(|e| { error_list.push(YParseFailed { source: e, field: field.into(), }); () // 可省略 }).ok(); }, _ => { warning_list.push(UnknownField { field: field.into(), }); }, } } // 可能需求会是如果此行没有找到等号,就不用报告找不到x/y的错误了 // 或者如果解析x时出错,也不用报告找不到x的错误 // 这里就不处理那么多了…… if x.is_none() { error_list.push(XNotFound); } if y.is_none() { error_list.push(YNotFound); } ok_wrapping(|| Point { x: x.unwrap(), y: y.unwrap(), }, warning_list, error_list) } #[derive(Error, Debug)] enum ParseFileError { #[error("parse line failed: `{line}`")] Line {line: String, source: MyError<ParseLineError>}, } fn parse_file(text: &str) -> MyResult<Vec<Point>, ParseFileError> { let mut points = Vec::new(); let mut warning_list = Warnings::new(); let mut error_list = Vec::new(); for line in text.lines() { match parse_line(line) { MyOk { ok, warn } => { points.push(ok); if !warn.is_empty() { warning_list.push(ParseFileError::Line { line: line.into(), source: warn.into(), }); } }, MyErr(e) => { error_list.push(ParseFileError::Line { line: line.into(), source: e, }); } } } ok_wrapping(|| points, warning_list, error_list) } #[derive(Error, Debug)] enum ParseDirectoryError { #[error("parse file failed: `{file}`")] File {file: String, source: MyError<ParseFileError>}, } fn parse_directory(directory: &Directory) -> MyResult<Vec<Point>, ParseDirectoryError> { let mut points = Vec::new(); let mut warning_list = Warnings::new(); let mut error_list = Vec::new(); for (filename, text) in &directory.0 { match parse_file(text) { MyOk { ok, warn } => { points.extend(ok); if !warn.is_empty() { warning_list.push(ParseDirectoryError::File { file: (*filename).into(), source: warn.into(), }); } }, MyErr(e) => { error_list.push(ParseDirectoryError::File { file: (*filename).into(), source: e, }); } } } 
ok_wrapping(|| points, warning_list, error_list) } fn main() { let d = Directory(vec![ ("file1", "x=3,y=4\nx=1,y=2,z=3") ]); let r = parse_directory(&d).drain_warning(&|e| { println!("{}", e); if let Some(s) = e.source() { println!("caused by: {}", s); } }); println!("{:?}", r); }
目前的一个问题是:`Error` trait(的 `source()` 方法)can only represent singly-linked lists for chains of errors,也就是说每个错误最多只能指向一个上游错误,无法表达“一个错误由多个错误共同导致”的树状结构。因此在现在的状态下,Error Reporter并不能太好地工作。
错误处理真是太累了……总之暂时先用这个思路试试看效果如何。当然绝大部分情况下我肯定都会偷懒,但至少需要返回错误列表时我会统一使用MyResult,而不是一会`Vec<Result<()>>`一会`Vec<Error>`了。
转自:https://zhuanlan.zhihu.com/p/234820782
作者:juu wiio