lex tests actually test the tokens now

branch: main
author: Jordan Orelli, 8 months ago
parent cd51f4cce1
commit cb53fb9195

@@ -1,27 +1,6 @@
 use crate::error::LexError;
 use std::{collections::VecDeque, fmt, ops::Range, str::Chars};
 
-fn is_glob(c: char) -> bool {
-    match c {
-        '*' | '?' => true,
-        _ => false,
-    }
-}
-
-fn is_special(c: char) -> bool {
-    match c {
-        '?' => true,
-        _ => false,
-    }
-}
-
-fn is_keyword(s: &str) -> bool {
-    match s {
-        "for" => true,
-        _ => false,
-    }
-}
 /// The position of a specific glyph within a corpus of text. We use this for rendering error
 /// messages and communicating to the user the location of errors.
 #[derive(PartialEq, Clone, Copy)]
@@ -82,6 +61,16 @@ pub struct Topoglyph {
     bytes: Range<u64>,
 }
 
+impl Topoglyph {
+    fn is_word(&self) -> bool {
+        self.glyph.is_alphanumeric() || self.glyph == '.'
+    }
+
+    fn is_glob(&self) -> bool {
+        self.is_word() || self.glyph == '*'
+    }
+}
+
 impl fmt::Debug for Topoglyph {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
         write!(f, "{char}@{pos:?}", char = self.glyph, pos = self.position)
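These two predicates become the lexer's single source of truth for character classes: a word glyph is an alphanumeric or a dot, and a glob glyph is a word glyph or * (note that ? is not treated as a glob character at this point). A standalone sketch of the same classification on plain chars, with illustrative names that are not part of the commit:

fn is_word_char(c: char) -> bool {
    // mirrors Topoglyph::is_word: alphanumerics plus '.', so a file name
    // like "poop.exe" lexes as a single word
    c.is_alphanumeric() || c == '.'
}

fn is_glob_char(c: char) -> bool {
    // mirrors Topoglyph::is_glob: any word character, or the glob star
    is_word_char(c) || c == '*'
}

fn main() {
    assert!(is_word_char('.'));
    assert!(!is_word_char('*'));
    assert!(is_glob_char('*'));
    assert!(is_glob_char('a'));
}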
@@ -294,10 +283,25 @@ impl From<Vec<Topoglyph>> for Lexeme {
 #[allow(dead_code)]
 #[derive(Debug, PartialEq, Clone)]
 pub enum Token {
-    String(Lexeme),
+    /// A bare word: a sequence of characters without any quotes. A bare word is never a glob.
+    Word(Lexeme),
+
+    /// A bare word containing one or more of the special characters ? or *
+    Glob(Lexeme),
 }
 
+impl Token {
+    fn same(&self, other: &Self) -> bool {
+        use Token::*;
+        match (self, other) {
+            (Word(a), Word(b)) => a.text() == b.text(),
+            (Glob(a), Glob(b)) => a.text() == b.text(),
+            _ => false,
+        }
+    }
+}
+
 pub struct Tokenizer<'text> {
     source: Topoglypher<'text>,
 }
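Token::same gives the tests a way to compare lexer output against hand-built expected tokens while ignoring source positions, which the test helpers fake anyway. A standalone sketch of the idea, with plain Strings standing in for Lexemes (not the commit's code):

enum Tok {
    Word(String),
    Glob(String),
}

impl Tok {
    // same variant and same text => same token; positions are ignored
    fn same(&self, other: &Self) -> bool {
        match (self, other) {
            (Tok::Word(a), Tok::Word(b)) => a == b,
            (Tok::Glob(a), Tok::Glob(b)) => a == b,
            _ => false,
        }
    }
}

fn main() {
    assert!(Tok::Word("ls".into()).same(&Tok::Word("ls".into())));
    assert!(!Tok::Word("x".into()).same(&Tok::Glob("x".into())));
}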
@@ -314,7 +318,8 @@ impl<'text> Tokenizer<'text> {
         let next = self.source.next()?;
         match next.glyph {
-            _ if next.glyph.is_alphabetic() => Some(self.lex_bare_string(vec![next])),
+            _ if next.is_word() => Some(self.lex_bare_string(vec![next])),
+            _ if next.is_glob() => Some(self.lex_glob(vec![next])),
             '\\' => match self.source.pop() {
                 Ok(escaped) => Some(self.lex_bare_string(vec![escaped])),
                 Err(e) => Some(Err(e)),
@@ -330,12 +335,15 @@ impl<'text> Tokenizer<'text> {
         while let Some(next) = self.source.peek() {
             match next.glyph {
                 _ if next.glyph.is_whitespace() => break,
-                _ if next.glyph.is_alphanumeric() => progress.push(self.source.pop()?),
+                _ if next.is_word() => progress.push(self.source.pop()?),
+                _ if next.is_glob() => {
+                    progress.push(self.source.pop()?);
+                    return self.lex_glob(progress);
+                }
                 '\\' => {
                     self.source.pop()?;
                     progress.push(self.source.pop()?);
                 }
-                '*' | '?' => return self.lex_glob(progress),
                 _ => return Err(LexError::UnexpectedCharacter(self.source.pop()?)),
             }
         }
@@ -343,7 +351,7 @@ impl<'text> Tokenizer<'text> {
         if progress.is_empty() {
             Err(LexError::UnexpectedEOF)
         } else {
-            Ok(Token::String(progress.into()))
+            Ok(Token::Word(progress.into()))
         }
     }
@@ -351,8 +359,7 @@ impl<'text> Tokenizer<'text> {
         while let Some(next) = self.source.peek() {
             match next.glyph {
                 _ if next.glyph.is_whitespace() => break,
-                _ if next.glyph.is_alphanumeric() => progress.push(self.source.pop()?),
-                '*' | '?' => progress.push(self.source.pop()?),
+                _ if next.is_glob() => progress.push(self.source.pop()?),
                 '\\' => {
                     self.source.pop()?;
                     progress.push(self.source.pop()?);
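With these arms in place, lex_bare_string hands off to lex_glob the moment a glob character appears mid-word, so inputs like x* and *.py lex as a single Glob token. A standalone sketch of that promotion on plain chars (illustrative names, not the commit's code):

fn classify(input: &str) -> Result<&'static str, char> {
    // promote from word to glob the first time a glob-only char appears
    let mut is_glob = false;
    for c in input.chars() {
        match c {
            '*' => is_glob = true,
            c if c.is_alphanumeric() || c == '.' => {}
            other => return Err(other), // anything else is a lex error
        }
    }
    Ok(if is_glob { "glob" } else { "word" })
}

fn main() {
    assert_eq!(classify("poop.exe"), Ok("word"));
    assert_eq!(classify("x*"), Ok("glob"));
    assert_eq!(classify("*.py"), Ok("glob"));
    assert_eq!(classify("@name"), Err('@'));
}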
@@ -441,46 +448,61 @@ impl<'text> Iterator for Lexer<'text> {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use std::iter::zip;
+
+    fn lexeme(txt: &str) -> Lexeme {
+        let x: Vec<Topoglyph> = txt
+            .chars()
+            .map(|c| Topoglyph {
+                glyph: c,
+                position: Position::start(),
+                bytes: 0..0,
+            })
+            .collect();
+        x.into()
+    }
+
+    fn glob(txt: &str) -> Token {
+        Token::Glob(lexeme(txt))
+    }
+
+    fn word(txt: &str) -> Token {
+        Token::Word(lexeme(txt))
+    }
 
     /// this macro allows us to specify a set of inputs that we expect to lex successfully.
     macro_rules! accept {
-        ($($name:ident: $line:literal ;)+) => {$(
-            #[test]
-            fn $name() {
-                println!("testing that we can lex the following input text:\n\t{}", $line);
-                let lexer = Tokenizer::new($line);
-                let tokens: Result<Vec<Token>, LexError> = lexer.collect();
-                match tokens {
-                    Ok(tokens) => {
-                        println!("output tokens: {tokens:?}");
-                    }
-                    Err(e) => {
-                        println!("output error: {e:?}");
-                        panic!("Encountered an unexpected lex error");
-                    }
-                }
-            }
-        )*};
+        ($($test_name:ident $input_text:literal [ $( $token:expr )* ])+) => {$(
+            #[test]
+            fn $test_name() -> Result<(), LexError> {
+                #[allow(unused_mut)]
+                let mut expected: Vec<Token> = Vec::new();
+                $( expected.push($token); )*
+                let lexed = lex($input_text)?;
+                assert_eq!(expected.len(), lexed.len());
+                for pair in zip(expected, lexed) {
+                    assert!(pair.0.same(&pair.1));
+                }
+                Ok(())
+            }
+        )*};
     }
 
     /// this macro allows us to specify a set of inputs that we expect to fail to lex.
     macro_rules! reject {
-        ($($name:ident: $line:literal ;)+) => {$(
-            #[test]
-            fn $name() {
-                println!("testing that we will fail to lex the following input text:\n\t{}", $line);
-                let tokens = lex($line);
-                match tokens {
-                    Ok(tokens) => {
-                        println!("output tokens: {tokens:?}");
-                        panic!("Did not encounter an expected lex error");
-                    }
-                    Err(e) => {
-                        println!("output error: {e:?}");
-                    }
-                }
-            }
-        )*};
+        ($($test_name:ident: $input_text:literal;)+) => {$(
+            #[test]
+            fn $test_name() {
+                match lex($input_text) {
+                    Ok(tokens) => {
+                        println!("output tokens: {tokens:?}");
+                        panic!("Did not encounter an expected lex error");
+                    }
+                    Err(e) => println!("output error: {e:?}"),
+                }
+            }
+        )*};
     }
 
     reject! {
@@ -491,9 +513,6 @@ mod tests {
         // input makes no sense
         trailing_slash: r"one two three \";
 
-        // Globs aren't done yet
-        glob: "*";
-
         // Vars aren't done yet
         var: "@name";
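For reference, a reject! entry like trailing_slash expands to roughly this test, which passes only when lex returns an error (approximate expansion of the macro above):

#[test]
fn trailing_slash() {
    match lex(r"one two three \") {
        Ok(tokens) => {
            println!("output tokens: {tokens:?}");
            panic!("Did not encounter an expected lex error");
        }
        Err(e) => println!("output error: {e:?}"),
    }
}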
@@ -505,19 +524,24 @@ mod tests {
     }
 
     accept! {
-        empty: "";
-        spaces: " ";
-        identifier: "a";
-        identifier_2: " a";
-        identifier_3: "a ";
-        identifier_4: " a ";
-        multi_idents: "one two three four ";
+        empty "" []
+        spaces " " []
+        identifier "a" [ word("a") ]
+        identifier_2 " a" [ word("a") ]
+        identifier_3 "a " [ word("a") ]
+        identifier_4 " a " [ word("a") ]
+        file_name "poop.exe" [ word("poop.exe") ]
+        multi_idents "one two three four " [
+            word("one")
+            word("two")
+            word("three")
+            word("four")
+        ]
+        glob_1 "*" [ glob("*") ]
+        glob_2 " * " [ glob("*") ]
+        glob_3 "x*" [ glob("x*") ]
+        glob_4 "*x" [ glob("*x") ]
+        glob_5 "*.py" [ glob("*.py") ]
     }
 }
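An accept! entry like identifier "a" [ word("a") ] expands to roughly the following test: lex the input, check the token count, then compare each pair of tokens with Token::same (approximate expansion of the macro above):

#[test]
fn identifier() -> Result<(), LexError> {
    let mut expected: Vec<Token> = Vec::new();
    expected.push(word("a"));
    let lexed = lex("a")?;
    assert_eq!(expected.len(), lexed.len());
    for pair in zip(expected, lexed) {
        assert!(pair.0.same(&pair.1));
    }
    Ok(())
}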

@@ -139,7 +139,7 @@ impl<'text> Parser<'text> {
     fn step_start(&mut self) -> Result<bool, ParseError> {
         assert!(matches!(self.cursor.value(), Value::Start));
         match self.source.peek()? {
-            Some(Token::String(_)) => {
+            Some(Token::Word(_)) => {
                 self.cursor.push(Value::Statement)?;
                 let token = self.source.next().unwrap()?;
                 self.cursor.push(Value::Terminal(token))?;
@@ -157,7 +157,7 @@ impl<'text> Parser<'text> {
     fn step_statement(&mut self) -> Result<bool, ParseError> {
         assert!(matches!(self.cursor.value(), Value::Statement));
         match self.source.peek()? {
-            Some(Token::String(_) | Token::Glob(_)) => {
+            Some(Token::Word(_) | Token::Glob(_)) => {
                 let token = self.source.next().unwrap()?;
                 self.cursor.push(Value::Terminal(token))?;
                 self.cursor.up()?;
