lex tests actually test the tokens now

main
Jordan Orelli 11 months ago
parent cd51f4cce1
commit cb53fb9195

@@ -1,27 +1,6 @@
 use crate::error::LexError;
 use std::{collections::VecDeque, fmt, ops::Range, str::Chars};
 
-fn is_glob(c: char) -> bool {
-    match c {
-        '*' | '?' => true,
-        _ => false,
-    }
-}
-
-fn is_special(c: char) -> bool {
-    match c {
-        '?' => true,
-        _ => false,
-    }
-}
-
-fn is_keyword(s: &str) -> bool {
-    match s {
-        "for" => true,
-        _ => false,
-    }
-}
-
 /// The position of a specific glyph within a corpus of text. We use this for rendering error
 /// messages and communicating to the user the location of errors.
 #[derive(PartialEq, Clone, Copy)]
@@ -82,6 +61,16 @@ pub struct Topoglyph {
     bytes: Range<u64>,
 }
 
+impl Topoglyph {
+    fn is_word(&self) -> bool {
+        self.glyph.is_alphanumeric() || self.glyph == '.'
+    }
+
+    fn is_glob(&self) -> bool {
+        self.is_word() || self.glyph == '*'
+    }
+}
+
 impl fmt::Debug for Topoglyph {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
         write!(f, "{char}@{pos:?}", char = self.glyph, pos = self.position)
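
The new Topoglyph::is_word and Topoglyph::is_glob helpers replace the deleted free functions and move the character classes onto the glyph itself. Note that '.' now counts as a word character, which is what lets a file name like poop.exe lex as a single token. A minimal sketch of the same classification over plain chars (hypothetical free functions, not part of this commit):

    // '.' is in the word class, so "poop.exe" stays one token;
    // '*' widens the class to glob characters. Unlike the deleted
    // free function, '?' is not part of the new glob class.
    fn is_word_char(c: char) -> bool {
        c.is_alphanumeric() || c == '.'
    }

    fn is_glob_char(c: char) -> bool {
        is_word_char(c) || c == '*'
    }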
@@ -294,10 +283,25 @@ impl From<Vec<Topoglyph>> for Lexeme {
 #[allow(dead_code)]
 #[derive(Debug, PartialEq, Clone)]
 pub enum Token {
-    String(Lexeme),
+    /// A bare word: a sequence of characters without any quotes. A bare word is never a glob.
+    Word(Lexeme),
+
+    /// A bare word containing 1 or more of the special characters ? or *
     Glob(Lexeme),
 }
 
+impl Token {
+    /// reports whether two tokens have the same variant and text, ignoring source position
+    fn same(&self, other: &Self) -> bool {
+        use Token::*;
+        match (self, other) {
+            (Word(a), Word(b)) => a.text() == b.text(),
+            (Glob(a), Glob(b)) => a.text() == b.text(),
+            _ => false,
+        }
+    }
+}
+
 pub struct Tokenizer<'text> {
     source: Topoglypher<'text>,
 }
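
Token::same is a position-insensitive equality check: it compares only the variant and the lexeme text, never byte offsets or positions, which is what allows the tests to build expected tokens with dummy positions. A rough usage sketch, assuming the word and glob test helpers defined further down:

    // Both sides carry throwaway positions; same() still matches them.
    assert!(glob("x*").same(&glob("x*")));
    // Different variants never compare the same, even with identical text.
    assert!(!word("x").same(&glob("x")));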
@@ -314,7 +318,8 @@ impl<'text> Tokenizer<'text> {
         let next = self.source.next()?;
         match next.glyph {
-            _ if next.glyph.is_alphabetic() => Some(self.lex_bare_string(vec![next])),
+            _ if next.is_word() => Some(self.lex_bare_string(vec![next])),
+            _ if next.is_glob() => Some(self.lex_glob(vec![next])),
             '\\' => match self.source.pop() {
                 Ok(escaped) => Some(self.lex_bare_string(vec![escaped])),
                 Err(e) => Some(Err(e)),
@@ -330,12 +335,15 @@ impl<'text> Tokenizer<'text> {
         while let Some(next) = self.source.peek() {
             match next.glyph {
                 _ if next.glyph.is_whitespace() => break,
-                _ if next.glyph.is_alphanumeric() => progress.push(self.source.pop()?),
+                _ if next.is_word() => progress.push(self.source.pop()?),
+                _ if next.is_glob() => {
+                    progress.push(self.source.pop()?);
+                    return self.lex_glob(progress);
+                }
                 '\\' => {
                     self.source.pop()?;
                     progress.push(self.source.pop()?);
                 }
-                '*' | '?' => return self.lex_glob(progress),
                 _ => return Err(LexError::UnexpectedCharacter(self.source.pop()?)),
             }
         }
@@ -343,7 +351,7 @@ impl<'text> Tokenizer<'text> {
         if progress.is_empty() {
             Err(LexError::UnexpectedEOF)
         } else {
-            Ok(Token::String(progress.into()))
+            Ok(Token::Word(progress.into()))
         }
     }
@@ -351,8 +359,7 @@ impl<'text> Tokenizer<'text> {
         while let Some(next) = self.source.peek() {
             match next.glyph {
                 _ if next.glyph.is_whitespace() => break,
-                _ if next.glyph.is_alphanumeric() => progress.push(self.source.pop()?),
-                '*' | '?' => progress.push(self.source.pop()?),
+                _ if next.is_glob() => progress.push(self.source.pop()?),
                 '\\' => {
                     self.source.pop()?;
                     progress.push(self.source.pop()?);
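
Taken together, the rewritten arms give bare words a promotion path: lex_bare_string consumes word characters until it sees a glob character, at which point it pushes that glyph onto the accumulated progress and hands off to lex_glob, which accepts the wider class until whitespace. Roughly, inside a test returning Result<(), LexError> and using the helpers defined in the tests below:

    assert!(lex("x*")?[0].same(&glob("x*"))); // promoted to a glob mid-token
    assert!(lex("*x")?[0].same(&glob("*x"))); // dispatched straight to lex_glob
    assert!(lex("xy")?[0].same(&word("xy"))); // never promoted, stays a Word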
@@ -441,46 +448,61 @@ impl<'text> Iterator for Lexer<'text> {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use std::iter::zip;
+
+    /// builds a Lexeme from a string using dummy positions; Token::same ignores position,
+    /// so this is enough to describe an expected token
+    fn lexeme(txt: &str) -> Lexeme {
+        let x: Vec<Topoglyph> = txt
+            .chars()
+            .map(|c| Topoglyph {
+                glyph: c,
+                position: Position::start(),
+                bytes: 0..0,
+            })
+            .collect();
+        x.into()
+    }
+
+    fn glob(txt: &str) -> Token {
+        Token::Glob(lexeme(txt))
+    }
+
+    fn word(txt: &str) -> Token {
+        Token::Word(lexeme(txt))
+    }
+
+    /// this macro allows us to specify a set of inputs that we expect to lex successfully,
+    /// along with the tokens each input should produce.
     macro_rules! accept {
-        ($($name:ident: $line:literal ;)+) => {$(
+        ( $($test_name:ident $input_text:literal [ $( $token:expr )* ])+) => {$(
             #[test]
-            fn $name() {
-                println!("testing that we can lex the following input text:\n\t{}", $line);
-                let lexer = Tokenizer::new($line);
-                let tokens: Result<Vec<Token>, LexError> = lexer.collect();
-                match tokens {
-                    Ok(tokens) => {
-                        println!("output tokens: {tokens:?}");
-                    }
-                    Err(e) => {
-                        println!("output error: {e:?}");
-                        panic!("Encounter an unexpected lex error");
-                    }
-                }
+            fn $test_name() -> Result<(), LexError> {
+                #[allow(unused_mut)]
+                let mut expected: Vec<Token> = Vec::new();
+                $( expected.push($token); )*
+
+                let lexed = lex($input_text)?;
+                assert_eq!(expected.len(), lexed.len());
+
+                for pair in zip(expected, lexed) {
+                    assert!(pair.0.same(&pair.1));
+                }
+
+                Ok(())
             }
         )*};
     }
 
+    /// this macro allows us to specify a set of inputs that we expect to fail to lex.
     macro_rules! reject {
-        ($($name:ident: $line:literal ;)+) => {$(
+        ($($test_name:ident: $input_text:literal;)+) => {$(
             #[test]
-            fn $name() {
-                println!("testing that we will fail to lex the following input text:\n\t{}", $line);
-                let tokens = lex($line);
-                match tokens {
-                    Ok(tokens) => {
-                        println!("output tokens: {tokens:?}");
-                        panic!("Did not encounter an expected lex error");
-                    }
-                    Err(e) => {
-                        println!("output error: {e:?}");
-                    }
-                }
+            fn $test_name() {
+                match lex($input_text) {
+                    Ok(tokens) => {
+                        println!("output tokens: {tokens:?}");
+                        panic!("Did not encounter an expected lex error");
+                    }
+                    Err(e) => println!("output error: {e:?}"),
+                }
             }
         )*};
     }
 
     reject! {
@@ -491,9 +513,6 @@ mod tests {
         // input makes no sense
         trailing_slash: r"one two three \";
 
-        // Globs aren't done yet
-        glob: "*";
-
         // Vars aren't done yet
        var: "@name";
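
Each accept! entry expands into an ordinary #[test] function that lexes the input and compares tokens pairwise with Token::same. For example, the entry `identifier "a" [ word("a") ]` from the list below expands to roughly:

    #[test]
    fn identifier() -> Result<(), LexError> {
        let mut expected: Vec<Token> = Vec::new();
        expected.push(word("a"));

        let lexed = lex("a")?;
        assert_eq!(expected.len(), lexed.len());

        for pair in zip(expected, lexed) {
            assert!(pair.0.same(&pair.1));
        }

        Ok(())
    }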
@ -505,19 +524,24 @@ mod tests {
} }
accept! { accept! {
empty: ""; empty "" []
spaces " " []
spaces: " "; identifier "a" [ word("a") ]
identifier_2 " a" [ word("a") ]
identifier: "a"; identifier_3 "a " [ word("a") ]
identifier_4 " a " [ word("a") ]
identifier_2: " a"; file_name "poop.exe" [ word("poop.exe") ]
multi_idents "one two three four " [
identifier_3: "a "; word("one")
word("two")
identifier_4: " a "; word("three")
word("four")
multi_idents: "one two three four "; ]
glob_1 "*" [ glob("*") ]
glob_2 " * " [ glob("*") ]
glob_3 "x*" [ glob("x*") ]
glob_4 "*x" [ glob("*x") ]
glob_5 "*.py" [ glob("*.py") ]
} }
} }

@@ -139,7 +139,7 @@ impl<'text> Parser<'text> {
     fn step_start(&mut self) -> Result<bool, ParseError> {
         assert!(matches!(self.cursor.value(), Value::Start));
         match self.source.peek()? {
-            Some(Token::String(_)) => {
+            Some(Token::Word(_)) => {
                 self.cursor.push(Value::Statement)?;
                 let token = self.source.next().unwrap()?;
                 self.cursor.push(Value::Terminal(token))?;
@@ -157,7 +157,7 @@ impl<'text> Parser<'text> {
     fn step_statement(&mut self) -> Result<bool, ParseError> {
         assert!(matches!(self.cursor.value(), Value::Statement));
         match self.source.peek()? {
-            Some(Token::String(_) | Token::Glob(_)) => {
+            Some(Token::Word(_) | Token::Glob(_)) => {
                 let token = self.source.next().unwrap()?;
                 self.cursor.push(Value::Terminal(token))?;
                 self.cursor.up()?;
