From 5505cb48c64c9caccadbff40553c2ce3f2f3514a Mon Sep 17 00:00:00 2001 From: Jordan Orelli Date: Wed, 7 Feb 2024 20:51:16 -0600 Subject: [PATCH] you can now use a semicolon to have TWO statements --- src/error.rs | 15 ++++++ src/lex.rs | 22 +++++---- src/parse.rs | 132 +++++++++++++++++++++++++++++++++++++------------- src/syntax.rs | 54 ++++++++++++++++----- 4 files changed, 168 insertions(+), 55 deletions(-) diff --git a/src/error.rs b/src/error.rs index 950c473..1e40b27 100644 --- a/src/error.rs +++ b/src/error.rs @@ -29,6 +29,12 @@ pub enum LexError { #[error("unexpected character {g} at {pos:?}", g = .0.glyph, pos = .0.position)] UnexpectedCharacter(Glyph), + #[error("unexpected character {g} at {pos:?} while lexing a bare string", g = .0.glyph, pos = .0.position)] + UnexpectedCharacterInBareString(Glyph), + + #[error("unexpected character {g} at {pos:?} while lexing a glob", g = .0.glyph, pos = .0.position)] + UnexpectedCharacterInGlob(Glyph), + #[error("unexpected eof")] UnexpectedEOF, @@ -61,6 +67,15 @@ pub enum ParseError { #[error("Illegal attempt to push a value as a child to a terminal value")] PushOntoTerminal, + + #[error("Statement node has no children")] + StatementIsEmpty, + + #[error("dangling")] + DanglingElements, + + #[error("you wouldn't parse a semicolon")] + WhyParseSemicolon, } #[derive(Debug, Error)] diff --git a/src/lex.rs b/src/lex.rs index a885461..820b524 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -68,6 +68,8 @@ pub enum Token { /// A bare word containing 1 or more of the special characters ? or * Glob(Lexeme), + + Semi(Glyph), } impl Token { @@ -96,6 +98,7 @@ impl Token { match self { Word(lexeme) | Glob(lexeme) => lexeme.text(), + Semi(glyph) => String::from(glyph.glyph), } } } @@ -121,13 +124,10 @@ impl<'text> Tokenizer<'text> { match next.glyph { _ if next.is_word() => Some(self.lex_bare_string(vec![next])), _ if next.is_glob() => Some(self.lex_glob(vec![next])), - // '\\' => match self.source.pop() { - // Ok(escaped) => Some(self.lex_bare_string(vec![escaped])), - // Err(e) => Some(Err(e)), - // }, '@' => Some(self.lex_var(vec![next])), '\'' => Some(self.lex_raw_string(vec![next])), '"' => Some(self.lex_interp_string(vec![next])), + ';' => Some(Ok(Token::Semi(next))), _ => Some(Err(LexError::UnexpectedCharacter(next))), } } @@ -141,11 +141,12 @@ impl<'text> Tokenizer<'text> { progress.push(self.source.pop()?); return self.lex_glob(progress); } - // '\\' => { - // self.source.pop()?; - // progress.push(self.source.pop()?); - // } - _ => return Err(LexError::UnexpectedCharacter(self.source.pop()?)), + ';' => break, + _ => { + return Err(LexError::UnexpectedCharacterInBareString( + self.source.pop()?, + )) + } } } @@ -159,13 +160,14 @@ impl<'text> Tokenizer<'text> { fn lex_glob(&mut self, mut progress: Vec) -> Result { while let Some(next) = self.source.peek() { match next.glyph { + ';' => break, _ if next.glyph.is_whitespace() => break, _ if next.is_glob() => progress.push(self.source.pop()?), // '\\' => { // self.source.pop()?; // progress.push(self.source.pop()?); // } - _ => return Err(LexError::UnexpectedCharacter(self.source.pop()?)), + _ => return Err(LexError::UnexpectedCharacterInGlob(self.source.pop()?)), } } diff --git a/src/parse.rs b/src/parse.rs index df0f8bf..5b60c74 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -6,14 +6,19 @@ use std::{ sync::atomic::AtomicUsize, }; +/// The contents of a node in our parse tree. The parse tree consists of both terminal and +/// nonterminal symbols. #[derive(Debug, PartialEq)] pub enum Value { /// The start symbol of our parse tree. Each parse tree is rooted in a node whose value is the /// start symbol. This is the only node in the tree that should utilize the start symbol. Start, + /// The children of a statement symbol make up the components of what will become a statement + /// in our ast Statement, + /// Each of the tokens from the lex stage becomes a terminal node on our tree Terminal(Token), } @@ -43,19 +48,28 @@ pub struct Node { impl Node { fn new() -> Cursor { - let root = Node { + let root = Rc::new(Node { id: next_id(), parent: None, value: Value::Start, children: RefCell::new(Vec::new()), - }; - - let root = Rc::new(root); + }); Cursor { target: Rc::clone(&root), + prev: root.id, root, } } + + pub fn is_semi(&self) -> bool { + matches!(self.value, Value::Terminal(Token::Semi(_))) + } +} + +impl PartialEq for Node { + fn eq(&self, other: &Self) -> bool { + self.id == other.id + } } pub struct ChildIter { @@ -74,6 +88,7 @@ impl Iterator for ChildIter { Some(Cursor { target: Rc::clone(v), root: Rc::clone(&self.root), + prev: self.target.id, }) } } @@ -83,11 +98,15 @@ fn next_id() -> usize { LAST_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed) } -/// Cursor values expose access to a parse tree. +/// Cursor values expose access to a parse tree. A cursor is logically a pointer to a single node +/// within a parse tree. The cursor helps with the ownership structure: so long as there is a +/// cursor to any node on the tree, the tree remains in memory. Once there are no cursors pointed +/// at the tree, it is dropped. #[derive(Debug)] pub struct Cursor { pub target: Rc, root: Rc, + prev: usize, } impl Cursor { @@ -99,7 +118,7 @@ impl Cursor { None => Err(ParseError::AtRootAlready), Some(parent) => match parent.upgrade() { Some(parent) => { - self.target = parent; + self.goto(parent); Ok(()) } None => Err(ParseError::ParentIsGone), @@ -107,6 +126,40 @@ impl Cursor { } } + /// moves the cursor horizontally in the tree, selecting the node that appears after the + /// current target node + pub fn next_sibling(&mut self) -> Result { + let next = match self.pick_next_sibling()? { + Some(node) => node, + None => return Ok(false), + }; + self.prev = self.target.id; + self.target = next; + Ok(true) + } + + fn pick_next_sibling(&mut self) -> Result>, ParseError> { + let parent = self + .target + .parent + .clone() + .ok_or_else(|| ParseError::AtRootAlready)?; + + // SAFETY: this is ok because the cursor always retains a pointer to the root of the tree, + // so we know that since we have a cursor, the parent cannot yet be dropped + let parent = parent.upgrade().unwrap(); + let mut found_self = false; + for child in parent.children.borrow().iter() { + if found_self { + return Ok(Some(child.clone())); + } + if child.id == self.target.id { + found_self = true; + } + } + Ok(None) + } + /// Adds a value to the children of the current target node, then descends to select that /// child. fn push(&mut self, v: Value) -> Result<(), ParseError> { @@ -124,12 +177,12 @@ impl Cursor { .children .try_borrow_mut()? .push(Rc::clone(&node)); - self.target = node; + self.goto(node); Ok(()) } pub fn up_to_root(&mut self) { - self.target = Rc::clone(&self.root); + self.goto(Rc::clone(&self.root)); } pub fn value(&self) -> &Value { @@ -154,35 +207,33 @@ impl Cursor { Rc::clone(&self.root) } - // pub fn render_textree(&self, w: &mut W, depth: u32) { - // write!(w, "{:?} {pad:?}", self.target.value, pad = depth * 2); - // for child in self.iter_children() { - // child.render_textree(w, depth + 1); - // } - // } + fn goto(&mut self, next: Rc) { + self.prev = self.target.id; + self.target = next; + } } pub struct Parser<'text> { - source: Lexer<'text>, - cursor: Cursor, + input: Lexer<'text>, + output: Cursor, } impl<'text> Parser<'text> { pub fn new(source: Lexer<'text>) -> Self { Self { - source, - cursor: Node::new(), + input: source, + output: Node::new(), } } pub fn parse(mut self) -> Result { while self.step()? {} - self.cursor.up_to_root(); - Ok(self.cursor) + self.output.up_to_root(); + Ok(self.output) } fn step(&mut self) -> Result { - match self.cursor.value() { + match self.output.value() { Value::Start => self.step_start(), Value::Statement => self.step_statement(), Value::Terminal(_) => panic!(), @@ -190,30 +241,45 @@ impl<'text> Parser<'text> { } fn step_start(&mut self) -> Result { - assert!(matches!(self.cursor.value(), Value::Start)); - match self.source.peek()? { + assert!(matches!(self.output.value(), Value::Start)); + match self.input.peek()? { Some(Token::Word(_)) => { - self.cursor.push(Value::Statement)?; - let token = self.source.next().unwrap()?; - self.cursor.push(Value::Terminal(token))?; - self.cursor.up()?; + self.output.push(Value::Statement)?; + let token = self.input.next().unwrap()?; + self.output.push(Value::Terminal(token))?; + self.output.up()?; Ok(true) } Some(Token::Glob(_)) => { - let token = self.source.next().unwrap()?; + let token = self.input.next().unwrap()?; Err(ParseError::UnexpectedToken(token)) } + Some(Token::Semi(_)) => { + self.output.push(Value::Statement)?; + let token = self.input.next().unwrap()?; + self.output.push(Value::Terminal(token))?; + self.output.up()?; + self.output.up()?; + Ok(true) + } None => Ok(false), } } fn step_statement(&mut self) -> Result { - assert!(matches!(self.cursor.value(), Value::Statement)); - match self.source.peek()? { + assert!(matches!(self.output.value(), Value::Statement)); + match self.input.peek()? { Some(Token::Word(_) | Token::Glob(_)) => { - let token = self.source.next().unwrap()?; - self.cursor.push(Value::Terminal(token))?; - self.cursor.up()?; + let token = self.input.next().unwrap()?; + self.output.push(Value::Terminal(token))?; + self.output.up()?; + Ok(true) + } + Some(Token::Semi(_)) => { + let token = self.input.next().unwrap()?; + self.output.push(Value::Terminal(token))?; + self.output.up()?; + self.output.up()?; Ok(true) } None => Ok(false), diff --git a/src/syntax.rs b/src/syntax.rs index 5e91814..d35c034 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -7,10 +7,16 @@ use crate::{ }; use std::{collections::HashSet, process}; +/// The differnt types of nodes that may appear in our AST #[derive(Debug)] pub enum Element { + /// A Block is a list of statements Block(Block), + + /// A Command represents the desire to execute a command Command(Command), + + /// A literal is a ... literal value Literal(Value), } @@ -64,7 +70,7 @@ impl Command { builtin.call(ctx, &args) } - fn exec_command(&self, ctx: &mut Context, name: &str) -> Result { + fn exec_subprocess(&self, ctx: &mut Context, name: &str) -> Result { let args = self .args .iter() @@ -92,11 +98,10 @@ impl Command { impl Eval for Command { fn eval(&self, ctx: &mut Context) -> Result { - let name = self.name.eval(ctx)?; - let name = name.try_as_str()?; - match ctx.state.builtin(name) { + let name = self.name.eval(ctx)?.try_to_string()?; + match ctx.state.builtin(&name) { Some(builtin) => self.exec_builtin(ctx, builtin), - None => self.exec_command(ctx, name), + None => self.exec_subprocess(ctx, &name), } } } @@ -112,13 +117,16 @@ impl TreeBuilder { } } - fn descend(&mut self, source: &mut parse::Cursor) -> Result { + fn parse(&mut self, source: &mut parse::Cursor) -> Result { let e = match source.value() { parse::Value::Start => { let mut root = Block::new(); let children = source.iter_children(); for mut child in children { - let e = self.descend(&mut child)?; + if child.target.is_semi() { + continue; + } + let e = self.parse(&mut child)?; match e { Element::Command(cmd) => root.commands.push(cmd), _ => panic!(), @@ -128,15 +136,34 @@ impl TreeBuilder { } parse::Value::Statement => { let mut children = source.iter_children(); - let mut first = children.next().unwrap(); - let name = self.descend(&mut first)?; + let mut first = match children.next() { + Some(child) => child, + None => return Err(ParseError::StatementIsEmpty), + }; + if first.target.is_semi() { + return Err(ParseError::StatementIsEmpty); + } + let name = self.parse(&mut first)?; let mut cmd = Command { name: Box::new(name), args: Vec::new(), }; + + // we set complete to true when we find a semicolon. If there are any parse nodes + // that appear -after- a semicolon, that indicates a bug in the prior stage. + let mut complete = false; for mut child in children { - let e = self.descend(&mut child)?; - cmd.args.push(e); + if child.target.is_semi() { + complete = true; + continue; + } + + if complete { + return Err(ParseError::DanglingElements); + } else { + let e = self.parse(&mut child)?; + cmd.args.push(e); + } } Element::Command(cmd) } @@ -146,6 +173,9 @@ impl TreeBuilder { parse::Value::Terminal(Token::Glob(_)) => { todo!() } + parse::Value::Terminal(Token::Semi(_)) => { + return Err(ParseError::WhyParseSemicolon); + } }; self.visited.insert(source.target.id); Ok(e) @@ -157,7 +187,7 @@ pub fn parse(source: &str) -> Result { let parser = parse::Parser::new(tokens); let mut parse_tree = parser.parse()?; let mut builder = TreeBuilder::new(); - builder.descend(&mut parse_tree) + builder.parse(&mut parse_tree) } #[cfg(test)]