you can now use a semicolon to have TWO statements

11 months ago · 5505cb48c6
parent be5bebf25b
commit 5505cb48c6
4 changed files with 168 additions and 55 deletions
--- a/src/error.rs
+++ b/src/error.rs
@ -29,6 +29,12 @@ pub enum LexError {
    #[error("unexpected character {g} at {pos:?}", g = .0.glyph, pos = .0.position)]
    UnexpectedCharacter(Glyph),

+    #[error("unexpected character {g} at {pos:?} while lexing a bare string", g = .0.glyph, pos = .0.position)]
+    UnexpectedCharacterInBareString(Glyph),
+
+    #[error("unexpected character {g} at {pos:?} while lexing a glob", g = .0.glyph, pos = .0.position)]
+    UnexpectedCharacterInGlob(Glyph),
+
    #[error("unexpected eof")]
    UnexpectedEOF,

@ -61,6 +67,15 @@ pub enum ParseError {

    #[error("Illegal attempt to push a value as a child to a terminal value")]
    PushOntoTerminal,
+
+    #[error("Statement node has no children")]
+    StatementIsEmpty,
+
+    #[error("dangling")]
+    DanglingElements,
+
+    #[error("you wouldn't parse a semicolon")]
+    WhyParseSemicolon,
 }

 #[derive(Debug, Error)]
--- a/src/lex.rs
+++ b/src/lex.rs
@ -68,6 +68,8 @@ pub enum Token {

    /// A bare word containing 1 or more of the special characters ? or *
    Glob(Lexeme),
+
+    Semi(Glyph),
 }

 impl Token {
@ -96,6 +98,7 @@ impl Token {

        match self {
            Word(lexeme) | Glob(lexeme) => lexeme.text(),
+            Semi(glyph) => String::from(glyph.glyph),
        }
    }
 }
@ -121,13 +124,10 @@ impl<'text> Tokenizer<'text> {
        match next.glyph {
            _ if next.is_word() => Some(self.lex_bare_string(vec![next])),
            _ if next.is_glob() => Some(self.lex_glob(vec![next])),
-            // '\\' => match self.source.pop() {
-            //     Ok(escaped) => Some(self.lex_bare_string(vec![escaped])),
-            //     Err(e) => Some(Err(e)),
-            // },
            '@' => Some(self.lex_var(vec![next])),
            '\'' => Some(self.lex_raw_string(vec![next])),
            '"' => Some(self.lex_interp_string(vec![next])),
+            ';' => Some(Ok(Token::Semi(next))),
            _ => Some(Err(LexError::UnexpectedCharacter(next))),
        }
    }
@ -141,11 +141,12 @@ impl<'text> Tokenizer<'text> {
                    progress.push(self.source.pop()?);
                    return self.lex_glob(progress);
                }
-                // '\\' => {
-                //     self.source.pop()?;
-                //     progress.push(self.source.pop()?);
-                // }
-                _ => return Err(LexError::UnexpectedCharacter(self.source.pop()?)),
+                ';' => break,
+                _ => {
+                    return Err(LexError::UnexpectedCharacterInBareString(
+                        self.source.pop()?,
+                    ))
+                }
            }
        }

@ -159,13 +160,14 @@ impl<'text> Tokenizer<'text> {
    fn lex_glob(&mut self, mut progress: Vec<Glyph>) -> Result<Token, LexError> {
        while let Some(next) = self.source.peek() {
            match next.glyph {
+                ';' => break,
                _ if next.glyph.is_whitespace() => break,
                _ if next.is_glob() => progress.push(self.source.pop()?),
                // '\\' => {
                //     self.source.pop()?;
                //     progress.push(self.source.pop()?);
                // }
-                _ => return Err(LexError::UnexpectedCharacter(self.source.pop()?)),
+                _ => return Err(LexError::UnexpectedCharacterInGlob(self.source.pop()?)),
            }
        }

--- a/src/parse.rs
+++ b/src/parse.rs
@ -6,14 +6,19 @@ use std::{
    sync::atomic::AtomicUsize,
 };

+/// The contents of a node in our parse tree. The parse tree consists of both terminal and
+/// nonterminal symbols.
 #[derive(Debug, PartialEq)]
 pub enum Value {
    /// The start symbol of our parse tree. Each parse tree is rooted in a node whose value is the
    /// start symbol. This is the only node in the tree that should utilize the start symbol.
    Start,

+    /// The children of a statement symbol make up the components of what will become a statement
+    /// in our AST.
    Statement,

+    /// Each of the tokens from the lex stage becomes a terminal node on our tree
    Terminal(Token),
 }

@ -43,19 +48,28 @@ pub struct Node {

 impl Node {
    fn new() -> Cursor {
-        let root = Node {
+        let root = Rc::new(Node {
            id: next_id(),
            parent: None,
            value: Value::Start,
            children: RefCell::new(Vec::new()),
-        };
-
-        let root = Rc::new(root);
+        });
        Cursor {
            target: Rc::clone(&root),
+            prev: root.id,
            root,
        }
    }
+
+    pub fn is_semi(&self) -> bool {
+        matches!(self.value, Value::Terminal(Token::Semi(_)))
+    }
+}
+
+impl PartialEq for Node {
+    fn eq(&self, other: &Self) -> bool {
+        self.id == other.id
+    }
 }

 pub struct ChildIter {
@ -74,6 +88,7 @@ impl Iterator for ChildIter {
        Some(Cursor {
            target: Rc::clone(v),
            root: Rc::clone(&self.root),
+            prev: self.target.id,
        })
    }
 }
@ -83,11 +98,15 @@ fn next_id() -> usize {
    LAST_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
 }

-/// Cursor values expose access to a parse tree.
+/// Cursor values expose access to a parse tree. A cursor is logically a pointer to a single node
+/// within a parse tree. The cursor helps with the ownership structure: so long as there is a
+/// cursor to any node on the tree, the tree remains in memory. Once there are no cursors pointed
+/// at the tree, it is dropped.
 #[derive(Debug)]
 pub struct Cursor {
    pub target: Rc<Node>,
    root: Rc<Node>,
+    prev: usize,
 }

 impl Cursor {
@ -99,7 +118,7 @@ impl Cursor {
            None => Err(ParseError::AtRootAlready),
            Some(parent) => match parent.upgrade() {
                Some(parent) => {
-                    self.target = parent;
+                    self.goto(parent);
                    Ok(())
                }
                None => Err(ParseError::ParentIsGone),
@ -107,6 +126,40 @@ impl Cursor {
        }
    }

+    /// Moves the cursor horizontally in the tree, selecting the node that appears after the
+    /// current target node. Returns `Ok(false)` without moving when no such sibling exists.
+    pub fn next_sibling(&mut self) -> Result<bool, ParseError> {
+        let next = match self.pick_next_sibling()? {
+            Some(node) => node,
+            None => return Ok(false),
+        };
+        self.prev = self.target.id;
+        self.target = next;
+        Ok(true)
+    }
+
+    fn pick_next_sibling(&mut self) -> Result<Option<Rc<Node>>, ParseError> {
+        let parent = self
+            .target
+            .parent
+            .clone()
+            .ok_or_else(|| ParseError::AtRootAlready)?;
+
+        // SAFETY: this is ok because the cursor always retains a pointer to the root of the tree,
+        // so we know that since we have a cursor, the parent cannot yet be dropped
+        let parent = parent.upgrade().unwrap();
+        let mut found_self = false;
+        for child in parent.children.borrow().iter() {
+            if found_self {
+                return Ok(Some(child.clone()));
+            }
+            if child.id == self.target.id {
+                found_self = true;
+            }
+        }
+        Ok(None)
+    }
+
    /// Adds a value to the children of the current target node, then descends to select that
    /// child.
    fn push(&mut self, v: Value) -> Result<(), ParseError> {
@ -124,12 +177,12 @@ impl Cursor {
            .children
            .try_borrow_mut()?
            .push(Rc::clone(&node));
-        self.target = node;
+        self.goto(node);
        Ok(())
    }

    pub fn up_to_root(&mut self) {
-        self.target = Rc::clone(&self.root);
+        self.goto(Rc::clone(&self.root));
    }

    pub fn value(&self) -> &Value {
@ -154,35 +207,33 @@ impl Cursor {
        Rc::clone(&self.root)
    }

-    // pub fn render_textree<W: Write>(&self, w: &mut W, depth: u32) {
-    //     write!(w, "{:?} {pad:?}", self.target.value, pad = depth * 2);
-    //     for child in self.iter_children() {
-    //         child.render_textree(w, depth + 1);
-    //     }
-    // }
+    fn goto(&mut self, next: Rc<Node>) {
+        self.prev = self.target.id;
+        self.target = next;
+    }
 }

 pub struct Parser<'text> {
-    source: Lexer<'text>,
-    cursor: Cursor,
+    input: Lexer<'text>,
+    output: Cursor,
 }

 impl<'text> Parser<'text> {
    pub fn new(source: Lexer<'text>) -> Self {
        Self {
-            source,
-            cursor: Node::new(),
+            input: source,
+            output: Node::new(),
        }
    }

    pub fn parse(mut self) -> Result<Cursor, ParseError> {
        while self.step()? {}
-        self.cursor.up_to_root();
-        Ok(self.cursor)
+        self.output.up_to_root();
+        Ok(self.output)
    }

    fn step(&mut self) -> Result<bool, ParseError> {
-        match self.cursor.value() {
+        match self.output.value() {
            Value::Start => self.step_start(),
            Value::Statement => self.step_statement(),
            Value::Terminal(_) => panic!(),
@ -190,30 +241,45 @@ impl<'text> Parser<'text> {
    }

    fn step_start(&mut self) -> Result<bool, ParseError> {
-        assert!(matches!(self.cursor.value(), Value::Start));
-        match self.source.peek()? {
+        assert!(matches!(self.output.value(), Value::Start));
+        match self.input.peek()? {
            Some(Token::Word(_)) => {
-                self.cursor.push(Value::Statement)?;
-                let token = self.source.next().unwrap()?;
-                self.cursor.push(Value::Terminal(token))?;
-                self.cursor.up()?;
+                self.output.push(Value::Statement)?;
+                let token = self.input.next().unwrap()?;
+                self.output.push(Value::Terminal(token))?;
+                self.output.up()?;
                Ok(true)
            }
            Some(Token::Glob(_)) => {
-                let token = self.source.next().unwrap()?;
+                let token = self.input.next().unwrap()?;
                Err(ParseError::UnexpectedToken(token))
            }
+            Some(Token::Semi(_)) => {
+                self.output.push(Value::Statement)?;
+                let token = self.input.next().unwrap()?;
+                self.output.push(Value::Terminal(token))?;
+                self.output.up()?;
+                self.output.up()?;
+                Ok(true)
+            }
            None => Ok(false),
        }
    }

    fn step_statement(&mut self) -> Result<bool, ParseError> {
-        assert!(matches!(self.cursor.value(), Value::Statement));
-        match self.source.peek()? {
+        assert!(matches!(self.output.value(), Value::Statement));
+        match self.input.peek()? {
            Some(Token::Word(_) | Token::Glob(_)) => {
-                let token = self.source.next().unwrap()?;
-                self.cursor.push(Value::Terminal(token))?;
-                self.cursor.up()?;
+                let token = self.input.next().unwrap()?;
+                self.output.push(Value::Terminal(token))?;
+                self.output.up()?;
+                Ok(true)
+            }
+            Some(Token::Semi(_)) => {
+                let token = self.input.next().unwrap()?;
+                self.output.push(Value::Terminal(token))?;
+                self.output.up()?;
+                self.output.up()?;
                Ok(true)
            }
            None => Ok(false),
--- a/src/syntax.rs
+++ b/src/syntax.rs
@ -7,10 +7,16 @@ use crate::{
 };
 use std::{collections::HashSet, process};

+/// The different types of nodes that may appear in our AST
 #[derive(Debug)]
 pub enum Element {
+    /// A Block is a list of statements
    Block(Block),
+
+    /// A Command represents the desire to execute a command
    Command(Command),
+
+    /// A literal is a ... literal value
    Literal(Value),
 }

@ -64,7 +70,7 @@ impl Command {
        builtin.call(ctx, &args)
    }

-    fn exec_command(&self, ctx: &mut Context, name: &str) -> Result<Value, ExecError> {
+    fn exec_subprocess(&self, ctx: &mut Context, name: &str) -> Result<Value, ExecError> {
        let args = self
            .args
            .iter()
@ -92,11 +98,10 @@ impl Command {

 impl Eval for Command {
    fn eval(&self, ctx: &mut Context) -> Result<Value, ExecError> {
-        let name = self.name.eval(ctx)?;
-        let name = name.try_as_str()?;
-        match ctx.state.builtin(name) {
+        let name = self.name.eval(ctx)?.try_to_string()?;
+        match ctx.state.builtin(&name) {
            Some(builtin) => self.exec_builtin(ctx, builtin),
-            None => self.exec_command(ctx, name),
+            None => self.exec_subprocess(ctx, &name),
        }
    }
 }
@ -112,13 +117,16 @@ impl TreeBuilder {
        }
    }

-    fn descend(&mut self, source: &mut parse::Cursor) -> Result<Element, ParseError> {
+    fn parse(&mut self, source: &mut parse::Cursor) -> Result<Element, ParseError> {
        let e = match source.value() {
            parse::Value::Start => {
                let mut root = Block::new();
                let children = source.iter_children();
                for mut child in children {
-                    let e = self.descend(&mut child)?;
+                    if child.target.is_semi() {
+                        continue;
+                    }
+                    let e = self.parse(&mut child)?;
                    match e {
                        Element::Command(cmd) => root.commands.push(cmd),
                        _ => panic!(),
@ -128,15 +136,34 @@ impl TreeBuilder {
            }
            parse::Value::Statement => {
                let mut children = source.iter_children();
-                let mut first = children.next().unwrap();
-                let name = self.descend(&mut first)?;
+                let mut first = match children.next() {
+                    Some(child) => child,
+                    None => return Err(ParseError::StatementIsEmpty),
+                };
+                if first.target.is_semi() {
+                    return Err(ParseError::StatementIsEmpty);
+                }
+                let name = self.parse(&mut first)?;
                let mut cmd = Command {
                    name: Box::new(name),
                    args: Vec::new(),
                };
+
+                // we set complete to true when we find a semicolon. If there are any parse nodes
+                // that appear -after- a semicolon, that indicates a bug in the prior stage.
+                let mut complete = false;
                for mut child in children {
-                    let e = self.descend(&mut child)?;
-                    cmd.args.push(e);
+                    if child.target.is_semi() {
+                        complete = true;
+                        continue;
+                    }
+
+                    if complete {
+                        return Err(ParseError::DanglingElements);
+                    } else {
+                        let e = self.parse(&mut child)?;
+                        cmd.args.push(e);
+                    }
                }
                Element::Command(cmd)
            }
@ -146,6 +173,9 @@ impl TreeBuilder {
            parse::Value::Terminal(Token::Glob(_)) => {
                todo!()
            }
+            parse::Value::Terminal(Token::Semi(_)) => {
+                return Err(ParseError::WhyParseSemicolon);
+            }
        };
        self.visited.insert(source.target.id);
        Ok(e)
@ -157,7 +187,7 @@ pub fn parse(source: &str) -> Result<Element, ParseError> {
    let parser = parse::Parser::new(tokens);
    let mut parse_tree = parser.parse()?;
    let mut builder = TreeBuilder::new();
-    builder.descend(&mut parse_tree)
+    builder.parse(&mut parse_tree)
 }

 #[cfg(test)]