you can now use a semicolon to have TWO statements

11 months ago · 5505cb48c6
parent be5bebf25b
commit 5505cb48c6
4 changed files with 168 additions and 55 deletions
--- a/src/error.rs
+++ b/src/error.rs
@ -29,6 +29,12 @@ pub enum LexError {
    #[error("unexpected character {g} at {pos:?}", g = .0.glyph, pos = .0.position)]
    UnexpectedCharacter(Glyph),
    #[error("unexpected character {g} at {pos:?} while lexing a bare string", g = .0.glyph, pos = .0.position)]
    UnexpectedCharacterInBareString(Glyph),
    #[error("unexpected character {g} at {pos:?} while lexing a glob", g = .0.glyph, pos = .0.position)]
    UnexpectedCharacterInGlob(Glyph),
    #[error("unexpected eof")]
    UnexpectedEOF,
@ -61,6 +67,15 @@ pub enum ParseError {
    #[error("Illegal attempt to push a value as a child to a terminal value")]
    PushOntoTerminal,
    #[error("Statement node has no children")]
    StatementIsEmpty,
    #[error("dangling")]
    DanglingElements,
    #[error("you wouldn't parse a semicolon")]
    WhyParseSemicolon,
 }
 #[derive(Debug, Error)]
--- a/src/lex.rs
+++ b/src/lex.rs
@ -68,6 +68,8 @@ pub enum Token {
    /// A bare word containing 1 or more of the special characters ? or *
    Glob(Lexeme),
    Semi(Glyph),
 }
 impl Token {
@ -96,6 +98,7 @@ impl Token {
        match self {
            Word(lexeme) | Glob(lexeme) => lexeme.text(),
            Semi(glyph) => String::from(glyph.glyph),
        }
    }
 }
@ -121,13 +124,10 @@ impl<'text> Tokenizer<'text> {
        match next.glyph {
            _ if next.is_word() => Some(self.lex_bare_string(vec![next])),
            _ if next.is_glob() => Some(self.lex_glob(vec![next])),
            // '\\' => match self.source.pop() {
            //     Ok(escaped) => Some(self.lex_bare_string(vec![escaped])),
            //     Err(e) => Some(Err(e)),
            // },
            '@' => Some(self.lex_var(vec![next])),
            '\'' => Some(self.lex_raw_string(vec![next])),
            '"' => Some(self.lex_interp_string(vec![next])),
            ';' => Some(Ok(Token::Semi(next))),
            _ => Some(Err(LexError::UnexpectedCharacter(next))),
        }
    }
@ -141,11 +141,12 @@ impl<'text> Tokenizer<'text> {
                    progress.push(self.source.pop()?);
                    return self.lex_glob(progress);
                }
-                // '\\' => {
+                ';' => break,
-                //     self.source.pop()?;
+                _ => {
-                //     progress.push(self.source.pop()?);
+                    return Err(LexError::UnexpectedCharacterInBareString(
-                // }
+                        self.source.pop()?,
-                _ => return Err(LexError::UnexpectedCharacter(self.source.pop()?)),
+                    ))
                }
            }
        }
@ -159,13 +160,14 @@ impl<'text> Tokenizer<'text> {
    fn lex_glob(&mut self, mut progress: Vec<Glyph>) -> Result<Token, LexError> {
        while let Some(next) = self.source.peek() {
            match next.glyph {
                ';' => break,
                _ if next.glyph.is_whitespace() => break,
                _ if next.is_glob() => progress.push(self.source.pop()?),
                // '\\' => {
                //     self.source.pop()?;
                //     progress.push(self.source.pop()?);
                // }
-                _ => return Err(LexError::UnexpectedCharacter(self.source.pop()?)),
+                _ => return Err(LexError::UnexpectedCharacterInGlob(self.source.pop()?)),
            }
        }
--- a/src/parse.rs
+++ b/src/parse.rs
@ -6,14 +6,19 @@ use std::{
    sync::atomic::AtomicUsize,
 };
 /// The contents of a node in our parse tree. The parse tree consists of both terminal and
 /// nonterminal symbols.
 #[derive(Debug, PartialEq)]
 pub enum Value {
    /// The start symbol of our parse tree. Each parse tree is rooted in a node whose value is the
    /// start symbol. This is the only node in the tree that should use the start symbol.
    Start,
    /// A nonterminal whose children make up the components of what will become a statement in
    /// our AST.
    Statement,
    /// A terminal symbol: each token from the lex stage becomes a terminal node on our tree.
    Terminal(Token),
 }
@ -43,19 +48,28 @@ pub struct Node {
 impl Node {
    fn new() -> Cursor {
-        let root = Node {
+        let root = Rc::new(Node {
            id: next_id(),
            parent: None,
            value: Value::Start,
            children: RefCell::new(Vec::new()),
-        };
+        });
        let root = Rc::new(root);
        Cursor {
            target: Rc::clone(&root),
            prev: root.id,
            root,
        }
    }
    /// Reports whether this node is a terminal node holding a semicolon token.
    pub fn is_semi(&self) -> bool {
        match &self.value {
            Value::Terminal(token) => matches!(token, Token::Semi(_)),
            // Nonterminal symbols can never be a semicolon.
            Value::Start | Value::Statement => false,
        }
    }
 }
 impl PartialEq for Node {
    /// Two nodes are considered equal when they carry the same `id`; the value and children
    /// are not compared.
    fn eq(&self, rhs: &Self) -> bool {
        let (lhs_id, rhs_id) = (self.id, rhs.id);
        lhs_id == rhs_id
    }
 }
 pub struct ChildIter {
@ -74,6 +88,7 @@ impl Iterator for ChildIter {
        Some(Cursor {
            target: Rc::clone(v),
            root: Rc::clone(&self.root),
            prev: self.target.id,
        })
    }
 }
@ -83,11 +98,15 @@ fn next_id() -> usize {
    LAST_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
 }
-/// Cursor values expose access to a parse tree.
+/// Cursor values expose access to a parse tree. A cursor is logically a pointer to a single node
 /// within a parse tree. The cursor helps with the ownership structure: so long as there is a
 /// cursor to any node on the tree, the tree remains in memory. Once there are no cursors pointed
 /// at the tree, it is dropped.
 #[derive(Debug)]
 pub struct Cursor {
    /// The node this cursor currently points at.
    pub target: Rc<Node>,
    /// The root of the tree that `target` belongs to. Holding this strong reference is what
    /// keeps the tree alive while the cursor exists; `up_to_root` jumps back to it.
    root: Rc<Node>,
    /// The id of the node the cursor was pointing at before its most recent move. A cursor
    /// produced by iterating children starts with the parent's id here, and a fresh cursor
    /// from `Node::new` starts with the root's own id.
    prev: usize,
 }
 impl Cursor {
@ -99,7 +118,7 @@ impl Cursor {
            None => Err(ParseError::AtRootAlready),
            Some(parent) => match parent.upgrade() {
                Some(parent) => {
-                    self.target = parent;
+                    self.goto(parent);
                    Ok(())
                }
                None => Err(ParseError::ParentIsGone),
@ -107,6 +126,40 @@ impl Cursor {
        }
    }
    /// Moves the cursor horizontally in the tree, selecting the node that appears immediately
    /// after the current target among its parent's children.
    ///
    /// Returns `Ok(true)` when the cursor moved, and `Ok(false)` when the current target is the
    /// last child of its parent (the cursor is left untouched in that case).
    ///
    /// # Errors
    ///
    /// Propagates any error from `pick_next_sibling`, e.g. `ParseError::AtRootAlready` when
    /// the current target has no parent.
    pub fn next_sibling(&mut self) -> Result<bool, ParseError> {
        match self.pick_next_sibling()? {
            Some(next) => {
                // Route the move through `goto` so `prev` is recorded the same way as for
                // every other cursor movement (`up`, `push`, `up_to_root`).
                self.goto(next);
                Ok(true)
            }
            None => Ok(false),
        }
    }
    /// Finds the node that follows the current target in its parent's list of children,
    /// without moving the cursor.
    ///
    /// Returns `Ok(Some(node))` for the next sibling, or `Ok(None)` when the target is the last
    /// child (or is not found among its parent's children).
    ///
    /// # Errors
    ///
    /// * `ParseError::AtRootAlready` if the current target has no parent.
    /// * `ParseError::ParentIsGone` if the parent has already been dropped. The cursor keeps a
    ///   strong reference to the root, so this is not expected to happen; it is reported as an
    ///   error (as `up` does) rather than panicking.
    fn pick_next_sibling(&mut self) -> Result<Option<Rc<Node>>, ParseError> {
        let parent = self
            .target
            .parent
            .as_ref()
            .ok_or(ParseError::AtRootAlready)?
            .upgrade()
            .ok_or(ParseError::ParentIsGone)?;
        let siblings = parent.children.borrow();
        // Skip everything before the current target, then step past the target itself: the
        // element at offset 1 is the next sibling, if there is one.
        let next = siblings
            .iter()
            .skip_while(|sibling| sibling.id != self.target.id)
            .nth(1)
            .cloned();
        Ok(next)
    }
    /// Adds a value to the children of the current target node, then descends to select that
    /// child.
    fn push(&mut self, v: Value) -> Result<(), ParseError> {
@ -124,12 +177,12 @@ impl Cursor {
            .children
            .try_borrow_mut()?
            .push(Rc::clone(&node));
-        self.target = node;
+        self.goto(node);
        Ok(())
    }
    pub fn up_to_root(&mut self) {
-        self.target = Rc::clone(&self.root);
+        self.goto(Rc::clone(&self.root));
    }
    pub fn value(&self) -> &Value {
@ -154,35 +207,33 @@ impl Cursor {
        Rc::clone(&self.root)
    }
-    // pub fn render_textree<W: Write>(&self, w: &mut W, depth: u32) {
+    fn goto(&mut self, next: Rc<Node>) {
-    //     write!(w, "{:?} {pad:?}", self.target.value, pad = depth * 2);
+        self.prev = self.target.id;
-    //     for child in self.iter_children() {
+        self.target = next;
-    //         child.render_textree(w, depth + 1);
+    }
    //     }
    // }
 }
 pub struct Parser<'text> {
-    source: Lexer<'text>,
+    input: Lexer<'text>,
-    cursor: Cursor,
+    output: Cursor,
 }
 impl<'text> Parser<'text> {
    pub fn new(source: Lexer<'text>) -> Self {
        Self {
-            source,
+            input: source,
-            cursor: Node::new(),
+            output: Node::new(),
        }
    }
    pub fn parse(mut self) -> Result<Cursor, ParseError> {
        while self.step()? {}
-        self.cursor.up_to_root();
+        self.output.up_to_root();
-        Ok(self.cursor)
+        Ok(self.output)
    }
    fn step(&mut self) -> Result<bool, ParseError> {
-        match self.cursor.value() {
+        match self.output.value() {
            Value::Start => self.step_start(),
            Value::Statement => self.step_statement(),
            Value::Terminal(_) => panic!(),
@ -190,30 +241,45 @@ impl<'text> Parser<'text> {
    }
    fn step_start(&mut self) -> Result<bool, ParseError> {
-        assert!(matches!(self.cursor.value(), Value::Start));
+        assert!(matches!(self.output.value(), Value::Start));
-        match self.source.peek()? {
+        match self.input.peek()? {
            Some(Token::Word(_)) => {
-                self.cursor.push(Value::Statement)?;
+                self.output.push(Value::Statement)?;
-                let token = self.source.next().unwrap()?;
+                let token = self.input.next().unwrap()?;
-                self.cursor.push(Value::Terminal(token))?;
+                self.output.push(Value::Terminal(token))?;
-                self.cursor.up()?;
+                self.output.up()?;
                Ok(true)
            }
            Some(Token::Glob(_)) => {
-                let token = self.source.next().unwrap()?;
+                let token = self.input.next().unwrap()?;
                Err(ParseError::UnexpectedToken(token))
            }
            Some(Token::Semi(_)) => {
                self.output.push(Value::Statement)?;
                let token = self.input.next().unwrap()?;
                self.output.push(Value::Terminal(token))?;
                self.output.up()?;
                self.output.up()?;
                Ok(true)
            }
            None => Ok(false),
        }
    }
    fn step_statement(&mut self) -> Result<bool, ParseError> {
-        assert!(matches!(self.cursor.value(), Value::Statement));
+        assert!(matches!(self.output.value(), Value::Statement));
-        match self.source.peek()? {
+        match self.input.peek()? {
            Some(Token::Word(_) | Token::Glob(_)) => {
-                let token = self.source.next().unwrap()?;
+                let token = self.input.next().unwrap()?;
-                self.cursor.push(Value::Terminal(token))?;
+                self.output.push(Value::Terminal(token))?;
-                self.cursor.up()?;
+                self.output.up()?;
                Ok(true)
            }
            Some(Token::Semi(_)) => {
                let token = self.input.next().unwrap()?;
                self.output.push(Value::Terminal(token))?;
                self.output.up()?;
                self.output.up()?;
                Ok(true)
            }
            None => Ok(false),
--- a/src/syntax.rs
+++ b/src/syntax.rs
@ -7,10 +7,16 @@ use crate::{
 };
 use std::{collections::HashSet, process};
 /// The different types of nodes that may appear in our AST.
 #[derive(Debug)]
 pub enum Element {
    /// A Block is a list of statements.
    Block(Block),
    /// A Command represents the desire to execute a command.
    Command(Command),
    /// A Literal wraps a literal value.
    Literal(Value),
 }
@ -64,7 +70,7 @@ impl Command {
        builtin.call(ctx, &args)
    }
-    fn exec_command(&self, ctx: &mut Context, name: &str) -> Result<Value, ExecError> {
+    fn exec_subprocess(&self, ctx: &mut Context, name: &str) -> Result<Value, ExecError> {
        let args = self
            .args
            .iter()
@ -92,11 +98,10 @@ impl Command {
 impl Eval for Command {
    fn eval(&self, ctx: &mut Context) -> Result<Value, ExecError> {
-        let name = self.name.eval(ctx)?;
+        let name = self.name.eval(ctx)?.try_to_string()?;
-        let name = name.try_as_str()?;
+        match ctx.state.builtin(&name) {
        match ctx.state.builtin(name) {
            Some(builtin) => self.exec_builtin(ctx, builtin),
-            None => self.exec_command(ctx, name),
+            None => self.exec_subprocess(ctx, &name),
        }
    }
 }
@ -112,13 +117,16 @@ impl TreeBuilder {
        }
    }
-    fn descend(&mut self, source: &mut parse::Cursor) -> Result<Element, ParseError> {
+    fn parse(&mut self, source: &mut parse::Cursor) -> Result<Element, ParseError> {
        let e = match source.value() {
            parse::Value::Start => {
                let mut root = Block::new();
                let children = source.iter_children();
                for mut child in children {
-                    let e = self.descend(&mut child)?;
+                    if child.target.is_semi() {
                        continue;
                    }
                    let e = self.parse(&mut child)?;
                    match e {
                        Element::Command(cmd) => root.commands.push(cmd),
                        _ => panic!(),
@ -128,16 +136,35 @@ impl TreeBuilder {
            }
            parse::Value::Statement => {
                let mut children = source.iter_children();
-                let mut first = children.next().unwrap();
+                let mut first = match children.next() {
-                let name = self.descend(&mut first)?;
+                    Some(child) => child,
                    None => return Err(ParseError::StatementIsEmpty),
                };
                if first.target.is_semi() {
                    return Err(ParseError::StatementIsEmpty);
                }
                let name = self.parse(&mut first)?;
                let mut cmd = Command {
                    name: Box::new(name),
                    args: Vec::new(),
                };
                // we set complete to true when we find a semicolon. If there are any parse nodes
                // that appear -after- a semicolon, that indicates a bug in the prior stage.
                let mut complete = false;
                for mut child in children {
-                    let e = self.descend(&mut child)?;
+                    if child.target.is_semi() {
                        complete = true;
                        continue;
                    }
                    if complete {
                        return Err(ParseError::DanglingElements);
                    } else {
                        let e = self.parse(&mut child)?;
                        cmd.args.push(e);
                    }
                }
                Element::Command(cmd)
            }
            parse::Value::Terminal(Token::Word(word)) => {
@ -146,6 +173,9 @@ impl TreeBuilder {
            parse::Value::Terminal(Token::Glob(_)) => {
                todo!()
            }
            parse::Value::Terminal(Token::Semi(_)) => {
                return Err(ParseError::WhyParseSemicolon);
            }
        };
        self.visited.insert(source.target.id);
        Ok(e)
@ -157,7 +187,7 @@ pub fn parse(source: &str) -> Result<Element, ParseError> {
    let parser = parse::Parser::new(tokens);
    let mut parse_tree = parser.parse()?;
    let mut builder = TreeBuilder::new();
-    builder.descend(&mut parse_tree)
+    builder.parse(&mut parse_tree)
 }
 #[cfg(test)]