you can now use a semicolon to have TWO statements

main
Jordan Orelli 11 months ago
parent be5bebf25b
commit 5505cb48c6

@ -29,6 +29,12 @@ pub enum LexError {
#[error("unexpected character {g} at {pos:?}", g = .0.glyph, pos = .0.position)] #[error("unexpected character {g} at {pos:?}", g = .0.glyph, pos = .0.position)]
UnexpectedCharacter(Glyph), UnexpectedCharacter(Glyph),
#[error("unexpected character {g} at {pos:?} while lexing a bare string", g = .0.glyph, pos = .0.position)]
UnexpectedCharacterInBareString(Glyph),
#[error("unexpected character {g} at {pos:?} while lexing a glob", g = .0.glyph, pos = .0.position)]
UnexpectedCharacterInGlob(Glyph),
#[error("unexpected eof")] #[error("unexpected eof")]
UnexpectedEOF, UnexpectedEOF,
@ -61,6 +67,15 @@ pub enum ParseError {
#[error("Illegal attempt to push a value as a child to a terminal value")] #[error("Illegal attempt to push a value as a child to a terminal value")]
PushOntoTerminal, PushOntoTerminal,
#[error("Statement node has no children")]
StatementIsEmpty,
#[error("dangling")]
DanglingElements,
#[error("you wouldn't parse a semicolon")]
WhyParseSemicolon,
} }
#[derive(Debug, Error)] #[derive(Debug, Error)]

@ -68,6 +68,8 @@ pub enum Token {
/// A bare word containing 1 or more of the special characters ? or * /// A bare word containing 1 or more of the special characters ? or *
Glob(Lexeme), Glob(Lexeme),
Semi(Glyph),
} }
impl Token { impl Token {
@ -96,6 +98,7 @@ impl Token {
match self { match self {
Word(lexeme) | Glob(lexeme) => lexeme.text(), Word(lexeme) | Glob(lexeme) => lexeme.text(),
Semi(glyph) => String::from(glyph.glyph),
} }
} }
} }
@ -121,13 +124,10 @@ impl<'text> Tokenizer<'text> {
match next.glyph { match next.glyph {
_ if next.is_word() => Some(self.lex_bare_string(vec![next])), _ if next.is_word() => Some(self.lex_bare_string(vec![next])),
_ if next.is_glob() => Some(self.lex_glob(vec![next])), _ if next.is_glob() => Some(self.lex_glob(vec![next])),
// '\\' => match self.source.pop() {
// Ok(escaped) => Some(self.lex_bare_string(vec![escaped])),
// Err(e) => Some(Err(e)),
// },
'@' => Some(self.lex_var(vec![next])), '@' => Some(self.lex_var(vec![next])),
'\'' => Some(self.lex_raw_string(vec![next])), '\'' => Some(self.lex_raw_string(vec![next])),
'"' => Some(self.lex_interp_string(vec![next])), '"' => Some(self.lex_interp_string(vec![next])),
';' => Some(Ok(Token::Semi(next))),
_ => Some(Err(LexError::UnexpectedCharacter(next))), _ => Some(Err(LexError::UnexpectedCharacter(next))),
} }
} }
@ -141,11 +141,12 @@ impl<'text> Tokenizer<'text> {
progress.push(self.source.pop()?); progress.push(self.source.pop()?);
return self.lex_glob(progress); return self.lex_glob(progress);
} }
// '\\' => { ';' => break,
// self.source.pop()?; _ => {
// progress.push(self.source.pop()?); return Err(LexError::UnexpectedCharacterInBareString(
// } self.source.pop()?,
_ => return Err(LexError::UnexpectedCharacter(self.source.pop()?)), ))
}
} }
} }
@ -159,13 +160,14 @@ impl<'text> Tokenizer<'text> {
fn lex_glob(&mut self, mut progress: Vec<Glyph>) -> Result<Token, LexError> { fn lex_glob(&mut self, mut progress: Vec<Glyph>) -> Result<Token, LexError> {
while let Some(next) = self.source.peek() { while let Some(next) = self.source.peek() {
match next.glyph { match next.glyph {
';' => break,
_ if next.glyph.is_whitespace() => break, _ if next.glyph.is_whitespace() => break,
_ if next.is_glob() => progress.push(self.source.pop()?), _ if next.is_glob() => progress.push(self.source.pop()?),
// '\\' => { // '\\' => {
// self.source.pop()?; // self.source.pop()?;
// progress.push(self.source.pop()?); // progress.push(self.source.pop()?);
// } // }
_ => return Err(LexError::UnexpectedCharacter(self.source.pop()?)), _ => return Err(LexError::UnexpectedCharacterInGlob(self.source.pop()?)),
} }
} }

@ -6,14 +6,19 @@ use std::{
sync::atomic::AtomicUsize, sync::atomic::AtomicUsize,
}; };
/// The contents of a node in our parse tree. The parse tree consists of both terminal and
/// nonterminal symbols.
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum Value { pub enum Value {
/// The start symbol of our parse tree. Each parse tree is rooted in a node whose value is the /// The start symbol of our parse tree. Each parse tree is rooted in a node whose value is the
/// start symbol. This is the only node in the tree that should utilize the start symbol. /// start symbol. This is the only node in the tree that should utilize the start symbol.
Start, Start,
/// The children of a statement symbol make up the components of what will become a statement
/// in our ast
Statement, Statement,
/// Each of the tokens from the lex stage becomes a terminal node on our tree
Terminal(Token), Terminal(Token),
} }
@ -43,19 +48,28 @@ pub struct Node {
impl Node { impl Node {
fn new() -> Cursor { fn new() -> Cursor {
let root = Node { let root = Rc::new(Node {
id: next_id(), id: next_id(),
parent: None, parent: None,
value: Value::Start, value: Value::Start,
children: RefCell::new(Vec::new()), children: RefCell::new(Vec::new()),
}; });
let root = Rc::new(root);
Cursor { Cursor {
target: Rc::clone(&root), target: Rc::clone(&root),
prev: root.id,
root, root,
} }
} }
/// Reports whether this node's value is a terminal wrapping a `Token::Semi`, i.e. a statement
/// separator rather than a word or glob.
pub fn is_semi(&self) -> bool {
matches!(self.value, Value::Terminal(Token::Semi(_)))
}
}
impl PartialEq for Node {
fn eq(&self, other: &Self) -> bool {
self.id == other.id
}
} }
pub struct ChildIter { pub struct ChildIter {
@ -74,6 +88,7 @@ impl Iterator for ChildIter {
Some(Cursor { Some(Cursor {
target: Rc::clone(v), target: Rc::clone(v),
root: Rc::clone(&self.root), root: Rc::clone(&self.root),
prev: self.target.id,
}) })
} }
} }
@ -83,11 +98,15 @@ fn next_id() -> usize {
LAST_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed) LAST_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
} }
/// Cursor values expose access to a parse tree. /// Cursor values expose access to a parse tree. A cursor is logically a pointer to a single node
/// within a parse tree. The cursor helps with the ownership structure: so long as there is a
/// cursor to any node on the tree, the tree remains in memory. Once there are no cursors pointed
/// at the tree, it is dropped.
#[derive(Debug)] #[derive(Debug)]
pub struct Cursor { pub struct Cursor {
pub target: Rc<Node>, pub target: Rc<Node>,
root: Rc<Node>, root: Rc<Node>,
prev: usize,
} }
impl Cursor { impl Cursor {
@ -99,7 +118,7 @@ impl Cursor {
None => Err(ParseError::AtRootAlready), None => Err(ParseError::AtRootAlready),
Some(parent) => match parent.upgrade() { Some(parent) => match parent.upgrade() {
Some(parent) => { Some(parent) => {
self.target = parent; self.goto(parent);
Ok(()) Ok(())
} }
None => Err(ParseError::ParentIsGone), None => Err(ParseError::ParentIsGone),
@ -107,6 +126,40 @@ impl Cursor {
} }
} }
/// Moves the cursor sideways to the node that follows the current target among its parent's
/// children.
///
/// Returns `Ok(true)` when the cursor moved and `Ok(false)` when there is no following sibling,
/// in which case the cursor is left where it was. Any error from `pick_next_sibling` is
/// propagated unchanged.
pub fn next_sibling(&mut self) -> Result<bool, ParseError> {
    if let Some(sibling) = self.pick_next_sibling()? {
        // Record the node we are leaving before retargeting the cursor.
        self.prev = self.target.id;
        self.target = sibling;
        return Ok(true);
    }
    Ok(false)
}
/// Looks up the node that follows the current target in its parent's child list, without moving
/// the cursor.
///
/// Returns `Ok(Some(node))` for the child immediately after the target, and `Ok(None)` when the
/// target is the last child or is not found among the parent's children.
///
/// # Errors
///
/// * `ParseError::AtRootAlready` when the target has no parent link.
/// * `ParseError::ParentIsGone` when the parent link can no longer be upgraded. The cursor keeps
///   a strong reference to the root, so this is not expected in practice; it is reported as an
///   error rather than a panic so that it matches how `up` treats the same condition.
fn pick_next_sibling(&mut self) -> Result<Option<Rc<Node>>, ParseError> {
    let parent = self
        .target
        .parent
        .as_ref()
        .ok_or(ParseError::AtRootAlready)?
        .upgrade()
        .ok_or(ParseError::ParentIsGone)?;
    let children = parent.children.borrow();
    // Skip everything before the target, then step over the target itself.
    let next = children
        .iter()
        .skip_while(|child| child.id != self.target.id)
        .nth(1)
        .cloned();
    Ok(next)
}
/// Adds a value to the children of the current target node, then descends to select that /// Adds a value to the children of the current target node, then descends to select that
/// child. /// child.
fn push(&mut self, v: Value) -> Result<(), ParseError> { fn push(&mut self, v: Value) -> Result<(), ParseError> {
@ -124,12 +177,12 @@ impl Cursor {
.children .children
.try_borrow_mut()? .try_borrow_mut()?
.push(Rc::clone(&node)); .push(Rc::clone(&node));
self.target = node; self.goto(node);
Ok(()) Ok(())
} }
pub fn up_to_root(&mut self) { pub fn up_to_root(&mut self) {
self.target = Rc::clone(&self.root); self.goto(Rc::clone(&self.root));
} }
pub fn value(&self) -> &Value { pub fn value(&self) -> &Value {
@ -154,35 +207,33 @@ impl Cursor {
Rc::clone(&self.root) Rc::clone(&self.root)
} }
// pub fn render_textree<W: Write>(&self, w: &mut W, depth: u32) { fn goto(&mut self, next: Rc<Node>) {
// write!(w, "{:?} {pad:?}", self.target.value, pad = depth * 2); self.prev = self.target.id;
// for child in self.iter_children() { self.target = next;
// child.render_textree(w, depth + 1); }
// }
// }
} }
pub struct Parser<'text> { pub struct Parser<'text> {
source: Lexer<'text>, input: Lexer<'text>,
cursor: Cursor, output: Cursor,
} }
impl<'text> Parser<'text> { impl<'text> Parser<'text> {
pub fn new(source: Lexer<'text>) -> Self { pub fn new(source: Lexer<'text>) -> Self {
Self { Self {
source, input: source,
cursor: Node::new(), output: Node::new(),
} }
} }
pub fn parse(mut self) -> Result<Cursor, ParseError> { pub fn parse(mut self) -> Result<Cursor, ParseError> {
while self.step()? {} while self.step()? {}
self.cursor.up_to_root(); self.output.up_to_root();
Ok(self.cursor) Ok(self.output)
} }
fn step(&mut self) -> Result<bool, ParseError> { fn step(&mut self) -> Result<bool, ParseError> {
match self.cursor.value() { match self.output.value() {
Value::Start => self.step_start(), Value::Start => self.step_start(),
Value::Statement => self.step_statement(), Value::Statement => self.step_statement(),
Value::Terminal(_) => panic!(), Value::Terminal(_) => panic!(),
@ -190,30 +241,45 @@ impl<'text> Parser<'text> {
} }
fn step_start(&mut self) -> Result<bool, ParseError> { fn step_start(&mut self) -> Result<bool, ParseError> {
assert!(matches!(self.cursor.value(), Value::Start)); assert!(matches!(self.output.value(), Value::Start));
match self.source.peek()? { match self.input.peek()? {
Some(Token::Word(_)) => { Some(Token::Word(_)) => {
self.cursor.push(Value::Statement)?; self.output.push(Value::Statement)?;
let token = self.source.next().unwrap()?; let token = self.input.next().unwrap()?;
self.cursor.push(Value::Terminal(token))?; self.output.push(Value::Terminal(token))?;
self.cursor.up()?; self.output.up()?;
Ok(true) Ok(true)
} }
Some(Token::Glob(_)) => { Some(Token::Glob(_)) => {
let token = self.source.next().unwrap()?; let token = self.input.next().unwrap()?;
Err(ParseError::UnexpectedToken(token)) Err(ParseError::UnexpectedToken(token))
} }
Some(Token::Semi(_)) => {
self.output.push(Value::Statement)?;
let token = self.input.next().unwrap()?;
self.output.push(Value::Terminal(token))?;
self.output.up()?;
self.output.up()?;
Ok(true)
}
None => Ok(false), None => Ok(false),
} }
} }
fn step_statement(&mut self) -> Result<bool, ParseError> { fn step_statement(&mut self) -> Result<bool, ParseError> {
assert!(matches!(self.cursor.value(), Value::Statement)); assert!(matches!(self.output.value(), Value::Statement));
match self.source.peek()? { match self.input.peek()? {
Some(Token::Word(_) | Token::Glob(_)) => { Some(Token::Word(_) | Token::Glob(_)) => {
let token = self.source.next().unwrap()?; let token = self.input.next().unwrap()?;
self.cursor.push(Value::Terminal(token))?; self.output.push(Value::Terminal(token))?;
self.cursor.up()?; self.output.up()?;
Ok(true)
}
Some(Token::Semi(_)) => {
let token = self.input.next().unwrap()?;
self.output.push(Value::Terminal(token))?;
self.output.up()?;
self.output.up()?;
Ok(true) Ok(true)
} }
None => Ok(false), None => Ok(false),

@ -7,10 +7,16 @@ use crate::{
}; };
use std::{collections::HashSet, process}; use std::{collections::HashSet, process};
/// The different types of nodes that may appear in our AST
#[derive(Debug)] #[derive(Debug)]
pub enum Element { pub enum Element {
/// A Block is a list of statements
Block(Block), Block(Block),
/// A Command represents the desire to execute a command
Command(Command), Command(Command),
/// A literal is a ... literal value
Literal(Value), Literal(Value),
} }
@ -64,7 +70,7 @@ impl Command {
builtin.call(ctx, &args) builtin.call(ctx, &args)
} }
fn exec_command(&self, ctx: &mut Context, name: &str) -> Result<Value, ExecError> { fn exec_subprocess(&self, ctx: &mut Context, name: &str) -> Result<Value, ExecError> {
let args = self let args = self
.args .args
.iter() .iter()
@ -92,11 +98,10 @@ impl Command {
impl Eval for Command { impl Eval for Command {
fn eval(&self, ctx: &mut Context) -> Result<Value, ExecError> { fn eval(&self, ctx: &mut Context) -> Result<Value, ExecError> {
let name = self.name.eval(ctx)?; let name = self.name.eval(ctx)?.try_to_string()?;
let name = name.try_as_str()?; match ctx.state.builtin(&name) {
match ctx.state.builtin(name) {
Some(builtin) => self.exec_builtin(ctx, builtin), Some(builtin) => self.exec_builtin(ctx, builtin),
None => self.exec_command(ctx, name), None => self.exec_subprocess(ctx, &name),
} }
} }
} }
@ -112,13 +117,16 @@ impl TreeBuilder {
} }
} }
fn descend(&mut self, source: &mut parse::Cursor) -> Result<Element, ParseError> { fn parse(&mut self, source: &mut parse::Cursor) -> Result<Element, ParseError> {
let e = match source.value() { let e = match source.value() {
parse::Value::Start => { parse::Value::Start => {
let mut root = Block::new(); let mut root = Block::new();
let children = source.iter_children(); let children = source.iter_children();
for mut child in children { for mut child in children {
let e = self.descend(&mut child)?; if child.target.is_semi() {
continue;
}
let e = self.parse(&mut child)?;
match e { match e {
Element::Command(cmd) => root.commands.push(cmd), Element::Command(cmd) => root.commands.push(cmd),
_ => panic!(), _ => panic!(),
@ -128,15 +136,34 @@ impl TreeBuilder {
} }
parse::Value::Statement => { parse::Value::Statement => {
let mut children = source.iter_children(); let mut children = source.iter_children();
let mut first = children.next().unwrap(); let mut first = match children.next() {
let name = self.descend(&mut first)?; Some(child) => child,
None => return Err(ParseError::StatementIsEmpty),
};
if first.target.is_semi() {
return Err(ParseError::StatementIsEmpty);
}
let name = self.parse(&mut first)?;
let mut cmd = Command { let mut cmd = Command {
name: Box::new(name), name: Box::new(name),
args: Vec::new(), args: Vec::new(),
}; };
// we set complete to true when we find a semicolon. If there are any parse nodes
// that appear -after- a semicolon, that indicates a bug in the prior stage.
let mut complete = false;
for mut child in children { for mut child in children {
let e = self.descend(&mut child)?; if child.target.is_semi() {
cmd.args.push(e); complete = true;
continue;
}
if complete {
return Err(ParseError::DanglingElements);
} else {
let e = self.parse(&mut child)?;
cmd.args.push(e);
}
} }
Element::Command(cmd) Element::Command(cmd)
} }
@ -146,6 +173,9 @@ impl TreeBuilder {
parse::Value::Terminal(Token::Glob(_)) => { parse::Value::Terminal(Token::Glob(_)) => {
todo!() todo!()
} }
parse::Value::Terminal(Token::Semi(_)) => {
return Err(ParseError::WhyParseSemicolon);
}
}; };
self.visited.insert(source.target.id); self.visited.insert(source.target.id);
Ok(e) Ok(e)
@ -157,7 +187,7 @@ pub fn parse(source: &str) -> Result<Element, ParseError> {
let parser = parse::Parser::new(tokens); let parser = parse::Parser::new(tokens);
let mut parse_tree = parser.parse()?; let mut parse_tree = parser.parse()?;
let mut builder = TreeBuilder::new(); let mut builder = TreeBuilder::new();
builder.descend(&mut parse_tree) builder.parse(&mut parse_tree)
} }
#[cfg(test)] #[cfg(test)]

Loading…
Cancel
Save