you can now use a semicolon to have TWO statements

main
Jordan Orelli 11 months ago
parent be5bebf25b
commit 5505cb48c6

@ -29,6 +29,12 @@ pub enum LexError {
#[error("unexpected character {g} at {pos:?}", g = .0.glyph, pos = .0.position)]
UnexpectedCharacter(Glyph),
#[error("unexpected character {g} at {pos:?} while lexing a bare string", g = .0.glyph, pos = .0.position)]
UnexpectedCharacterInBareString(Glyph),
#[error("unexpected character {g} at {pos:?} while lexing a glob", g = .0.glyph, pos = .0.position)]
UnexpectedCharacterInGlob(Glyph),
#[error("unexpected eof")]
UnexpectedEOF,
@ -61,6 +67,15 @@ pub enum ParseError {
#[error("Illegal attempt to push a value as a child to a terminal value")]
PushOntoTerminal,
#[error("Statement node has no children")]
StatementIsEmpty,
#[error("dangling")]
DanglingElements,
#[error("you wouldn't parse a semicolon")]
WhyParseSemicolon,
}
#[derive(Debug, Error)]

@ -68,6 +68,8 @@ pub enum Token {
/// A bare word containing 1 or more of the special characters ? or *
Glob(Lexeme),
Semi(Glyph),
}
impl Token {
@ -96,6 +98,7 @@ impl Token {
match self {
Word(lexeme) | Glob(lexeme) => lexeme.text(),
Semi(glyph) => String::from(glyph.glyph),
}
}
}
@ -121,13 +124,10 @@ impl<'text> Tokenizer<'text> {
match next.glyph {
_ if next.is_word() => Some(self.lex_bare_string(vec![next])),
_ if next.is_glob() => Some(self.lex_glob(vec![next])),
// '\\' => match self.source.pop() {
// Ok(escaped) => Some(self.lex_bare_string(vec![escaped])),
// Err(e) => Some(Err(e)),
// },
'@' => Some(self.lex_var(vec![next])),
'\'' => Some(self.lex_raw_string(vec![next])),
'"' => Some(self.lex_interp_string(vec![next])),
';' => Some(Ok(Token::Semi(next))),
_ => Some(Err(LexError::UnexpectedCharacter(next))),
}
}
@ -141,11 +141,12 @@ impl<'text> Tokenizer<'text> {
progress.push(self.source.pop()?);
return self.lex_glob(progress);
}
// '\\' => {
// self.source.pop()?;
// progress.push(self.source.pop()?);
// }
_ => return Err(LexError::UnexpectedCharacter(self.source.pop()?)),
';' => break,
_ => {
return Err(LexError::UnexpectedCharacterInBareString(
self.source.pop()?,
))
}
}
}
@ -159,13 +160,14 @@ impl<'text> Tokenizer<'text> {
fn lex_glob(&mut self, mut progress: Vec<Glyph>) -> Result<Token, LexError> {
while let Some(next) = self.source.peek() {
match next.glyph {
';' => break,
_ if next.glyph.is_whitespace() => break,
_ if next.is_glob() => progress.push(self.source.pop()?),
// '\\' => {
// self.source.pop()?;
// progress.push(self.source.pop()?);
// }
_ => return Err(LexError::UnexpectedCharacter(self.source.pop()?)),
_ => return Err(LexError::UnexpectedCharacterInGlob(self.source.pop()?)),
}
}

@ -6,14 +6,19 @@ use std::{
sync::atomic::AtomicUsize,
};
/// The contents of a node in our parse tree. The parse tree consists of both terminal and
/// nonterminal symbols: `Start` and `Statement` are the nonterminals, while `Terminal` wraps a
/// token produced by the lexer.
#[derive(Debug, PartialEq)]
pub enum Value {
/// The start symbol of our parse tree. Each parse tree is rooted in a node whose value is the
/// start symbol. This is the only node in the tree that should utilize the start symbol.
Start,
/// The children of a statement symbol make up the components of what will become a statement
/// in our ast. When the ast is built, the first child supplies the command name and the
/// remaining non-semicolon children supply its arguments.
Statement,
/// Each of the tokens from the lex stage becomes a terminal node on our tree. This includes
/// semicolon tokens, which the parser attaches to the statement they terminate.
Terminal(Token),
}
@ -43,19 +48,28 @@ pub struct Node {
impl Node {
fn new() -> Cursor {
let root = Node {
let root = Rc::new(Node {
id: next_id(),
parent: None,
value: Value::Start,
children: RefCell::new(Vec::new()),
};
let root = Rc::new(root);
});
Cursor {
target: Rc::clone(&root),
prev: root.id,
root,
}
}
/// Reports whether this node is a terminal node holding a semicolon token.
///
/// Nonterminal nodes (`Start`, `Statement`) and terminals holding any other token yield `false`.
pub fn is_semi(&self) -> bool {
    match &self.value {
        Value::Terminal(token) => matches!(token, Token::Semi(_)),
        // Listed explicitly so that adding a new nonterminal forces this method to be revisited.
        Value::Start | Value::Statement => false,
    }
}
}
/// Nodes are compared by identity: two nodes are equal exactly when their ids match. The
/// value, parent and children of the nodes are not inspected.
impl PartialEq for Node {
    fn eq(&self, other: &Self) -> bool {
        let (mine, theirs) = (self.id, other.id);
        mine.eq(&theirs)
    }
}
pub struct ChildIter {
@ -74,6 +88,7 @@ impl Iterator for ChildIter {
Some(Cursor {
target: Rc::clone(v),
root: Rc::clone(&self.root),
prev: self.target.id,
})
}
}
@ -83,11 +98,15 @@ fn next_id() -> usize {
LAST_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
}
/// Cursor values expose access to a parse tree. A cursor is logically a pointer to a single node
/// within a parse tree. The cursor helps with the ownership structure: so long as there is a
/// cursor to any node on the tree, the tree remains in memory. Once there are no cursors pointed
/// at the tree, it is dropped.
#[derive(Debug)]
pub struct Cursor {
/// The node this cursor currently points at.
pub target: Rc<Node>,
/// A strong reference to the root of the tree. Holding it is what keeps the tree alive for as
/// long as the cursor exists, and it is where `up_to_root` returns to.
root: Rc<Node>,
/// The id of the node the cursor was pointing at before its most recent move. A cursor freshly
/// created on the root starts out with the root's own id here.
prev: usize,
}
impl Cursor {
@ -99,7 +118,7 @@ impl Cursor {
None => Err(ParseError::AtRootAlready),
Some(parent) => match parent.upgrade() {
Some(parent) => {
self.target = parent;
self.goto(parent);
Ok(())
}
None => Err(ParseError::ParentIsGone),
@ -107,6 +126,40 @@ impl Cursor {
}
}
/// Moves the cursor horizontally in the tree, selecting the node that appears directly after
/// the current target node in their shared parent's list of children.
///
/// Returns `Ok(true)` when the cursor moved and `Ok(false)` when the target is already the last
/// child, in which case the cursor is left untouched.
///
/// # Errors
///
/// Propagates any error from looking up the sibling, e.g. `ParseError::AtRootAlready` when the
/// target has no parent.
pub fn next_sibling(&mut self) -> Result<bool, ParseError> {
    if let Some(sibling) = self.pick_next_sibling()? {
        // `goto` records the node we are leaving in `prev` before retargeting.
        self.goto(sibling);
        Ok(true)
    } else {
        Ok(false)
    }
}
/// Finds the node that directly follows the current target in its parent's list of children,
/// without moving the cursor.
///
/// Returns `Ok(None)` when the target is the last child of its parent (or is not found among
/// the parent's children at all).
///
/// # Errors
///
/// * `ParseError::AtRootAlready` when the target has no parent, i.e. it is the root.
/// * `ParseError::ParentIsGone` when the parent has already been dropped. The cursor holds a
///   strong reference to the root, so this is not expected for nodes still attached to the
///   tree; it is reported as an error rather than a panic, matching `up`.
///
/// # Panics
///
/// Panics if the parent's children are mutably borrowed while this runs.
fn pick_next_sibling(&mut self) -> Result<Option<Rc<Node>>, ParseError> {
    let parent = self
        .target
        .parent
        .as_ref()
        .ok_or(ParseError::AtRootAlready)?
        .upgrade()
        .ok_or(ParseError::ParentIsGone)?;

    // Keep the borrow guard in a local so it outlives the iterator below.
    let siblings = parent.children.borrow();
    let target_id = self.target.id;

    // Skip everything before the target; element 0 of what remains is the target itself, so
    // element 1 is the sibling we want.
    let next = siblings
        .iter()
        .skip_while(|child| child.id != target_id)
        .nth(1)
        .cloned();
    Ok(next)
}
/// Adds a value to the children of the current target node, then descends to select that
/// child.
fn push(&mut self, v: Value) -> Result<(), ParseError> {
@ -124,12 +177,12 @@ impl Cursor {
.children
.try_borrow_mut()?
.push(Rc::clone(&node));
self.target = node;
self.goto(node);
Ok(())
}
pub fn up_to_root(&mut self) {
self.target = Rc::clone(&self.root);
self.goto(Rc::clone(&self.root));
}
pub fn value(&self) -> &Value {
@ -154,35 +207,33 @@ impl Cursor {
Rc::clone(&self.root)
}
// pub fn render_textree<W: Write>(&self, w: &mut W, depth: u32) {
// write!(w, "{:?} {pad:?}", self.target.value, pad = depth * 2);
// for child in self.iter_children() {
// child.render_textree(w, depth + 1);
// }
// }
/// Retargets the cursor at `next`, recording the id of the node being left behind in `prev`.
fn goto(&mut self, next: Rc<Node>) {
    // Swap the new target in and take the old one out in a single step.
    let departed = std::mem::replace(&mut self.target, next);
    self.prev = departed.id;
}
}
pub struct Parser<'text> {
source: Lexer<'text>,
cursor: Cursor,
input: Lexer<'text>,
output: Cursor,
}
impl<'text> Parser<'text> {
pub fn new(source: Lexer<'text>) -> Self {
Self {
source,
cursor: Node::new(),
input: source,
output: Node::new(),
}
}
pub fn parse(mut self) -> Result<Cursor, ParseError> {
while self.step()? {}
self.cursor.up_to_root();
Ok(self.cursor)
self.output.up_to_root();
Ok(self.output)
}
fn step(&mut self) -> Result<bool, ParseError> {
match self.cursor.value() {
match self.output.value() {
Value::Start => self.step_start(),
Value::Statement => self.step_statement(),
Value::Terminal(_) => panic!(),
@ -190,30 +241,45 @@ impl<'text> Parser<'text> {
}
fn step_start(&mut self) -> Result<bool, ParseError> {
assert!(matches!(self.cursor.value(), Value::Start));
match self.source.peek()? {
assert!(matches!(self.output.value(), Value::Start));
match self.input.peek()? {
Some(Token::Word(_)) => {
self.cursor.push(Value::Statement)?;
let token = self.source.next().unwrap()?;
self.cursor.push(Value::Terminal(token))?;
self.cursor.up()?;
self.output.push(Value::Statement)?;
let token = self.input.next().unwrap()?;
self.output.push(Value::Terminal(token))?;
self.output.up()?;
Ok(true)
}
Some(Token::Glob(_)) => {
let token = self.source.next().unwrap()?;
let token = self.input.next().unwrap()?;
Err(ParseError::UnexpectedToken(token))
}
Some(Token::Semi(_)) => {
self.output.push(Value::Statement)?;
let token = self.input.next().unwrap()?;
self.output.push(Value::Terminal(token))?;
self.output.up()?;
self.output.up()?;
Ok(true)
}
None => Ok(false),
}
}
fn step_statement(&mut self) -> Result<bool, ParseError> {
assert!(matches!(self.cursor.value(), Value::Statement));
match self.source.peek()? {
assert!(matches!(self.output.value(), Value::Statement));
match self.input.peek()? {
Some(Token::Word(_) | Token::Glob(_)) => {
let token = self.source.next().unwrap()?;
self.cursor.push(Value::Terminal(token))?;
self.cursor.up()?;
let token = self.input.next().unwrap()?;
self.output.push(Value::Terminal(token))?;
self.output.up()?;
Ok(true)
}
Some(Token::Semi(_)) => {
let token = self.input.next().unwrap()?;
self.output.push(Value::Terminal(token))?;
self.output.up()?;
self.output.up()?;
Ok(true)
}
None => Ok(false),

@ -7,10 +7,16 @@ use crate::{
};
use std::{collections::HashSet, process};
/// The different types of nodes that may appear in our AST
#[derive(Debug)]
pub enum Element {
/// A Block is a list of statements; the tree builder produces one from the start symbol of the
/// parse tree, collecting one command per statement.
Block(Block),
/// A Command represents the desire to execute a command, made up of a name and its arguments.
Command(Command),
/// A Literal is a value that appears directly in the source text.
// NOTE(review): `Value` here is presumably the evaluation value type rather than
// `parse::Value` -- confirm against this file's imports.
Literal(Value),
}
@ -64,7 +70,7 @@ impl Command {
builtin.call(ctx, &args)
}
fn exec_command(&self, ctx: &mut Context, name: &str) -> Result<Value, ExecError> {
fn exec_subprocess(&self, ctx: &mut Context, name: &str) -> Result<Value, ExecError> {
let args = self
.args
.iter()
@ -92,11 +98,10 @@ impl Command {
impl Eval for Command {
fn eval(&self, ctx: &mut Context) -> Result<Value, ExecError> {
let name = self.name.eval(ctx)?;
let name = name.try_as_str()?;
match ctx.state.builtin(name) {
let name = self.name.eval(ctx)?.try_to_string()?;
match ctx.state.builtin(&name) {
Some(builtin) => self.exec_builtin(ctx, builtin),
None => self.exec_command(ctx, name),
None => self.exec_subprocess(ctx, &name),
}
}
}
@ -112,13 +117,16 @@ impl TreeBuilder {
}
}
fn descend(&mut self, source: &mut parse::Cursor) -> Result<Element, ParseError> {
fn parse(&mut self, source: &mut parse::Cursor) -> Result<Element, ParseError> {
let e = match source.value() {
parse::Value::Start => {
let mut root = Block::new();
let children = source.iter_children();
for mut child in children {
let e = self.descend(&mut child)?;
if child.target.is_semi() {
continue;
}
let e = self.parse(&mut child)?;
match e {
Element::Command(cmd) => root.commands.push(cmd),
_ => panic!(),
@ -128,15 +136,34 @@ impl TreeBuilder {
}
parse::Value::Statement => {
let mut children = source.iter_children();
let mut first = children.next().unwrap();
let name = self.descend(&mut first)?;
let mut first = match children.next() {
Some(child) => child,
None => return Err(ParseError::StatementIsEmpty),
};
if first.target.is_semi() {
return Err(ParseError::StatementIsEmpty);
}
let name = self.parse(&mut first)?;
let mut cmd = Command {
name: Box::new(name),
args: Vec::new(),
};
// we set complete to true when we find a semicolon. If there are any parse nodes
// that appear -after- a semicolon, that indicates a bug in the prior stage.
let mut complete = false;
for mut child in children {
let e = self.descend(&mut child)?;
cmd.args.push(e);
if child.target.is_semi() {
complete = true;
continue;
}
if complete {
return Err(ParseError::DanglingElements);
} else {
let e = self.parse(&mut child)?;
cmd.args.push(e);
}
}
Element::Command(cmd)
}
@ -146,6 +173,9 @@ impl TreeBuilder {
parse::Value::Terminal(Token::Glob(_)) => {
todo!()
}
parse::Value::Terminal(Token::Semi(_)) => {
return Err(ParseError::WhyParseSemicolon);
}
};
self.visited.insert(source.target.id);
Ok(e)
@ -157,7 +187,7 @@ pub fn parse(source: &str) -> Result<Element, ParseError> {
let parser = parse::Parser::new(tokens);
let mut parse_tree = parser.parse()?;
let mut builder = TreeBuilder::new();
builder.descend(&mut parse_tree)
builder.parse(&mut parse_tree)
}
#[cfg(test)]

Loading…
Cancel
Save