code organization

11 months ago · 5108e4457f
parent 97602bf42e
commit 5108e4457f
9 changed files with 236 additions and 260 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -10,7 +10,7 @@ thiserror = "1.0"
 macros = { path = "macros" }
 dirs = "4"

-log = "0.4"
+log = { version = "0.4", features = [ "max_level_off", "release_max_level_off" ] }

 [dependencies.windows]
 version = "0.44.0"
--- a/src/builtins.rs
+++ b/src/builtins.rs
@ -0,0 +1,3 @@
+/// A callable builtin: something that can be invoked with a list of string arguments.
+///
+/// `syntax::State` stores implementors as boxed trait objects (`Box<dyn BuiltinFn>`) in a map
+/// keyed by a static string.
+pub trait BuiltinFn {
+    /// Invokes the builtin with `args`. Nothing is returned to the caller.
+    ///
+    /// NOTE(review): presumably `args` does not include the builtin's own name — confirm
+    /// against the call sites once they exist.
+    fn call(&self, args: Vec<&str>);
+}
--- a/src/error.rs
+++ b/src/error.rs
@ -1,4 +1,4 @@
-use crate::lex::{Token, Topoglyph};
+use crate::{lex::Token, topo::Glyph};
 use std::io;
 use thiserror::Error;
 use windows::Win32::Foundation::{GetLastError, BOOL};
@ -21,7 +21,7 @@ pub enum LexError {
    ExpectedWordCharacter,

    #[error("unexpected character: {0:?}")]
-    UnexpectedCharacter(Topoglyph),
+    UnexpectedCharacter(Glyph),

    #[error("unexpected eof")]
    UnexpectedEOF,
--- a/src/lex.rs
+++ b/src/lex.rs
@ -1,228 +1,12 @@
-use crate::error::LexError;
+use crate::{
+    error::LexError,
+    topo::{Glyph, Glyphs, Position},
+};
 use std::{collections::VecDeque, fmt, ops::Range, str::Chars};

-/// The position of a specific glyph within a corpus of text. We use this for rendering error
-/// messages and communicating to the user the location of errors.
-#[derive(PartialEq, Clone, Copy)]
-pub struct Position {
-    /// The visual line in which this glyph appears in the source text
-    line: u64,
-
-    /// The visual column in which this glyph appears in the source text
-    column: u64,
-}
-
-impl Position {
-    fn start() -> Self {
-        Self { line: 0, column: 0 }
-    }
-
-    /// Increments position by column, going from the current line,column position to the next
-    /// column on the same line.
-    fn incr(&mut self) -> Position {
-        let p = *self;
-        self.column += 1;
-        p
-    }
-
-    /// Increments the position by line, going from the current line,column position to the
-    /// beginning of the next line.
-    fn incr_line(&mut self) -> Position {
-        let p = *self;
-        self.column = 0;
-        self.line += 1;
-        p
-    }
-}
-
-impl fmt::Debug for Position {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
-        write!(f, "{line}:{column}", line = self.line, column = self.column)
-    }
-}
-
-/// A [Topoglyph] is a wrapper around a basic Rust [char] that includes information about where that
-/// char appears in the source text. Where the char only describes the
-/// [glyph](https://en.wikipedia.org/wiki/Glyph) (i.e., the graphical symbol), a topoglyph
-/// includes both the glyph and its position, to be used to describe the locations of parsed
-/// elements within a source text. Two glyphs appearing at different locations within a source text
-/// would correspond to two distinct topoglyphs.
-#[derive(PartialEq, Clone)]
-pub struct Topoglyph {
-    /// the unicode code point of the glyph
-    glyph: char,
-
-    /// The visual position in which the glyph appears; i.e., the human-comprehensible location
-    /// of the glyph in the source text
-    position: Position,
-
-    /// The byte offsets corresponding to this topoglyph in the source data; i.e., the
-    /// machine-comprehensible location of the glyph in the source text
-    bytes: Range<u64>,
-}
-
-impl Topoglyph {
-    fn is_word(&self) -> bool {
-        self.glyph.is_alphanumeric() || self.glyph == '.'
-    }
-
-    fn is_glob(&self) -> bool {
-        self.is_word() || self.glyph == '*'
-    }
-}
-
-impl fmt::Debug for Topoglyph {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
-        write!(f, "{char}@{pos:?}", char = self.glyph, pos = self.position)
-    }
-}
-
-/// A topoglypher produces [topoglyphs](Topoglyph) for a source text; i.e., it is an iterator of
-/// topoglyphs. The topoglypher is used to control reading from the source text and keeps a
-/// lookahead buffer of topoglyphs that have not been processed. While a [Lexer] is responsible
-/// for the creation and iteration of [tokens](Token), a topoglypher is responsible for the
-/// creation and iteration of topoglyphs.
-struct Topoglypher<'text> {
-    source: Chars<'text>,
-    next_position: Position,
-    bytes_read: u64,
-    lookahead: VecDeque<Topoglyph>,
-}
-
-impl<'text> Topoglypher<'text> {
-    fn new(source: &'text str) -> Self {
-        Self {
-            source: source.chars(),
-            next_position: Position::start(),
-            bytes_read: 0,
-            lookahead: VecDeque::new(),
-        }
-    }
-
-    /// reads the next n characters from the source text into our lookahead buffer
-    fn fill_lookahead(&mut self, n: usize) -> bool {
-        while self.lookahead.len() < n {
-            let c = match self.source.next() {
-                Some(c) => c,
-                None => break,
-            };
-
-            let len = c.len_utf8();
-            let start = self.bytes_read;
-            self.bytes_read += len as u64;
-            let position = if c == '\n' {
-                self.next_position.incr_line()
-            } else {
-                self.next_position.incr()
-            };
-            self.lookahead.push_back(Topoglyph {
-                glyph: c,
-                position,
-                bytes: Range {
-                    start,
-                    end: self.bytes_read,
-                },
-            })
-        }
-        self.lookahead.len() == n
-    }
-
-    /// returns a reference to the next character from the source text, advancing our internal
-    /// lookahead buffer if necessary. Returns None if we're already at the end of our source text.
-    fn peek(&mut self) -> Option<&Topoglyph> {
-        self.peek_at(0)
-    }
-
-    /// takes the next character from our input text
-    fn pop(&mut self) -> Result<Topoglyph, LexError> {
-        self.next().ok_or(LexError::UnexpectedEOF)
-    }
-
-    /// returns a reference to a character in our lookahead buffer at a given position. This allows
-    /// us to perform a lookahead read without consuming any tokens, maintaining our current
-    /// position and keeping our unconsumed characters safe.
-    fn peek_at(&mut self, idx: usize) -> Option<&Topoglyph> {
-        self.fill_lookahead(idx + 1);
-        self.lookahead.get(idx)
-    }
-
-    /// checks whether or not the next character in our source text matches some predicate
-    fn next_is<F>(&mut self, pred: F) -> bool
-    where
-        F: FnOnce(&Topoglyph) -> bool,
-    {
-        self.peek().map(pred).unwrap_or(false)
-    }
-
-    /// checks whether or not we're already at the end of our input text. If we're already at the
-    /// end of our input text, we do not expect any future reads to produce new characters.
-    fn at_eof(&mut self) -> bool {
-        self.peek().is_none()
-    }
-
-    /// discards characters from our current position so long as the upcoming characters match some
-    /// predicate. This is called yeet_while instead of skip_while in order to avoid conflicting
-    /// with the
-    /// [skip_while](https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.skip_while)
-    /// method of the stdlib Iterator trait.
-    pub fn yeet_while<F>(&mut self, mut pred: F)
-    where
-        F: FnMut(&Topoglyph) -> bool,
-    {
-        while let Some(g) = self.peek() {
-            if pred(&g) {
-                self.next();
-            } else {
-                return;
-            }
-        }
-    }
-
-    fn yeet_whitespace(&mut self) {
-        self.yeet_while(|tg| tg.glyph.is_whitespace());
-    }
-
-    fn keep_word(&mut self) -> Result<Lexeme, LexError> {
-        let gs = self.keep_until(|g| g.glyph.is_whitespace());
-        if gs.is_empty() {
-            return Err(LexError::ExpectedWordCharacter);
-        }
-        Ok(Lexeme::from(gs))
-    }
-
-    fn keep_while<F>(&mut self, mut pred: F) -> Vec<Topoglyph>
-    where
-        F: FnMut(&Topoglyph) -> bool,
-    {
-        let mut keep = Vec::new();
-
-        while let Some(g) = self.peek() {
-            if pred(&g) {
-                keep.push(g.clone());
-                self.next();
-            } else {
-                break;
-            }
-        }
-
-        keep
-    }
-
-    fn keep_until<F>(&mut self, mut pred: F) -> Vec<Topoglyph>
-    where
-        F: FnMut(&Topoglyph) -> bool,
-    {
-        self.keep_while(|g| !pred(g))
-    }
-}
-
-impl<'text> Iterator for Topoglypher<'text> {
-    type Item = Topoglyph;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        self.fill_lookahead(1);
-        self.lookahead.pop_front()
-    }
+/// Splits a corpus of source text into [Tokens](Token).
+///
+/// Drives a [Lexer] over `source` and collects everything it yields. Because the items are
+/// collected into a `Result`, the first [LexError] that is produced ends the collection and is
+/// returned in place of the token list.
+pub fn lex(source: &str) -> Result<Vec<Token>, LexError> {
+    Lexer::new(source).collect()
 }

 /// A Lexeme is the text of a given Token, without respect to that Token's type, but with respect
@ -230,7 +14,7 @@ impl<'text> Iterator for Topoglypher<'text> {
 /// the addresses of each of its characters with respect to some source text.
 #[derive(PartialEq, Clone)]
 pub struct Lexeme {
-    elems: Vec<Topoglyph>,
+    elems: Vec<Glyph>,
 }

 impl Lexeme {
@ -274,8 +58,8 @@ impl fmt::Display for Lexeme {
    }
 }

-impl From<Vec<Topoglyph>> for Lexeme {
-    fn from(v: Vec<Topoglyph>) -> Self {
+impl From<Vec<Glyph>> for Lexeme {
+    fn from(v: Vec<Glyph>) -> Self {
        Self { elems: v }
    }
 }
@ -303,13 +87,13 @@ impl Token {
 }

 pub struct Tokenizer<'text> {
-    source: Topoglypher<'text>,
+    source: Glyphs<'text>,
 }

 impl<'text> Tokenizer<'text> {
    pub fn new(text: &'text str) -> Self {
        Self {
-            source: Topoglypher::new(text),
+            source: Glyphs::new(text),
        }
    }

@ -331,7 +115,7 @@ impl<'text> Tokenizer<'text> {
        }
    }

-    fn lex_bare_string(&mut self, mut progress: Vec<Topoglyph>) -> Result<Token, LexError> {
+    fn lex_bare_string(&mut self, mut progress: Vec<Glyph>) -> Result<Token, LexError> {
        while let Some(next) = self.source.peek() {
            match next.glyph {
                _ if next.glyph.is_whitespace() => break,
@ -355,7 +139,7 @@ impl<'text> Tokenizer<'text> {
        }
    }

-    fn lex_glob(&mut self, mut progress: Vec<Topoglyph>) -> Result<Token, LexError> {
+    fn lex_glob(&mut self, mut progress: Vec<Glyph>) -> Result<Token, LexError> {
        while let Some(next) = self.source.peek() {
            match next.glyph {
                _ if next.glyph.is_whitespace() => break,
@ -375,15 +159,15 @@ impl<'text> Tokenizer<'text> {
        }
    }

-    fn lex_raw_string(&mut self, _progress: Vec<Topoglyph>) -> Result<Token, LexError> {
+    fn lex_raw_string(&mut self, _progress: Vec<Glyph>) -> Result<Token, LexError> {
        Err(LexError::not_yet("raw strings not done yet"))
    }

-    fn lex_interp_string(&mut self, _progress: Vec<Topoglyph>) -> Result<Token, LexError> {
+    fn lex_interp_string(&mut self, _progress: Vec<Glyph>) -> Result<Token, LexError> {
        Err(LexError::not_yet("interpreted strings not done yet"))
    }

-    fn lex_var(&mut self, _progress: Vec<Topoglyph>) -> Result<Token, LexError> {
+    fn lex_var(&mut self, _progress: Vec<Glyph>) -> Result<Token, LexError> {
        Err(LexError::not_yet("variables are not done yet"))
    }
 }
@ -396,10 +180,6 @@ impl<'text> Iterator for Tokenizer<'text> {
    }
 }

-pub fn lex(source: &str) -> Result<Vec<Token>, LexError> {
-    Tokenizer::new(source).collect()
-}
-
 pub struct Lexer<'text> {
    source: Tokenizer<'text>,
    lookahead: VecDeque<Token>,
@ -451,9 +231,9 @@ mod tests {
    use std::iter::zip;

    fn lexeme(txt: &str) -> Lexeme {
-        let x: Vec<Topoglyph> = txt
+        let x: Vec<Glyph> = txt
            .chars()
-            .map(|c| Topoglyph {
+            .map(|c| Glyph {
                glyph: c,
                position: Position::start(),
                bytes: 0..0,
--- a/src/main.rs
+++ b/src/main.rs
@ -1,3 +1,4 @@
+mod builtins;
 mod error;
 mod ext;
 mod input;
@ -10,6 +11,7 @@ mod parse;
 mod prompt;
 mod shell;
 mod syntax;
+mod topo;

 use crate::log::*;
 use prompt::Prompt;
@ -55,7 +57,12 @@ fn main() -> Result<()> {
                    shell.output.newline()?;
                    let s = shell.line.pop();
                    info!("◇ {}", s);
-                    match syntax::x(&s) {
+                    if let Ok(tokens) = lex::lex(&s) {
+                        for t in tokens {
+                            debug!("  {:?}", t);
+                        }
+                    }
+                    match syntax::parse(&s) {
                        Ok(tree) => {
                            debug!("  {:?}", tree);
                            let mut state = syntax::State::new();
--- a/src/parse.rs
+++ b/src/parse.rs
@ -3,6 +3,7 @@ use crate::lex::{Lexer, Token};
 use std::{
    cell::RefCell,
    collections::VecDeque,
+    io::Write,
    rc::{Rc, Weak},
    sync::atomic::AtomicUsize,
 };
@ -152,6 +153,13 @@ impl Cursor {
            idx: 0,
        }
    }
+
+    /// Writes an indented, one-node-per-line rendering of the subtree under this cursor.
+    ///
+    /// Each node is printed with its `Debug` representation, indented two spaces per level of
+    /// `depth`; children are rendered at `depth + 1`.
+    ///
+    /// The signature has no way to report failure, so rendering is best effort: it stops at the
+    /// first write error and that error is dropped.
+    pub fn render_textree<W: Write>(&self, w: &mut W, depth: u32) {
+        // Deliberately discarded: this entry point returns nothing to the caller.
+        let _ = self.try_render_textree(w, depth);
+    }
+
+    /// Fallible worker behind `render_textree`; returns the first write error it encounters.
+    fn try_render_textree<W: Write>(&self, w: &mut W, depth: u32) -> std::io::Result<()> {
+        // `depth` is only used as a format width, and format widths have to be a usize.
+        let pad = (depth as usize) * 2;
+        // An empty string padded to `pad` columns produces the indentation.
+        writeln!(w, "{:pad$}{:?}", "", self.target.value, pad = pad)?;
+        for child in self.iter_children() {
+            child.try_render_textree(w, depth + 1)?;
+        }
+        Ok(())
+    }
 }

 pub struct Parser<'text> {
--- a/src/shell.rs
+++ b/src/shell.rs
@ -3,7 +3,7 @@ use crate::{
    input,
    line::Line,
    log::*,
-    output,
+    output, syntax,
 };

 use std::path::{Path, PathBuf};
@ -15,6 +15,7 @@ pub struct Shell {
    pub input: input::Reader,
    pub output: output::Writer,
    pub line: Line,
+    pub state: syntax::State,
 }

 impl Shell {
@ -23,6 +24,7 @@ impl Shell {
            input: input::Reader::new()?,
            output: output::Writer::stdout()?,
            line: Line::new(),
+            state: syntax::State::new(),
        })
    }

@ -49,7 +51,7 @@ impl Shell {
    }

    pub fn seek_right(&mut self) -> Result<()> {
-        info!("» seek right");
+        info!("»");
        let n = self.line.seek_right();
        if n > 0 {
            // move right by the distance seeked
@ -59,7 +61,7 @@ impl Shell {
    }

    pub fn seek_left(&mut self) -> Result<()> {
-        info!("» seek left");
+        info!("«");
        let n = self.line.seek_left();
        if n > 0 {
            // move left by the distance seeked
--- a/src/syntax.rs
+++ b/src/syntax.rs
@ -1,15 +1,26 @@
 use crate::{
+    builtins::BuiltinFn,
    error::{ExecError, ParseError},
    lex::{Lexer, Token},
-    parse,
+    log::debug,
+    parse, syntax,
+};
+use std::{
+    collections::{HashMap, HashSet},
+    process,
 };
-use std::{collections::HashSet, process};

-pub struct State {}
+/// Evaluation state: a table of builtins and a table of variables.
+pub struct State {
+    /// Builtins, keyed by a static string (presumably the builtin's name — TODO confirm).
+    builtins: HashMap<&'static str, Box<dyn BuiltinFn>>,
+    /// Variable values, keyed by a static string (presumably the variable's name — TODO confirm).
+    variables: HashMap<&'static str, syntax::Value>,
+}

 impl State {
    pub fn new() -> Self {
-        Self {}
+        Self {
+            builtins: HashMap::new(),
+            variables: HashMap::new(),
+        }
    }
 }

@ -108,7 +119,6 @@ impl Eval for Command {
    }
 }

-// ????? waht am i doing now
 struct TreeBuilder {
    visited: HashSet<usize>,
 }
@ -160,16 +170,11 @@ impl TreeBuilder {
    }
 }

-fn build(mut source: parse::Cursor) -> Result<Element, ParseError> {
-    source.up_to_root();
-    let mut builder = TreeBuilder::new();
-    builder.descend(&mut source)
-}
-
-pub fn x(source: &str) -> Result<Element, ParseError> {
+/// Parses `source` into an [Element] tree.
+///
+/// The text is fed through a [Lexer] and a `parse::Parser` to obtain a parse tree, the parse
+/// tree is logged at debug level, and a `TreeBuilder` then descends it to build the result.
+/// A failure from the parser is returned early as a [ParseError].
+pub fn parse(source: &str) -> Result<Element, ParseError> {
    let tokens = Lexer::new(source);
    let parser = parse::Parser::new(tokens);
    let mut parse_tree = parser.parse()?;
+    debug!("parse tree: {parse_tree:?}");
    let mut builder = TreeBuilder::new();
    builder.descend(&mut parse_tree)
 }
@ -177,11 +182,10 @@ pub fn x(source: &str) -> Result<Element, ParseError> {
 #[cfg(test)]
 mod test {
    use super::*;
-    use crate::lex::lex;

    #[test]
    fn hi() -> Result<(), ParseError> {
-        let e = x("ls one two three")?;
+        let e = parse("ls one two three")?;
        print!("{:?}", e);
        todo!()
        //Ok(())
--- a/src/topo.rs
+++ b/src/topo.rs
@ -0,0 +1,172 @@
+use crate::error::LexError;
+use std::{collections::VecDeque, fmt, ops::Range, str::Chars};
+
+/// The position of a specific glyph within a corpus of text. We use this for rendering error
+/// messages and communicating to the user the location of errors.
+///
+/// Both coordinates are zero-based: [Position::start] is line 0, column 0.
+#[derive(Debug, PartialEq, Clone, Copy)]
+pub struct Position {
+    /// The visual line in which this glyph appears in the source text
+    pub line: u64,
+
+    /// The visual column in which this glyph appears in the source text
+    pub column: u64,
+}
+
+impl Position {
+    /// The position of the first glyph of a text: line 0, column 0.
+    pub fn start() -> Self {
+        Position { line: 0, column: 0 }
+    }
+
+    /// Steps one column to the right on the current line.
+    ///
+    /// Returns the position as it was *before* the step.
+    pub fn incr(&mut self) -> Position {
+        let advanced = Position {
+            line: self.line,
+            column: self.column + 1,
+        };
+        std::mem::replace(self, advanced)
+    }
+
+    /// Moves to column 0 of the following line.
+    ///
+    /// Returns the position as it was *before* the move.
+    pub fn incr_line(&mut self) -> Position {
+        let advanced = Position {
+            line: self.line + 1,
+            column: 0,
+        };
+        std::mem::replace(self, advanced)
+    }
+}
+
+/// A [Glyph] pairs a basic Rust [char] with information about where that char appears in the
+/// source text: its visual position and its byte offsets. Because equality compares every
+/// field, the same char appearing at two different locations yields two unequal glyphs.
+#[derive(PartialEq, Clone)]
+pub struct Glyph {
+    /// the unicode code point of the glyph
+    pub glyph: char,
+
+    /// The visual position in which the glyph appears; i.e., the human-comprehensible location
+    /// of the glyph in the source text
+    pub position: Position,
+
+    /// The byte offsets corresponding to this glyph in the source data; i.e., the
+    /// machine-comprehensible location of the glyph in the source text
+    pub bytes: Range<u64>,
+}
+
+impl Glyph {
+    /// Reports whether this glyph counts as a word character: alphanumeric, or a `.`.
+    pub fn is_word(&self) -> bool {
+        let c = self.glyph;
+        c == '.' || c.is_alphanumeric()
+    }
+
+    /// Reports whether this glyph counts as a glob character: a `*`, or any word character.
+    pub fn is_glob(&self) -> bool {
+        matches!(self.glyph, '*') || self.is_word()
+    }
+}
+
+impl fmt::Debug for Glyph {
+    /// Renders the glyph as `[<char> (<position>)]`, with the position in its own `Debug` form.
+    /// The byte range is not part of the output.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let Glyph {
+            glyph, position, ..
+        } = self;
+        write!(f, "[{char} ({pos:?})]", char = glyph, pos = position)
+    }
+}
+/// Glyphs produces [glyphs](Glyph) for a source text; i.e., it is an iterator of [Glyph] values.
+/// Glyphs is used to control reading from the source text and keeps a lookahead buffer of glyphs
+/// that have not been processed. While a [crate::lex::Lexer] is responsible for the creation and
+/// iteration of [tokens](crate::lex::Token), Glyphs is responsible for the creation and iteration
+/// of glyphs.
+pub struct Glyphs<'text> {
+    /// the characters of the source text that have not been read yet
+    source: Chars<'text>,
+    /// the position that will be given to the next glyph read from `source`
+    next_position: Position,
+    /// the number of bytes read from the source text so far; this is also the byte offset at
+    /// which the next glyph starts
+    bytes_read: u64,
+    /// glyphs that have been read from `source` but not yet taken by the consumer
+    lookahead: VecDeque<Glyph>,
+}
+
+impl<'text> Glyphs<'text> {
+    /// Creates a reader positioned at the start of `source`, with an empty lookahead buffer.
+    pub fn new(source: &'text str) -> Self {
+        Self {
+            source: source.chars(),
+            next_position: Position::start(),
+            bytes_read: 0,
+            lookahead: VecDeque::new(),
+        }
+    }
+
+    /// Reads characters from the source text until the lookahead buffer holds at least `n`
+    /// glyphs or the source text runs out. Returns true if at least `n` glyphs are buffered
+    /// afterwards, and false if the source text ended first.
+    fn fill_lookahead(&mut self, n: usize) -> bool {
+        while self.lookahead.len() < n {
+            let c = match self.source.next() {
+                Some(c) => c,
+                None => break,
+            };
+
+            let len = c.len_utf8();
+            let start = self.bytes_read;
+            self.bytes_read += len as u64;
+            // Both incr methods return the position from before the step, so this glyph gets
+            // the current position and a newline moves the *following* glyph to the next line.
+            let position = if c == '\n' {
+                self.next_position.incr_line()
+            } else {
+                self.next_position.incr()
+            };
+            self.lookahead.push_back(Glyph {
+                glyph: c,
+                position,
+                bytes: start..self.bytes_read,
+            });
+        }
+        // A deeper peek may already have buffered more than n glyphs, so compare with >=
+        // instead of ==.
+        self.lookahead.len() >= n
+    }
+
+    /// returns a reference to the next character from the source text, advancing our internal
+    /// lookahead buffer if necessary. Returns None if we're already at the end of our source text.
+    pub fn peek(&mut self) -> Option<&Glyph> {
+        self.peek_at(0)
+    }
+
+    /// takes the next character from our input text, or fails with [LexError::UnexpectedEOF] if
+    /// there are no characters left
+    pub fn pop(&mut self) -> Result<Glyph, LexError> {
+        self.next().ok_or(LexError::UnexpectedEOF)
+    }
+
+    /// returns a reference to a character in our lookahead buffer at a given position. This allows
+    /// us to perform a lookahead read without consuming any glyphs, maintaining our current
+    /// position and keeping our unconsumed characters safe. Returns None if the source text ends
+    /// before that position.
+    fn peek_at(&mut self, idx: usize) -> Option<&Glyph> {
+        self.fill_lookahead(idx + 1);
+        self.lookahead.get(idx)
+    }
+
+    /// discards characters from our current position so long as the upcoming characters match some
+    /// predicate. This is called yeet_while instead of skip_while in order to avoid conflicting
+    /// with the
+    /// [skip_while](https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.skip_while)
+    /// method of the stdlib Iterator trait.
+    pub fn yeet_while<F>(&mut self, mut pred: F)
+    where
+        F: FnMut(&Glyph) -> bool,
+    {
+        while let Some(g) = self.peek() {
+            // g is already a reference; the first glyph that fails the predicate stays buffered
+            if !pred(g) {
+                return;
+            }
+            self.next();
+        }
+    }
+
+    /// discards upcoming characters for as long as they are whitespace
+    pub fn yeet_whitespace(&mut self) {
+        self.yeet_while(|tg| tg.glyph.is_whitespace());
+    }
+}
+
+impl<'text> Iterator for Glyphs<'text> {
+    type Item = Glyph;
+
+    /// Takes the next glyph, reading it from the source text first if nothing is buffered.
+    /// Returns None once the source text is exhausted.
+    fn next(&mut self) -> Option<Glyph> {
+        // peeking is what pulls a glyph into the lookahead buffer when the buffer is empty
+        self.peek()?;
+        self.lookahead.pop_front()
+    }
+}