code organization

11 months ago · 5108e4457f
parent 97602bf42e
commit 5108e4457f
9 changed files with 236 additions and 260 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -10,7 +10,7 @@ thiserror = "1.0"
 macros = { path = "macros" }
 dirs = "4"
-log = "0.4"
+log = { version = "0.4", features = [ "max_level_off", "release_max_level_off" ] }
 [dependencies.windows]
 version = "0.44.0"
--- a/src/builtins.rs
+++ b/src/builtins.rs
@ -0,0 +1,3 @@
 /// Interface implemented by the shell's builtin functions. `syntax::State` stores
 /// implementations as boxed trait objects keyed by a `&'static str` name.
 pub trait BuiltinFn {
    /// Invokes the builtin with the given arguments.
    ///
    /// NOTE(review): there is no return value, so an implementation has no way to report
    /// failure or hand output back to the caller through this signature; confirm that is
    /// intended.
    fn call(&self, args: Vec<&str>);
 }
--- a/src/error.rs
+++ b/src/error.rs
@ -1,4 +1,4 @@
-use crate::lex::{Token, Topoglyph};
+use crate::{lex::Token, topo::Glyph};
 use std::io;
 use thiserror::Error;
 use windows::Win32::Foundation::{GetLastError, BOOL};
@ -21,7 +21,7 @@ pub enum LexError {
    ExpectedWordCharacter,
    #[error("unexpected character: {0:?}")]
-    UnexpectedCharacter(Topoglyph),
+    UnexpectedCharacter(Glyph),
    #[error("unexpected eof")]
    UnexpectedEOF,
--- a/src/lex.rs
+++ b/src/lex.rs
@ -1,228 +1,12 @@
-use crate::error::LexError;
+use crate::{
    error::LexError,
    topo::{Glyph, Glyphs, Position},
 };
 use std::{collections::VecDeque, fmt, ops::Range, str::Chars};
-/// The position of a specific glyph within a corpus of text. We use this for rendering error
+/// splits a corpus into Tokens.
-/// messages and communicating to the user the location of errors.
+pub fn lex(source: &str) -> Result<Vec<Token>, LexError> {
-#[derive(PartialEq, Clone, Copy)]
+    Lexer::new(source).collect()
 pub struct Position {
    /// The visual line in which this glyph appears in the source text; the first line is 0
    line: u64,
    /// The visual column in which this glyph appears in the source text; the first column is 0
    column: u64,
 }
 impl Position {
    /// The position of the very first glyph of a text: line 0, column 0.
    fn start() -> Self {
        Position { column: 0, line: 0 }
    }
    /// Moves to the next column on the current line. The value returned is the position as it
    /// was *before* moving, i.e., the position of the glyph that was just read.
    fn incr(&mut self) -> Position {
        let before = *self;
        self.column += 1;
        before
    }
    /// Moves to column 0 of the following line. The value returned is the position as it was
    /// *before* moving, i.e., the position of the glyph that ended the line.
    fn incr_line(&mut self) -> Position {
        let before = *self;
        self.column = 0;
        self.line += 1;
        before
    }
 }
 impl fmt::Debug for Position {
    /// Renders the position compactly as `line:column`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Position { line, column } = *self;
        write!(f, "{line}:{column}")
    }
 }
 /// A [Topoglyph] pairs a basic Rust [char] with information about where that char appears in
 /// the source text. The char alone only describes the
 /// [glyph](https://en.wikipedia.org/wiki/Glyph) (i.e., the graphical symbol); a topoglyph
 /// carries both the glyph and its position, so that the locations of parsed elements within a
 /// source text can be described. The same glyph appearing at two different locations within a
 /// source text corresponds to two distinct topoglyphs.
 #[derive(PartialEq, Clone)]
 pub struct Topoglyph {
    /// the unicode code point of the glyph
    glyph: char,
    /// The visual position in which the glyph appears; i.e., the human-comprehensible location
    /// of the glyph in the source text
    position: Position,
    /// The byte offsets corresponding to this topoglyph in the source data; i.e., the
    /// machine-comprehensible location of the glyph in the source text
    bytes: Range<u64>,
 }
 impl Topoglyph {
    /// reports whether this glyph counts as a word character: anything alphanumeric, or '.'
    fn is_word(&self) -> bool {
        matches!(self.glyph, '.') || self.glyph.is_alphanumeric()
    }
    /// reports whether this glyph counts as a glob character: any word character, or '*'
    fn is_glob(&self) -> bool {
        matches!(self.glyph, '*') || self.is_word()
    }
 }
 impl fmt::Debug for Topoglyph {
    /// renders the topoglyph as `<char>@<position>`; the byte range is not shown
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (char, pos) = (self.glyph, &self.position);
        write!(f, "{char}@{pos:?}")
    }
 }
 /// A topoglypher produces [topoglyphs](Topoglyph) for a source text; i.e., it is an iterator of
 /// topoglyphs. The topoglypher is used to control reading from the source text and keeps a
 /// lookahead buffer of topoglyphs that have not been processed. While a [Lexer] is responsible
 /// for the creation and iteration of [tokens](Token), a topoglypher is responsible for the
 /// creation and iteration of topoglyphs.
 struct Topoglypher<'text> {
    source: Chars<'text>,
    next_position: Position,
    bytes_read: u64,
    lookahead: VecDeque<Topoglyph>,
 }
 impl<'text> Topoglypher<'text> {
    fn new(source: &'text str) -> Self {
        Self {
            source: source.chars(),
            next_position: Position::start(),
            bytes_read: 0,
            lookahead: VecDeque::new(),
        }
    }
    /// reads the next n characters from the source text into our lookahead buffer
    fn fill_lookahead(&mut self, n: usize) -> bool {
        while self.lookahead.len() < n {
            let c = match self.source.next() {
                Some(c) => c,
                None => break,
            };
            let len = c.len_utf8();
            let start = self.bytes_read;
            self.bytes_read += len as u64;
            let position = if c == '\n' {
                self.next_position.incr_line()
            } else {
                self.next_position.incr()
            };
            self.lookahead.push_back(Topoglyph {
                glyph: c,
                position,
                bytes: Range {
                    start,
                    end: self.bytes_read,
                },
            })
        }
        self.lookahead.len() == n
    }
    /// returns a reference to the next character from the source text, advancing our internal
    /// lookahead buffer if necessary. Returns None if we're already at the end of our source text.
    fn peek(&mut self) -> Option<&Topoglyph> {
        self.peek_at(0)
    }
    /// takes the next character from our input text
    fn pop(&mut self) -> Result<Topoglyph, LexError> {
        self.next().ok_or(LexError::UnexpectedEOF)
    }
    /// returns a reference to a character in our lookahead buffer at a given position. This allows
    /// us to perform a lookahead read without consuming any tokens, maintaining our current
    /// position and keeping our unconsumed characters safe.
    fn peek_at(&mut self, idx: usize) -> Option<&Topoglyph> {
        self.fill_lookahead(idx + 1);
        self.lookahead.get(idx)
    }
    /// checks whether or not the next character in our source text matches some predicate
    fn next_is<F>(&mut self, pred: F) -> bool
    where
        F: FnOnce(&Topoglyph) -> bool,
    {
        self.peek().map(pred).unwrap_or(false)
    }
    /// checks whether or not we're already at the end of our input text. If we're already at the
    /// end of our input text, we do not expect any future reads to produce new characters.
    fn at_eof(&mut self) -> bool {
        self.peek().is_none()
    }
    /// discards characters from our current position so long as the upcoming characters match some
    /// predicate. This is called yeet_while instead of skip_while in order to avoid conflicting
    /// with the
    /// [skip_while](https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.skip_while)
    /// method of the stdlib Iterator trait.
    pub fn yeet_while<F>(&mut self, mut pred: F)
    where
        F: FnMut(&Topoglyph) -> bool,
    {
        while let Some(g) = self.peek() {
            if pred(&g) {
                self.next();
            } else {
                return;
            }
        }
    }
    fn yeet_whitespace(&mut self) {
        self.yeet_while(|tg| tg.glyph.is_whitespace());
    }
    fn keep_word(&mut self) -> Result<Lexeme, LexError> {
        let gs = self.keep_until(|g| g.glyph.is_whitespace());
        if gs.is_empty() {
            return Err(LexError::ExpectedWordCharacter);
        }
        Ok(Lexeme::from(gs))
    }
    fn keep_while<F>(&mut self, mut pred: F) -> Vec<Topoglyph>
    where
        F: FnMut(&Topoglyph) -> bool,
    {
        let mut keep = Vec::new();
        while let Some(g) = self.peek() {
            if pred(&g) {
                keep.push(g.clone());
                self.next();
            } else {
                break;
            }
        }
        keep
    }
    fn keep_until<F>(&mut self, mut pred: F) -> Vec<Topoglyph>
    where
        F: FnMut(&Topoglyph) -> bool,
    {
        self.keep_while(|g| !pred(g))
    }
 }
 impl<'text> Iterator for Topoglypher<'text> {
    type Item = Topoglyph;
    fn next(&mut self) -> Option<Self::Item> {
        self.fill_lookahead(1);
        self.lookahead.pop_front()
    }
 }
 /// A Lexeme is the text of a given Token, without respect to that Token's type, but with respect
@ -230,7 +14,7 @@ impl<'text> Iterator for Topoglypher<'text> {
 /// the addresses of each of its characters with respect to some source text.
 #[derive(PartialEq, Clone)]
 pub struct Lexeme {
-    elems: Vec<Topoglyph>,
+    elems: Vec<Glyph>,
 }
 impl Lexeme {
@ -274,8 +58,8 @@ impl fmt::Display for Lexeme {
    }
 }
-impl From<Vec<Topoglyph>> for Lexeme {
+impl From<Vec<Glyph>> for Lexeme {
-    fn from(v: Vec<Topoglyph>) -> Self {
+    fn from(v: Vec<Glyph>) -> Self {
        Self { elems: v }
    }
 }
@ -303,13 +87,13 @@ impl Token {
 }
 pub struct Tokenizer<'text> {
-    source: Topoglypher<'text>,
+    source: Glyphs<'text>,
 }
 impl<'text> Tokenizer<'text> {
    pub fn new(text: &'text str) -> Self {
        Self {
-            source: Topoglypher::new(text),
+            source: Glyphs::new(text),
        }
    }
@ -331,7 +115,7 @@ impl<'text> Tokenizer<'text> {
        }
    }
-    fn lex_bare_string(&mut self, mut progress: Vec<Topoglyph>) -> Result<Token, LexError> {
+    fn lex_bare_string(&mut self, mut progress: Vec<Glyph>) -> Result<Token, LexError> {
        while let Some(next) = self.source.peek() {
            match next.glyph {
                _ if next.glyph.is_whitespace() => break,
@ -355,7 +139,7 @@ impl<'text> Tokenizer<'text> {
        }
    }
-    fn lex_glob(&mut self, mut progress: Vec<Topoglyph>) -> Result<Token, LexError> {
+    fn lex_glob(&mut self, mut progress: Vec<Glyph>) -> Result<Token, LexError> {
        while let Some(next) = self.source.peek() {
            match next.glyph {
                _ if next.glyph.is_whitespace() => break,
@ -375,15 +159,15 @@ impl<'text> Tokenizer<'text> {
        }
    }
-    fn lex_raw_string(&mut self, _progress: Vec<Topoglyph>) -> Result<Token, LexError> {
+    fn lex_raw_string(&mut self, _progress: Vec<Glyph>) -> Result<Token, LexError> {
        Err(LexError::not_yet("raw strings not done yet"))
    }
-    fn lex_interp_string(&mut self, _progress: Vec<Topoglyph>) -> Result<Token, LexError> {
+    fn lex_interp_string(&mut self, _progress: Vec<Glyph>) -> Result<Token, LexError> {
        Err(LexError::not_yet("interpreted strings not done yet"))
    }
-    fn lex_var(&mut self, _progress: Vec<Topoglyph>) -> Result<Token, LexError> {
+    fn lex_var(&mut self, _progress: Vec<Glyph>) -> Result<Token, LexError> {
        Err(LexError::not_yet("variables are not done yet"))
    }
 }
@ -396,10 +180,6 @@ impl<'text> Iterator for Tokenizer<'text> {
    }
 }
 /// Tokenizes all of `source` in one go by draining a [Tokenizer]. Returns every token in
 /// source order, or the first [LexError] the tokenizer produces.
 pub fn lex(source: &str) -> Result<Vec<Token>, LexError> {
    let tokens = Tokenizer::new(source);
    tokens.collect()
 }
 pub struct Lexer<'text> {
    source: Tokenizer<'text>,
    lookahead: VecDeque<Token>,
@ -451,9 +231,9 @@ mod tests {
    use std::iter::zip;
    fn lexeme(txt: &str) -> Lexeme {
-        let x: Vec<Topoglyph> = txt
+        let x: Vec<Glyph> = txt
            .chars()
-            .map(|c| Topoglyph {
+            .map(|c| Glyph {
                glyph: c,
                position: Position::start(),
                bytes: 0..0,
--- a/src/main.rs
+++ b/src/main.rs
@ -1,3 +1,4 @@
 mod builtins;
 mod error;
 mod ext;
 mod input;
@ -10,6 +11,7 @@ mod parse;
 mod prompt;
 mod shell;
 mod syntax;
 mod topo;
 use crate::log::*;
 use prompt::Prompt;
@ -55,7 +57,12 @@ fn main() -> Result<()> {
                    shell.output.newline()?;
                    let s = shell.line.pop();
                    info!("◇ {}", s);
-                    match syntax::x(&s) {
+                    if let Ok(tokens) = lex::lex(&s) {
                        for t in tokens {
                            debug!("  {:?}", t);
                        }
                    }
                    match syntax::parse(&s) {
                        Ok(tree) => {
                            debug!("  {:?}", tree);
                            let mut state = syntax::State::new();
--- a/src/parse.rs
+++ b/src/parse.rs
@ -3,6 +3,7 @@ use crate::lex::{Lexer, Token};
 use std::{
    cell::RefCell,
    collections::VecDeque,
    io::Write,
    rc::{Rc, Weak},
    sync::atomic::AtomicUsize,
 };
@ -152,6 +153,13 @@ impl Cursor {
            idx: 0,
        }
    }
    /// Writes the `Debug` rendering of this node's value to `w`, followed by a space and the
    /// number `depth * 2`, and then does the same for every child with `depth + 1`. No newline
    /// is written between nodes.
    ///
    /// NOTE(review): the result of `write!` is discarded, so I/O errors go unnoticed; and `pad`
    /// is printed as a number rather than applied as indentation. Confirm whether either is
    /// intended.
    pub fn render_textree<W: Write>(&self, w: &mut W, depth: u32) {
        let pad = depth * 2;
        write!(w, "{:?} {pad:?}", self.target.value);
        for node in self.iter_children() {
            node.render_textree(w, depth + 1);
        }
    }
 }
 pub struct Parser<'text> {
--- a/src/shell.rs
+++ b/src/shell.rs
@ -3,7 +3,7 @@ use crate::{
    input,
    line::Line,
    log::*,
-    output,
+    output, syntax,
 };
 use std::path::{Path, PathBuf};
@ -15,6 +15,7 @@ pub struct Shell {
    pub input: input::Reader,
    pub output: output::Writer,
    pub line: Line,
    pub state: syntax::State,
 }
 impl Shell {
@ -23,6 +24,7 @@ impl Shell {
            input: input::Reader::new()?,
            output: output::Writer::stdout()?,
            line: Line::new(),
            state: syntax::State::new(),
        })
    }
@ -49,7 +51,7 @@ impl Shell {
    }
    pub fn seek_right(&mut self) -> Result<()> {
-        info!("» seek right");
+        info!("»");
        let n = self.line.seek_right();
        if n > 0 {
            // move right by the distance seeked
@ -59,7 +61,7 @@ impl Shell {
    }
    pub fn seek_left(&mut self) -> Result<()> {
-        info!("» seek left");
+        info!("«");
        let n = self.line.seek_left();
        if n > 0 {
            // move left by the distance seeked
--- a/src/syntax.rs
+++ b/src/syntax.rs
@ -1,15 +1,26 @@
 use crate::{
    builtins::BuiltinFn,
    error::{ExecError, ParseError},
    lex::{Lexer, Token},
-    parse,
+    log::debug,
    parse, syntax,
 };
 use std::{
    collections::{HashMap, HashSet},
    process,
 };
 use std::{collections::HashSet, process};
-pub struct State {}
+pub struct State {
    builtins: HashMap<&'static str, Box<dyn BuiltinFn>>,
    variables: HashMap<&'static str, syntax::Value>,
 }
 impl State {
    pub fn new() -> Self {
-        Self {}
+        Self {
            builtins: HashMap::new(),
            variables: HashMap::new(),
        }
    }
 }
@ -108,7 +119,6 @@ impl Eval for Command {
    }
 }
 // ????? what am i doing now
 /// State carried while descending a parse tree to build syntax elements.
 struct TreeBuilder {
    /// NOTE(review): presumably identifies the parse-tree nodes that have already been
    /// visited; confirm what the `usize` values refer to.
    visited: HashSet<usize>,
 }
@ -160,16 +170,11 @@ impl TreeBuilder {
    }
 }
-fn build(mut source: parse::Cursor) -> Result<Element, ParseError> {
+pub fn parse(source: &str) -> Result<Element, ParseError> {
    source.up_to_root();
    let mut builder = TreeBuilder::new();
    builder.descend(&mut source)
 }
 /// Runs the whole front end over `source`: lexes it with [Lexer], parses the tokens with
 /// [parse::Parser], logs the resulting parse tree at debug level, and finally walks that tree
 /// with a [TreeBuilder] to produce the root [Element]. Any [ParseError] raised along the way is
 /// returned to the caller.
 pub fn x(source: &str) -> Result<Element, ParseError> {
    let mut parse_tree = parse::Parser::new(Lexer::new(source)).parse()?;
    debug!("parse tree: {parse_tree:?}");
    let mut builder = TreeBuilder::new();
    builder.descend(&mut parse_tree)
 }
@ -177,11 +182,10 @@ pub fn x(source: &str) -> Result<Element, ParseError> {
 #[cfg(test)]
 mod test {
    use super::*;
    use crate::lex::lex;
    #[test]
    fn hi() -> Result<(), ParseError> {
-        let e = x("ls one two three")?;
+        let e = parse("ls one two three")?;
        print!("{:?}", e);
        todo!()
        //Ok(())
--- a/src/topo.rs
+++ b/src/topo.rs
@ -0,0 +1,172 @@
 use crate::error::LexError;
 use std::{collections::VecDeque, fmt, ops::Range, str::Chars};
 /// Where a specific glyph sits within a corpus of text. Positions are used when rendering
 /// error messages, to tell the user where an error is located. Lines and columns both start
 /// at zero.
 #[derive(Debug, PartialEq, Clone, Copy)]
 pub struct Position {
    /// The visual line in which this glyph appears in the source text
    pub line: u64,
    /// The visual column in which this glyph appears in the source text
    pub column: u64,
 }
 impl Position {
    /// The position of the very first glyph of a text: line 0, column 0.
    pub fn start() -> Self {
        Position { column: 0, line: 0 }
    }
    /// Moves to the next column on the current line. The value returned is the position as it
    /// was *before* moving, i.e., the position of the glyph that was just read.
    pub fn incr(&mut self) -> Position {
        let moved = Position {
            column: self.column + 1,
            ..*self
        };
        std::mem::replace(self, moved)
    }
    /// Moves to column 0 of the following line. The value returned is the position as it was
    /// *before* moving, i.e., the position of the glyph that ended the line.
    pub fn incr_line(&mut self) -> Position {
        let moved = Position {
            line: self.line + 1,
            column: 0,
        };
        std::mem::replace(self, moved)
    }
 }
 /// A Glyph is a single [char] of a source text together with the location at which it
 /// appears, given both as a visual line/column position and as a byte range. The same
 /// character appearing at two different locations yields two glyphs that compare unequal.
 #[derive(PartialEq, Clone)]
 pub struct Glyph {
    /// the unicode code point of the glyph
    pub glyph: char,
    /// The visual position in which the glyph appears; i.e., the human-comprehensible location
    /// of the glyph in the source text
    pub position: Position,
    /// The byte offsets occupied by this glyph in the source data; i.e., the
    /// machine-comprehensible location of the glyph in the source text
    pub bytes: Range<u64>,
 }
 impl Glyph {
    /// reports whether this glyph counts as a word character: anything alphanumeric, or '.'
    pub fn is_word(&self) -> bool {
        matches!(self.glyph, '.') || self.glyph.is_alphanumeric()
    }
    /// reports whether this glyph counts as a glob character: any word character, or '*'
    pub fn is_glob(&self) -> bool {
        matches!(self.glyph, '*') || self.is_word()
    }
 }
 impl fmt::Debug for Glyph {
    /// renders the glyph as `[<char> (<position>)]`; the byte range is not shown
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (char, pos) = (self.glyph, &self.position);
        write!(f, "[{char} ({pos:?})]")
    }
 }
 /// Glyphs produces [glyphs](Glyph) for a source text; i.e., it is an iterator of [Glyph] values.
 /// Glyphs is used to control reading from the source text and keeps a lookahead buffer of glyphs
 /// that have not been processed. While a [crate::lex::Lexer] is responsible for the creation and
 /// iteration of [tokens](crate::lex::Token), Glyphs is responsible for the creation and iteration
 /// of glyphs.
 pub struct Glyphs<'text> {
    source: Chars<'text>,
    next_position: Position,
    bytes_read: u64,
    lookahead: VecDeque<Glyph>,
 }
 impl<'text> Glyphs<'text> {
    pub fn new(source: &'text str) -> Self {
        // neat
        Self {
            source: source.chars(),
            next_position: Position::start(),
            bytes_read: 0,
            lookahead: VecDeque::new(),
        }
    }
    /// reads the next n characters from the source text into our lookahead buffer
    fn fill_lookahead(&mut self, n: usize) -> bool {
        while self.lookahead.len() < n {
            let c = match self.source.next() {
                Some(c) => c,
                None => break,
            };
            let len = c.len_utf8();
            let start = self.bytes_read;
            self.bytes_read += len as u64;
            let position = if c == '\n' {
                self.next_position.incr_line()
            } else {
                self.next_position.incr()
            };
            self.lookahead.push_back(Glyph {
                glyph: c,
                position,
                bytes: Range {
                    start,
                    end: self.bytes_read,
                },
            })
        }
        self.lookahead.len() == n
    }
    /// returns a reference to the next character from the source text, advancing our internal
    /// lookahead buffer if necessary. Returns None if we're already at the end of our source text.
    pub fn peek(&mut self) -> Option<&Glyph> {
        self.peek_at(0)
    }
    /// takes the next character from our input text
    pub fn pop(&mut self) -> Result<Glyph, LexError> {
        self.next().ok_or(LexError::UnexpectedEOF)
    }
    /// returns a reference to a character in our lookahead buffer at a given position. This allows
    /// us to perform a lookahead read without consuming any tokens, maintaining our current
    /// position and keeping our unconsumed characters safe.
    fn peek_at(&mut self, idx: usize) -> Option<&Glyph> {
        self.fill_lookahead(idx + 1);
        self.lookahead.get(idx)
    }
    /// discards characters from our current position so long as the upcoming characters match some
    /// predicate. This is called yeet_while instead of skip_while in order to avoid conflicting
    /// with the
    /// [skip_while](https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.skip_while)
    /// method of the stdlib Iterator trait.
    pub fn yeet_while<F>(&mut self, mut pred: F)
    where
        F: FnMut(&Glyph) -> bool,
    {
        while let Some(g) = self.peek() {
            if pred(&g) {
                self.next();
            } else {
                return;
            }
        }
    }
    pub fn yeet_whitespace(&mut self) {
        self.yeet_while(|tg| tg.glyph.is_whitespace());
    }
 }
 impl<'text> Iterator for Glyphs<'text> {
    type Item = Glyph;
    fn next(&mut self) -> Option<Self::Item> {
        self.fill_lookahead(1);
        self.lookahead.pop_front()
    }
 }