code organization

main
Jordan Orelli 11 months ago
parent 97602bf42e
commit 5108e4457f

@ -10,7 +10,7 @@ thiserror = "1.0"
macros = { path = "macros" } macros = { path = "macros" }
dirs = "4" dirs = "4"
log = "0.4" log = { version = "0.4", features = [ "max_level_off", "release_max_level_off" ] }
[dependencies.windows] [dependencies.windows]
version = "0.44.0" version = "0.44.0"

@ -0,0 +1,3 @@
/// A function that can be called with a list of string arguments.
///
/// NOTE(review): presumably this models shell builtins, i.e. commands run by the shell itself
/// rather than by spawning a process — confirm against the implementors. `syntax::State` stores
/// values of this trait as `Box<dyn BuiltinFn>` keyed by `&'static str`.
pub trait BuiltinFn {
/// Runs the function with `args`. Nothing is returned, so an implementation cannot report
/// failure to its caller through this method.
fn call(&self, args: Vec<&str>);
}

@ -1,4 +1,4 @@
use crate::lex::{Token, Topoglyph}; use crate::{lex::Token, topo::Glyph};
use std::io; use std::io;
use thiserror::Error; use thiserror::Error;
use windows::Win32::Foundation::{GetLastError, BOOL}; use windows::Win32::Foundation::{GetLastError, BOOL};
@ -21,7 +21,7 @@ pub enum LexError {
ExpectedWordCharacter, ExpectedWordCharacter,
#[error("unexpected character: {0:?}")] #[error("unexpected character: {0:?}")]
UnexpectedCharacter(Topoglyph), UnexpectedCharacter(Glyph),
#[error("unexpected eof")] #[error("unexpected eof")]
UnexpectedEOF, UnexpectedEOF,

@ -1,228 +1,12 @@
use crate::error::LexError; use crate::{
use std::{collections::VecDeque, fmt, ops::Range, str::Chars}; error::LexError,
topo::{Glyph, Glyphs, Position},
/// A human-readable location within a source text, expressed as a line and a column. Positions
/// are attached to glyphs so that error messages can tell the user where a problem occurred.
#[derive(PartialEq, Clone, Copy)]
pub struct Position {
    /// Zero-based visual line on which the glyph appears
    line: u64,
    /// Zero-based visual column at which the glyph appears within its line
    column: u64,
}

impl Position {
    /// The position of the first glyph of a text: line 0, column 0.
    fn start() -> Self {
        Position { line: 0, column: 0 }
    }

    /// Advances to the next column of the current line and returns the position that was held
    /// before advancing.
    fn incr(&mut self) -> Position {
        let before = *self;
        *self = Position {
            column: before.column + 1,
            ..before
        };
        before
    }

    /// Advances to the first column of the next line and returns the position that was held
    /// before advancing.
    fn incr_line(&mut self) -> Position {
        let before = *self;
        *self = Position {
            line: before.line + 1,
            column: 0,
        };
        before
    }
}

/// Positions render as `line:column`.
impl fmt::Debug for Position {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}:{}", self.line, self.column)
    }
}
/// A [Topoglyph] pairs a Rust [char] with the place where that char occurs in a source text.
/// The char alone identifies the [glyph](https://en.wikipedia.org/wiki/Glyph) (the graphical
/// symbol); the topoglyph additionally records where it was found, so that parsed elements can
/// be traced back to their location. The same glyph appearing at two different locations yields
/// two distinct topoglyphs.
#[derive(PartialEq, Clone)]
pub struct Topoglyph {
    /// the unicode code point of the glyph
    glyph: char,
    /// Where a human would say the glyph is: its visual line and column in the source text
    position: Position,
    /// Where a machine would say the glyph is: the byte offsets it occupies in the source data
    bytes: Range<u64>,
}

impl Topoglyph {
    /// True when the glyph is alphanumeric or a `.`.
    fn is_word(&self) -> bool {
        match self.glyph {
            '.' => true,
            other => other.is_alphanumeric(),
        }
    }

    /// True when the glyph is a `*` or satisfies [Topoglyph::is_word].
    fn is_glob(&self) -> bool {
        self.glyph == '*' || self.is_word()
    }
}

/// Topoglyphs render as the glyph, an `@`, and the Debug form of the position.
impl fmt::Debug for Topoglyph {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}@{:?}", self.glyph, self.position)
    }
}
/// A topoglypher produces [topoglyphs](Topoglyph) for a source text; i.e., it is an iterator of
/// topoglyphs. The topoglypher is used to control reading from the source text and keeps a
/// lookahead buffer of topoglyphs that have not been processed. While a [Lexer] is responsible
/// for the creation and iteration of [tokens](Token), a topoglypher is responsible for the
/// creation and iteration of topoglyphs.
struct Topoglypher<'text> {
/// the characters of the source text that have not been read yet
source: Chars<'text>,
/// the position that will be given to the next character read from `source`
next_position: Position,
/// the number of bytes of source text read so far; used to compute each glyph's byte range
bytes_read: u64,
/// topoglyphs that have been read from `source` but not yet consumed
lookahead: VecDeque<Topoglyph>,
}
impl<'text> Topoglypher<'text> {
/// creates a topoglypher that reads `source` from its beginning, with an empty lookahead buffer
fn new(source: &'text str) -> Self {
Self {
source: source.chars(),
next_position: Position::start(),
bytes_read: 0,
lookahead: VecDeque::new(),
}
}
/// reads characters from the source text into our lookahead buffer until the buffer holds n
/// glyphs or the source text runs out. Returns true only when the buffer holds exactly n glyphs
/// on return.
///
/// NOTE(review): when the buffer already holds more than n glyphs this returns false even
/// though n glyphs are available; none of the callers in this block read the return value.
fn fill_lookahead(&mut self, n: usize) -> bool {
while self.lookahead.len() < n {
let c = match self.source.next() {
Some(c) => c,
None => break,
}; };
use std::{collections::VecDeque, fmt, ops::Range, str::Chars};
let len = c.len_utf8(); /// splits a corpus into Tokens.
let start = self.bytes_read; pub fn lex(source: &str) -> Result<Vec<Token>, LexError> {
self.bytes_read += len as u64; Lexer::new(source).collect()
// a newline is given the position it occupies on its own line; the glyph after it starts the
// next line at column 0
let position = if c == '\n' {
self.next_position.incr_line()
} else {
self.next_position.incr()
};
self.lookahead.push_back(Topoglyph {
glyph: c,
position,
bytes: Range {
start,
end: self.bytes_read,
},
})
}
self.lookahead.len() == n
}
/// returns a reference to the next character from the source text, advancing our internal
/// lookahead buffer if necessary. Returns None if we're already at the end of our source text.
fn peek(&mut self) -> Option<&Topoglyph> {
self.peek_at(0)
}
/// takes the next character from our input text, or fails with [LexError::UnexpectedEOF] when
/// there are no characters left
fn pop(&mut self) -> Result<Topoglyph, LexError> {
self.next().ok_or(LexError::UnexpectedEOF)
}
/// returns a reference to a character in our lookahead buffer at a given position. This allows
/// us to perform a lookahead read without consuming any glyphs, maintaining our current
/// position and keeping our unconsumed characters safe. Returns None when the source text ends
/// before that position.
fn peek_at(&mut self, idx: usize) -> Option<&Topoglyph> {
self.fill_lookahead(idx + 1);
self.lookahead.get(idx)
}
/// checks whether or not the next character in our source text matches some predicate. Returns
/// false at the end of the source text.
fn next_is<F>(&mut self, pred: F) -> bool
where
F: FnOnce(&Topoglyph) -> bool,
{
self.peek().map(pred).unwrap_or(false)
}
/// checks whether or not we're already at the end of our input text. If we're already at the
/// end of our input text, we do not expect any future reads to produce new characters.
fn at_eof(&mut self) -> bool {
self.peek().is_none()
}
/// discards characters from our current position so long as the upcoming characters match some
/// predicate. This is called yeet_while instead of skip_while in order to avoid conflicting
/// with the
/// [skip_while](https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.skip_while)
/// method of the stdlib Iterator trait.
pub fn yeet_while<F>(&mut self, mut pred: F)
where
F: FnMut(&Topoglyph) -> bool,
{
while let Some(g) = self.peek() {
if pred(&g) {
self.next();
} else {
return;
}
}
}
/// discards upcoming characters for as long as they are whitespace
fn yeet_whitespace(&mut self) {
self.yeet_while(|tg| tg.glyph.is_whitespace());
}
/// consumes characters up to, but not including, the next whitespace character and returns
/// them as a Lexeme. Fails with [LexError::ExpectedWordCharacter] when no characters were
/// consumed.
fn keep_word(&mut self) -> Result<Lexeme, LexError> {
let gs = self.keep_until(|g| g.glyph.is_whitespace());
if gs.is_empty() {
return Err(LexError::ExpectedWordCharacter);
}
Ok(Lexeme::from(gs))
}
/// consumes characters for as long as they match some predicate and returns the consumed
/// characters in source order. The first character that fails the predicate is left unconsumed.
fn keep_while<F>(&mut self, mut pred: F) -> Vec<Topoglyph>
where
F: FnMut(&Topoglyph) -> bool,
{
let mut keep = Vec::new();
while let Some(g) = self.peek() {
if pred(&g) {
keep.push(g.clone());
self.next();
} else {
break;
}
}
keep
}
/// the inverse of keep_while: consumes and returns characters until one matches the predicate.
/// The matching character is left unconsumed.
fn keep_until<F>(&mut self, mut pred: F) -> Vec<Topoglyph>
where
F: FnMut(&Topoglyph) -> bool,
{
self.keep_while(|g| !pred(g))
}
}
/// Iterating a topoglypher consumes its topoglyphs from the front of the lookahead buffer.
impl<'text> Iterator for Topoglypher<'text> {
type Item = Topoglyph;
/// takes the next topoglyph, first reading it from the source text if the lookahead buffer is
/// empty. Returns None when no topoglyph is available.
fn next(&mut self) -> Option<Self::Item> {
self.fill_lookahead(1);
self.lookahead.pop_front()
}
} }
/// A Lexeme is the text of a given Token, without respect to that Token's type, but with respect /// A Lexeme is the text of a given Token, without respect to that Token's type, but with respect
@ -230,7 +14,7 @@ impl<'text> Iterator for Topoglypher<'text> {
/// the addresses of each of its characters with respect to some source text. /// the addresses of each of its characters with respect to some source text.
#[derive(PartialEq, Clone)] #[derive(PartialEq, Clone)]
pub struct Lexeme { pub struct Lexeme {
elems: Vec<Topoglyph>, elems: Vec<Glyph>,
} }
impl Lexeme { impl Lexeme {
@ -274,8 +58,8 @@ impl fmt::Display for Lexeme {
} }
} }
impl From<Vec<Topoglyph>> for Lexeme { impl From<Vec<Glyph>> for Lexeme {
fn from(v: Vec<Topoglyph>) -> Self { fn from(v: Vec<Glyph>) -> Self {
Self { elems: v } Self { elems: v }
} }
} }
@ -303,13 +87,13 @@ impl Token {
} }
pub struct Tokenizer<'text> { pub struct Tokenizer<'text> {
source: Topoglypher<'text>, source: Glyphs<'text>,
} }
impl<'text> Tokenizer<'text> { impl<'text> Tokenizer<'text> {
pub fn new(text: &'text str) -> Self { pub fn new(text: &'text str) -> Self {
Self { Self {
source: Topoglypher::new(text), source: Glyphs::new(text),
} }
} }
@ -331,7 +115,7 @@ impl<'text> Tokenizer<'text> {
} }
} }
fn lex_bare_string(&mut self, mut progress: Vec<Topoglyph>) -> Result<Token, LexError> { fn lex_bare_string(&mut self, mut progress: Vec<Glyph>) -> Result<Token, LexError> {
while let Some(next) = self.source.peek() { while let Some(next) = self.source.peek() {
match next.glyph { match next.glyph {
_ if next.glyph.is_whitespace() => break, _ if next.glyph.is_whitespace() => break,
@ -355,7 +139,7 @@ impl<'text> Tokenizer<'text> {
} }
} }
fn lex_glob(&mut self, mut progress: Vec<Topoglyph>) -> Result<Token, LexError> { fn lex_glob(&mut self, mut progress: Vec<Glyph>) -> Result<Token, LexError> {
while let Some(next) = self.source.peek() { while let Some(next) = self.source.peek() {
match next.glyph { match next.glyph {
_ if next.glyph.is_whitespace() => break, _ if next.glyph.is_whitespace() => break,
@ -375,15 +159,15 @@ impl<'text> Tokenizer<'text> {
} }
} }
fn lex_raw_string(&mut self, _progress: Vec<Topoglyph>) -> Result<Token, LexError> { fn lex_raw_string(&mut self, _progress: Vec<Glyph>) -> Result<Token, LexError> {
Err(LexError::not_yet("raw strings not done yet")) Err(LexError::not_yet("raw strings not done yet"))
} }
fn lex_interp_string(&mut self, _progress: Vec<Topoglyph>) -> Result<Token, LexError> { fn lex_interp_string(&mut self, _progress: Vec<Glyph>) -> Result<Token, LexError> {
Err(LexError::not_yet("interpreted strings not done yet")) Err(LexError::not_yet("interpreted strings not done yet"))
} }
fn lex_var(&mut self, _progress: Vec<Topoglyph>) -> Result<Token, LexError> { fn lex_var(&mut self, _progress: Vec<Glyph>) -> Result<Token, LexError> {
Err(LexError::not_yet("variables are not done yet")) Err(LexError::not_yet("variables are not done yet"))
} }
} }
@ -396,10 +180,6 @@ impl<'text> Iterator for Tokenizer<'text> {
} }
} }
pub fn lex(source: &str) -> Result<Vec<Token>, LexError> {
Tokenizer::new(source).collect()
}
pub struct Lexer<'text> { pub struct Lexer<'text> {
source: Tokenizer<'text>, source: Tokenizer<'text>,
lookahead: VecDeque<Token>, lookahead: VecDeque<Token>,
@ -451,9 +231,9 @@ mod tests {
use std::iter::zip; use std::iter::zip;
fn lexeme(txt: &str) -> Lexeme { fn lexeme(txt: &str) -> Lexeme {
let x: Vec<Topoglyph> = txt let x: Vec<Glyph> = txt
.chars() .chars()
.map(|c| Topoglyph { .map(|c| Glyph {
glyph: c, glyph: c,
position: Position::start(), position: Position::start(),
bytes: 0..0, bytes: 0..0,

@ -1,3 +1,4 @@
mod builtins;
mod error; mod error;
mod ext; mod ext;
mod input; mod input;
@ -10,6 +11,7 @@ mod parse;
mod prompt; mod prompt;
mod shell; mod shell;
mod syntax; mod syntax;
mod topo;
use crate::log::*; use crate::log::*;
use prompt::Prompt; use prompt::Prompt;
@ -55,7 +57,12 @@ fn main() -> Result<()> {
shell.output.newline()?; shell.output.newline()?;
let s = shell.line.pop(); let s = shell.line.pop();
info!("◇ {}", s); info!("◇ {}", s);
match syntax::x(&s) { if let Ok(tokens) = lex::lex(&s) {
for t in tokens {
debug!(" {:?}", t);
}
}
match syntax::parse(&s) {
Ok(tree) => { Ok(tree) => {
debug!(" {:?}", tree); debug!(" {:?}", tree);
let mut state = syntax::State::new(); let mut state = syntax::State::new();

@ -3,6 +3,7 @@ use crate::lex::{Lexer, Token};
use std::{ use std::{
cell::RefCell, cell::RefCell,
collections::VecDeque, collections::VecDeque,
io::Write,
rc::{Rc, Weak}, rc::{Rc, Weak},
sync::atomic::AtomicUsize, sync::atomic::AtomicUsize,
}; };
@ -152,6 +153,13 @@ impl Cursor {
idx: 0, idx: 0,
} }
} }
/// Writes the Debug form of this cursor's target value to `w`, followed by a space and the
/// number `depth * 2`, then does the same for each child with `depth + 1`.
///
/// NOTE(review): `pad` is printed as a number rather than used as an indentation width, and
/// nothing separates one node's output from the next — presumably unfinished; confirm the
/// intended layout.
/// NOTE(review): the `Result` produced by `write!` is discarded, so a failing writer goes
/// unreported; reporting it would require changing the return type.
pub fn render_textree<W: Write>(&self, w: &mut W, depth: u32) {
write!(w, "{:?} {pad:?}", self.target.value, pad = depth * 2);
for child in self.iter_children() {
child.render_textree(w, depth + 1);
}
}
} }
pub struct Parser<'text> { pub struct Parser<'text> {

@ -3,7 +3,7 @@ use crate::{
input, input,
line::Line, line::Line,
log::*, log::*,
output, output, syntax,
}; };
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
@ -15,6 +15,7 @@ pub struct Shell {
pub input: input::Reader, pub input: input::Reader,
pub output: output::Writer, pub output: output::Writer,
pub line: Line, pub line: Line,
pub state: syntax::State,
} }
impl Shell { impl Shell {
@ -23,6 +24,7 @@ impl Shell {
input: input::Reader::new()?, input: input::Reader::new()?,
output: output::Writer::stdout()?, output: output::Writer::stdout()?,
line: Line::new(), line: Line::new(),
state: syntax::State::new(),
}) })
} }
@ -49,7 +51,7 @@ impl Shell {
} }
pub fn seek_right(&mut self) -> Result<()> { pub fn seek_right(&mut self) -> Result<()> {
info!("» seek right"); info!("»");
let n = self.line.seek_right(); let n = self.line.seek_right();
if n > 0 { if n > 0 {
// move right by the distance seeked // move right by the distance seeked
@ -59,7 +61,7 @@ impl Shell {
} }
pub fn seek_left(&mut self) -> Result<()> { pub fn seek_left(&mut self) -> Result<()> {
info!("» seek left"); info!("«");
let n = self.line.seek_left(); let n = self.line.seek_left();
if n > 0 { if n > 0 {
// move left by the distance seeked // move left by the distance seeked

@ -1,15 +1,26 @@
use crate::{ use crate::{
builtins::BuiltinFn,
error::{ExecError, ParseError}, error::{ExecError, ParseError},
lex::{Lexer, Token}, lex::{Lexer, Token},
parse, log::debug,
parse, syntax,
};
use std::{
collections::{HashMap, HashSet},
process,
}; };
use std::{collections::HashSet, process};
pub struct State {} pub struct State {
builtins: HashMap<&'static str, Box<dyn BuiltinFn>>,
variables: HashMap<&'static str, syntax::Value>,
}
impl State { impl State {
pub fn new() -> Self { pub fn new() -> Self {
Self {} Self {
builtins: HashMap::new(),
variables: HashMap::new(),
}
} }
} }
@ -108,7 +119,6 @@ impl Eval for Command {
} }
} }
// ????? waht am i doing now
struct TreeBuilder { struct TreeBuilder {
visited: HashSet<usize>, visited: HashSet<usize>,
} }
@ -160,16 +170,11 @@ impl TreeBuilder {
} }
} }
fn build(mut source: parse::Cursor) -> Result<Element, ParseError> { pub fn parse(source: &str) -> Result<Element, ParseError> {
source.up_to_root();
let mut builder = TreeBuilder::new();
builder.descend(&mut source)
}
pub fn x(source: &str) -> Result<Element, ParseError> {
let tokens = Lexer::new(source); let tokens = Lexer::new(source);
let parser = parse::Parser::new(tokens); let parser = parse::Parser::new(tokens);
let mut parse_tree = parser.parse()?; let mut parse_tree = parser.parse()?;
debug!("parse tree: {parse_tree:?}");
let mut builder = TreeBuilder::new(); let mut builder = TreeBuilder::new();
builder.descend(&mut parse_tree) builder.descend(&mut parse_tree)
} }
@ -177,11 +182,10 @@ pub fn x(source: &str) -> Result<Element, ParseError> {
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::*; use super::*;
use crate::lex::lex;
#[test] #[test]
fn hi() -> Result<(), ParseError> { fn hi() -> Result<(), ParseError> {
let e = x("ls one two three")?; let e = parse("ls one two three")?;
print!("{:?}", e); print!("{:?}", e);
todo!() todo!()
//Ok(()) //Ok(())

@ -0,0 +1,172 @@
use crate::error::LexError;
use std::{collections::VecDeque, fmt, ops::Range, str::Chars};
/// A human-readable location within a source text, expressed as a line and a column. Positions
/// are attached to glyphs so that error messages can tell the user where a problem occurred.
#[derive(Debug, PartialEq, Clone, Copy)]
pub struct Position {
    /// Zero-based visual line on which the glyph appears
    pub line: u64,
    /// Zero-based visual column at which the glyph appears within its line
    pub column: u64,
}

impl Position {
    /// The position of the first glyph of a text: line 0, column 0.
    pub fn start() -> Self {
        Position { line: 0, column: 0 }
    }

    /// Advances to the next column of the current line and returns the position that was held
    /// before advancing.
    pub fn incr(&mut self) -> Position {
        let before = *self;
        *self = Position {
            column: before.column + 1,
            ..before
        };
        before
    }

    /// Advances to the first column of the next line and returns the position that was held
    /// before advancing.
    pub fn incr_line(&mut self) -> Position {
        let before = *self;
        *self = Position {
            line: before.line + 1,
            column: 0,
        };
        before
    }
}
/// A [Glyph] pairs a Rust [char] with the place where that char occurs in a source text. The
/// same character appearing at two different locations yields two distinct glyphs.
#[derive(PartialEq, Clone)]
pub struct Glyph {
    /// the unicode code point of the glyph
    pub glyph: char,
    /// Where a human would say the glyph is: its visual line and column in the source text
    pub position: Position,
    /// Where a machine would say the glyph is: the byte offsets it occupies in the source data
    pub bytes: Range<u64>,
}

impl Glyph {
    /// True when the glyph is alphanumeric or a `.`.
    pub fn is_word(&self) -> bool {
        match self.glyph {
            '.' => true,
            other => other.is_alphanumeric(),
        }
    }

    /// True when the glyph is a `*` or satisfies [Glyph::is_word].
    pub fn is_glob(&self) -> bool {
        self.glyph == '*' || self.is_word()
    }
}

/// Glyphs render as `[c (position)]`, where the position uses its own Debug form.
impl fmt::Debug for Glyph {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "[{} ({:?})]", self.glyph, self.position)
    }
}
/// Glyphs produces [glyphs](Glyph) for a source text; i.e., it is an iterator of [Glyph] values.
/// Glyphs is used to control reading from the source text and keeps a lookahead buffer of glyphs
/// that have not been processed. While a [crate::lex::Lexer] is responsible for the creation and
/// iteration of [tokens](crate::lex::Token), Glyphs is responsible for the creation and iteration
/// of glyphs.
pub struct Glyphs<'text> {
/// the characters of the source text that have not been read yet
source: Chars<'text>,
/// the position that will be given to the next character read from `source`
next_position: Position,
/// the number of bytes of source text read so far; used to compute each glyph's byte range
bytes_read: u64,
/// glyphs that have been read from `source` but not yet consumed
lookahead: VecDeque<Glyph>,
}
impl<'text> Glyphs<'text> {
    /// Creates a glyph reader positioned at the beginning of `source`, with an empty lookahead
    /// buffer.
    pub fn new(source: &'text str) -> Self {
        Self {
            source: source.chars(),
            next_position: Position::start(),
            bytes_read: 0,
            lookahead: VecDeque::new(),
        }
    }

    /// Reads characters from the source text into the lookahead buffer until the buffer holds at
    /// least `n` glyphs or the source text runs out.
    ///
    /// Returns true when at least `n` glyphs are buffered on return, including the case where
    /// the buffer already held more than `n` glyphs before the call. Returns false when the
    /// source text ended first.
    fn fill_lookahead(&mut self, n: usize) -> bool {
        while self.lookahead.len() < n {
            let c = match self.source.next() {
                Some(c) => c,
                None => break,
            };

            let start = self.bytes_read;
            self.bytes_read += c.len_utf8() as u64;

            // A newline is given the position it occupies on its own line; the glyph after it
            // starts the next line at column 0.
            let position = if c == '\n' {
                self.next_position.incr_line()
            } else {
                self.next_position.incr()
            };

            self.lookahead.push_back(Glyph {
                glyph: c,
                position,
                bytes: Range {
                    start,
                    end: self.bytes_read,
                },
            });
        }
        // `>=` rather than `==`: an earlier, deeper peek may have buffered more than `n` glyphs,
        // and that still satisfies a request for `n`.
        self.lookahead.len() >= n
    }

    /// Returns a reference to the next glyph of the source text without consuming it, reading
    /// it into the lookahead buffer if necessary. Returns None at the end of the source text.
    pub fn peek(&mut self) -> Option<&Glyph> {
        self.peek_at(0)
    }

    /// Takes the next glyph from the source text, or fails with [LexError::UnexpectedEOF] when
    /// there are no glyphs left.
    pub fn pop(&mut self) -> Result<Glyph, LexError> {
        self.next().ok_or(LexError::UnexpectedEOF)
    }

    /// Returns a reference to the glyph `idx` places ahead of the current position (0 is the
    /// next glyph) without consuming any glyphs. Returns None when the source text ends before
    /// that place.
    fn peek_at(&mut self, idx: usize) -> Option<&Glyph> {
        self.fill_lookahead(idx + 1);
        self.lookahead.get(idx)
    }

    /// Discards glyphs from the current position for as long as the upcoming glyph matches some
    /// predicate. The first glyph that fails the predicate is left unconsumed. This is called
    /// yeet_while instead of skip_while in order to avoid conflicting with the
    /// [skip_while](https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.skip_while)
    /// method of the stdlib Iterator trait.
    pub fn yeet_while<F>(&mut self, mut pred: F)
    where
        F: FnMut(&Glyph) -> bool,
    {
        while let Some(g) = self.peek() {
            if !pred(g) {
                return;
            }
            self.next();
        }
    }

    /// Discards upcoming glyphs for as long as they are whitespace.
    pub fn yeet_whitespace(&mut self) {
        self.yeet_while(|g| g.glyph.is_whitespace());
    }
}
/// Iterating over Glyphs consumes its glyphs from the front of the lookahead buffer.
impl<'text> Iterator for Glyphs<'text> {
    type Item = Glyph;

    /// Takes the next glyph, first reading it from the source text when nothing is buffered.
    /// Returns None when no glyph is available.
    fn next(&mut self) -> Option<Glyph> {
        // Asking for one buffered glyph is a no-op whenever the buffer is non-empty, so the
        // refill only needs to happen for an empty buffer.
        if self.lookahead.is_empty() {
            self.fill_lookahead(1);
        }
        self.lookahead.pop_front()
    }
}
Loading…
Cancel
Save