code organization
parent
97602bf42e
commit
5108e4457f
@ -0,0 +1,3 @@
|
|||||||
|
/// A command that can be invoked with a list of string arguments.
pub trait BuiltinFn {
    /// Runs the command.
    ///
    /// `args` is the owned list of borrowed argument strings handed to the command. The method
    /// returns nothing, so any outcome must be communicated through side effects.
    fn call(&self, args: Vec<&str>);
}
|
@ -0,0 +1,172 @@
|
|||||||
|
use crate::error::LexError;
|
||||||
|
use std::{collections::VecDeque, fmt, ops::Range, str::Chars};
|
||||||
|
|
||||||
|
/// A human-readable location (line and column) of a single glyph inside a body of source text.
/// Positions are attached to glyphs so that error messages can tell the user where a problem
/// occurred.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Position {
    /// The visual line of the source text on which the glyph appears.
    pub line: u64,

    /// The visual column of the source text in which the glyph appears.
    pub column: u64,
}
|
||||||
|
|
||||||
|
impl Position {
|
||||||
|
pub fn start() -> Self {
|
||||||
|
Self { line: 0, column: 0 }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Increments position by column, going from the current line,column position to the next
|
||||||
|
/// column on the same line.
|
||||||
|
pub fn incr(&mut self) -> Position {
|
||||||
|
let p = *self;
|
||||||
|
self.column += 1;
|
||||||
|
p
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Increments the position by line, going from the current line,column position to the
|
||||||
|
/// beginning of the next line.
|
||||||
|
pub fn incr_line(&mut self) -> Position {
|
||||||
|
let p = *self;
|
||||||
|
self.column = 0;
|
||||||
|
self.line += 1;
|
||||||
|
p
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(PartialEq, Clone)]
|
||||||
|
pub struct Glyph {
|
||||||
|
/// the unicode code point of the glyph
|
||||||
|
pub glyph: char,
|
||||||
|
|
||||||
|
/// The visual position in which the glyph appears; i.e., the human-comprehensible location
|
||||||
|
/// of the glyph in the source text
|
||||||
|
pub position: Position,
|
||||||
|
|
||||||
|
/// The byte offsets corresponding to this topoglyph in the source data; i.e., the
|
||||||
|
/// machine-comprehensible location of the glyph in the source text
|
||||||
|
pub bytes: Range<u64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Glyph {
|
||||||
|
pub fn is_word(&self) -> bool {
|
||||||
|
self.glyph.is_alphanumeric() || self.glyph == '.'
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_glob(&self) -> bool {
|
||||||
|
self.is_word() || self.glyph == '*'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Debug for Glyph {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"[{char} ({pos:?})]",
|
||||||
|
char = self.glyph,
|
||||||
|
pos = self.position
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/// Glyphs produces [glyphs](Glyph) for a source text; i.e., it is an iterator of [Glyph] values.
|
||||||
|
/// Glyphs is used to control reading from the source text and keeps a lookahead buffer of glyphs
|
||||||
|
/// that have not been processed. While a [crate::lex::Lexer] is responsible for the creation and
|
||||||
|
/// iteration of [tokens](crate::lex::Token), Glyphs is responsible for the creation and iteration
|
||||||
|
/// of glyphs.
|
||||||
|
pub struct Glyphs<'text> {
|
||||||
|
source: Chars<'text>,
|
||||||
|
next_position: Position,
|
||||||
|
bytes_read: u64,
|
||||||
|
lookahead: VecDeque<Glyph>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'text> Glyphs<'text> {
|
||||||
|
pub fn new(source: &'text str) -> Self {
|
||||||
|
// neat
|
||||||
|
Self {
|
||||||
|
source: source.chars(),
|
||||||
|
next_position: Position::start(),
|
||||||
|
bytes_read: 0,
|
||||||
|
lookahead: VecDeque::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// reads the next n characters from the source text into our lookahead buffer
|
||||||
|
fn fill_lookahead(&mut self, n: usize) -> bool {
|
||||||
|
while self.lookahead.len() < n {
|
||||||
|
let c = match self.source.next() {
|
||||||
|
Some(c) => c,
|
||||||
|
None => break,
|
||||||
|
};
|
||||||
|
|
||||||
|
let len = c.len_utf8();
|
||||||
|
let start = self.bytes_read;
|
||||||
|
self.bytes_read += len as u64;
|
||||||
|
let position = if c == '\n' {
|
||||||
|
self.next_position.incr_line()
|
||||||
|
} else {
|
||||||
|
self.next_position.incr()
|
||||||
|
};
|
||||||
|
self.lookahead.push_back(Glyph {
|
||||||
|
glyph: c,
|
||||||
|
position,
|
||||||
|
bytes: Range {
|
||||||
|
start,
|
||||||
|
end: self.bytes_read,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
self.lookahead.len() == n
|
||||||
|
}
|
||||||
|
|
||||||
|
/// returns a reference to the next character from the source text, advancing our internal
|
||||||
|
/// lookahead buffer if necessary. Returns None if we're already at the end of our source text.
|
||||||
|
pub fn peek(&mut self) -> Option<&Glyph> {
|
||||||
|
self.peek_at(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// takes the next character from our input text
|
||||||
|
pub fn pop(&mut self) -> Result<Glyph, LexError> {
|
||||||
|
self.next().ok_or(LexError::UnexpectedEOF)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// returns a reference to a character in our lookahead buffer at a given position. This allows
|
||||||
|
/// us to perform a lookahead read without consuming any tokens, maintaining our current
|
||||||
|
/// position and keeping our unconsumed characters safe.
|
||||||
|
fn peek_at(&mut self, idx: usize) -> Option<&Glyph> {
|
||||||
|
self.fill_lookahead(idx + 1);
|
||||||
|
self.lookahead.get(idx)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// discards characters from our current position so long as the upcoming characters match some
|
||||||
|
/// predicate. This is called yeet_while instead of skip_while in order to avoid conflicting
|
||||||
|
/// with the
|
||||||
|
/// [skip_while](https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.skip_while)
|
||||||
|
/// method of the stdlib Iterator trait.
|
||||||
|
pub fn yeet_while<F>(&mut self, mut pred: F)
|
||||||
|
where
|
||||||
|
F: FnMut(&Glyph) -> bool,
|
||||||
|
{
|
||||||
|
while let Some(g) = self.peek() {
|
||||||
|
if pred(&g) {
|
||||||
|
self.next();
|
||||||
|
} else {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn yeet_whitespace(&mut self) {
|
||||||
|
self.yeet_while(|tg| tg.glyph.is_whitespace());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'text> Iterator for Glyphs<'text> {
|
||||||
|
type Item = Glyph;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
self.fill_lookahead(1);
|
||||||
|
self.lookahead.pop_front()
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue