You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
190 lines
7.2 KiB
C
190 lines
7.2 KiB
C
11 years ago
|
// Copyright 2010 Wincent Colaiuta. All rights reserved.
|
||
|
//
|
||
|
// Redistribution and use in source and binary forms, with or without
|
||
|
// modification, are permitted provided that the following conditions are met:
|
||
|
//
|
||
|
// 1. Redistributions of source code must retain the above copyright notice,
|
||
|
// this list of conditions and the following disclaimer.
|
||
|
// 2. Redistributions in binary form must reproduce the above copyright notice,
|
||
|
// this list of conditions and the following disclaimer in the documentation
|
||
|
// and/or other materials provided with the distribution.
|
||
|
//
|
||
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||
|
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||
|
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
||
|
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||
|
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||
|
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||
|
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||
|
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||
|
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||
|
// POSSIBILITY OF SUCH DAMAGE.
|
||
|
|
||
|
#include "match.h"
|
||
|
#include "ext.h"
|
||
|
#include "ruby_compat.h"
|
||
|
|
||
|
// State shared by every level of the recursive search. Bundling it in one
// struct keeps the recursive calls down to the few values that actually vary.
typedef struct
{
    char    *str_p;                 // string being searched
    long    str_len;                // length of str_p
    char    *abbrev_p;              // search string (abbreviation)
    long    abbrev_len;             // length of abbrev_p
    double  max_score_per_char;     // most a single matched char can add to the score
    int     dot_file;               // boolean: set to 1 by recursive_match when it scans a
                                    // '.' at the start of str or directly after a '/'
    int     always_show_dot_files;  // boolean
    int     never_show_dot_files;   // boolean
} matchinfo_t;

// Multiplier applied to a matched character that does not sit directly after
// the previously matched one. `prev` is the character just before the match,
// `curr` is the matched character in its original case, and `gap` is the
// distance back to the previous match.
static double match_gap_factor(char prev, char curr, long gap)
{
    if (prev == '/')
        return 0.9;
    if (prev == '-' || prev == '_' || prev == ' ' || (prev >= '0' && prev <= '9'))
        return 0.8;
    if (prev >= 'a' && prev <= 'z' && curr >= 'A' && curr <= 'Z')
        return 0.8; // lower-case char followed by an upper-case match
    if (prev == '.')
        return 0.7;

    // nothing "special" in front of the match: the factor shrinks as the
    // distance from the previous match grows
    return (1.0 / gap) * 0.75;
}

// Scores the abbreviation (from abbrev_idx onwards) against the string (from
// str_idx onwards), matching abbreviation characters left to right.
//
//   m          - shared meta-data; m->dot_file may be set as a side effect
//   str_idx    - where in the string to start looking
//   abbrev_idx - where in the abbreviation to start
//   last_idx   - location of the last matched character
//   score      - cumulative score so far
//
// Returns 0.0 when some abbreviation character cannot be found, or when the
// string was flagged as a dot-file and the dot-file settings reject it.
// Otherwise returns the larger of the score built at this level and the best
// score produced by the recursive calls.
double recursive_match(matchinfo_t *m,
                       long str_idx,
                       long abbrev_idx,
                       long last_idx,
                       double score)
{
    double best_sub_score = 0;  // best result returned by any recursive call
    int matched_dot_file = 0;   // true if a '.' in the abbreviation hit a dot-file's dot

    for (long a = abbrev_idx; a < m->abbrev_len; a++)
    {
        const char wanted = m->abbrev_p[a];
        const int wants_dot = (wanted == '.');
        long hit = -1;

        // advance the cursor until the wanted character turns up
        for (; str_idx < m->str_len; str_idx++)
        {
            char seen = m->str_p[str_idx];
            if (seen == '.')
            {
                if (str_idx == 0 || m->str_p[str_idx - 1] == '/')
                {
                    m->dot_file = 1;            // this is a dot-file
                    if (wants_dot)
                        matched_dot_file = 1;   // and the search asked for the dot
                }
            }
            else if (seen >= 'A' && seen <= 'Z')
                seen += 'a' - 'A';              // downcase before comparing

            if (seen == wanted)
            {
                hit = str_idx;
                break;
            }
        }
        if (hit < 0)
            return 0.0;

        // work out what this character is worth
        double char_score = m->max_score_per_char;
        long gap = hit - last_idx;
        if (gap > 1)
            char_score *= match_gap_factor(m->str_p[hit - 1], m->str_p[hit], gap);

        // before accepting this position, look for the same abbreviation
        // character further to the right, in case that scores better
        long next = hit + 1;
        if (next < m->str_len)
        {
            double alternative = recursive_match(m, next, a, last_idx, score);
            if (alternative > best_sub_score)
                best_sub_score = alternative;
        }

        score += char_score;
        last_idx = hit;
        str_idx = next;
    }

    if (m->dot_file &&
        (m->never_show_dot_files ||
         (!matched_dot_file && !m->always_show_dot_files)))
        return 0.0;

    return (score > best_sub_score) ? score : best_sub_score;
}
|
||
|
|
||
|
// Match.new abbrev, string, options = {}
|
||
|
VALUE CommandTMatch_initialize(int argc, VALUE *argv, VALUE self)
|
||
|
{
|
||
|
// process arguments: 2 mandatory, 1 optional
|
||
|
VALUE str, abbrev, options;
|
||
|
if (rb_scan_args(argc, argv, "21", &str, &abbrev, &options) == 2)
|
||
|
options = Qnil;
|
||
|
str = StringValue(str);
|
||
|
abbrev = StringValue(abbrev); // already downcased by caller
|
||
|
|
||
|
// check optional options hash for overrides
|
||
|
VALUE always_show_dot_files = CommandT_option_from_hash("always_show_dot_files", options);
|
||
|
VALUE never_show_dot_files = CommandT_option_from_hash("never_show_dot_files", options);
|
||
|
|
||
|
matchinfo_t m;
|
||
|
m.str_p = RSTRING_PTR(str);
|
||
|
m.str_len = RSTRING_LEN(str);
|
||
|
m.abbrev_p = RSTRING_PTR(abbrev);
|
||
|
m.abbrev_len = RSTRING_LEN(abbrev);
|
||
|
m.max_score_per_char = (1.0 / m.str_len + 1.0 / m.abbrev_len) / 2;
|
||
|
m.dot_file = 0;
|
||
|
m.always_show_dot_files = always_show_dot_files == Qtrue;
|
||
|
m.never_show_dot_files = never_show_dot_files == Qtrue;
|
||
|
|
||
|
// calculate score
|
||
|
double score = 1.0;
|
||
|
if (m.abbrev_len == 0) // special case for zero-length search string
|
||
|
{
|
||
|
// filter out dot files
|
||
|
if (!m.always_show_dot_files)
|
||
|
{
|
||
|
for (long i = 0; i < m.str_len; i++)
|
||
|
{
|
||
|
char c = m.str_p[i];
|
||
|
if (c == '.' && (i == 0 || m.str_p[i - 1] == '/'))
|
||
|
{
|
||
|
score = 0.0;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
else // normal case
|
||
|
score = recursive_match(&m, 0, 0, 0, 0.0);
|
||
|
|
||
|
// clean-up and final book-keeping
|
||
|
rb_iv_set(self, "@score", rb_float_new(score));
|
||
|
rb_iv_set(self, "@str", str);
|
||
|
return Qnil;
|
||
|
}
|
||
|
|
||
|
// Returns true when the value stored in @score is greater than zero, and
// false otherwise.
VALUE CommandTMatch_matches(VALUE self)
{
    VALUE stored = rb_iv_get(self, "@score");
    if (NUM2DBL(stored) > 0)
        return Qtrue;
    return Qfalse;
}
|
||
|
|
||
|
// Returns whatever is stored in the receiver's @str instance variable.
VALUE CommandTMatch_to_s(VALUE self)
{
    VALUE stored = rb_iv_get(self, "@str");
    return stored;
}
|