Validator Code Made Public
4 participantes
Página 1 de 1.
Validator Code Made Public
-- Below is the code for the validator. This is written in PL/SQL as that allowed easy integration w/ the original online APEX site.
-- It is recursive, and calls some private functions but their use should be obvious from their names.
-- due to limitations on the # of cursors we can have open at one time (and thus depth of recursivity) we do backtrack 5 movie names when we are close to the limit and set this as a new starting point. Shows one reason why its bad to do recursion.
-- any comments, bugs or if you need the called functions please reply below
create or replace function CHECK_MOVIE_STRING2(inString clob, inFirstWord varchar2, inWords number, inCursors number default 0, totalWordsToSkip number default 0, solutionID number)
return number
is
-- get all unused movies who match the start. Use first firstWord for performance optimization
CURSOR aMovie_cur
IS
select id, fullName, firstWord, words, length
from MovieWords
where firstWord = inFirstWord
and fullName = to_char(substr(inString,1,length))
and used = 'N';
aMovie_rec aMovie_cur%ROWTYPE;
newString varchar2(32767); -- next concatenated string to use
newFirstWord varchar2(4000); -- next first word
backtrack number := 5; -- how many movie names to backtrack. Max 9. Used to avoid open cursor limit
maxCursors number := 100; -- max number of cursors to check;Used to avoid open cursor limit. Will backtrack when reached
isDone number; -- 0 false, 1 true, other number is number of words to skip to get for next section (avoid open cursor problem)
trackingSize number; -- for debugging
moviesFetched number; -- how many movies did this query get
whichMovie number := 0; -- counter of what movie we are in
lengthLastWord number; -- check size for padding
leastInWords number; -- used to track farthest point in the string tracked.
begin
select count(*)
into moviesFetched
from MovieWords
where firstWord = inFirstWord
and fullName = to_char(substr(inString,1,length))
and used = 'N';
if inCursors = 0 then -- do starting checks
-- check length of last work to avoid padding hack
select length(last_word(inString)) into lengthLastWord from dual;
if lengthLastWord > 20 then
return 0; -- invalid input if padded movie
end if;
-- simple check test to find only print cases.
-- CODE REMOVED
end if;
if inWords = 0 then
return 1; -- at end of string. Fully traced
elsif inCursors>maxCursors then ---- To Deep --- Need to Backtrack to Avoid Open Cursors -----
return totalWordsToSkip+backtrack/10; -- backtrack X movie names;
else -- still can trace forwards
open aMovie_cur;
loop
whichMovie:=whichMovie+1;
FETCH aMovie_cur INTO aMovie_rec;
EXIT WHEN aMovie_cur%NOTFOUND;
update MovieWords
set used='Y'
where id = aMovie_rec.id; -- set word as used
update coding_challenge_solution
set used_movies = used_movies || aMovie_rec.fullName || '\\'
where solution_id = solutionID;
-- check if we have gone farther than before for tracking purposes.
select least_inwords into leastInWords from coding_challenge_solution
where solution_id = solutionID;
-- if longer then update tracking column
if leastInWords is null or leastInWords > inWords then
update coding_challenge_solution
set least_inwords=inWords,
last_movie=aMovie_rec.fullName
where solution_id = solutionID;
end if;
if aMovie_rec.words=inWords then
return 1; -- Done. Match the last words.
end if;
for wordsToSkip in reverse 1..(aMovie_rec.words-1) -- check for next overlapping movie, starting w/ the least overlap
loop
newString := TRIM(SKIP_WORDS(substr(inString, 1, 32767), wordsToSkip));
newFirstWord := TRIM( FIRST_WORD(newString) );
isDone := CHECK_MOVIE_STRING2(newString, newFirstWord, (inWords - wordsToSkip), inCursors+1, totalWordsToSkip+wordsToSkip, solutionID);
if isDone=0
then null; -- failed, try another movie
elsif isDone=1 then
return 1; -- valid solution pass back
elsif isDone=trunc(isDone) then -- no decimals, so backtrack point already found
if inCursors=0 then -- back all the way, need to skip ahead
newString:=trim(Skip_Words(substr(inString, 1, 32767),isDone)); -- jump forward the words sent back by isDone
newFirstWord:=trim(First_Word(newString));
return CHECK_MOVIE_STRING2(newString, newFirstWord,count_Words(newString),0,0, solutionID);
else
return isDone; -- keep going back to beginning
end if;
else -- keep finding backtrack point
update MovieWords
set used='N'
where id = aMovie_rec.id; -- set word as not used, so can reuse
update coding_challenge_solution
set used_movies = replace(used_movies, aMovie_rec.fullName || '\\', '')
where solution_id = solutionID;
return (isDone-wordsToSkip-0.1); -- backtracking, don't count current words, and remove one decimal to indicate one backtrack step
end if;
end loop;
update MovieWords
set used='N'
where id = aMovie_rec.id; -- set word as not used. Was a dead end.
update coding_challenge_solution
set used_movies = replace(used_movies, aMovie_rec.fullName || '\\', '')
where solution_id = solutionID;
end loop;
close aMovie_cur;
end if;
return 0;
end;
-- It is recursive, and calls some private functions but their use should be obvious from their names.
-- due to limitations on the # of cursors we can have open at one time (and thus depth of recursivity) we do backtrack 5 movie names when we are close to the limit and set this as a new starting point. Shows one reason why its bad to do recursion.
-- any comments, bugs or if you need the called functions please reply below
create or replace function CHECK_MOVIE_STRING2(inString clob, inFirstWord varchar2, inWords number, inCursors number default 0, totalWordsToSkip number default 0, solutionID number)
return number
is
-- get all unused movies who match the start. Use first firstWord for performance optimization
CURSOR aMovie_cur
IS
select id, fullName, firstWord, words, length
from MovieWords
where firstWord = inFirstWord
and fullName = to_char(substr(inString,1,length))
and used = 'N';
aMovie_rec aMovie_cur%ROWTYPE;
newString varchar2(32767); -- next concatenated string to use
newFirstWord varchar2(4000); -- next first word
backtrack number := 5; -- how many movie names to backtrack. Max 9. Used to avoid open cursor limit
maxCursors number := 100; -- max number of cursors to check;Used to avoid open cursor limit. Will backtrack when reached
isDone number; -- 0 false, 1 true, other number is number of words to skip to get for next section (avoid open cursor problem)
trackingSize number; -- for debugging
moviesFetched number; -- how many movies did this query get
whichMovie number := 0; -- counter of what movie we are in
lengthLastWord number; -- check size for padding
leastInWords number; -- used to track farthest point in the string tracked.
begin
select count(*)
into moviesFetched
from MovieWords
where firstWord = inFirstWord
and fullName = to_char(substr(inString,1,length))
and used = 'N';
if inCursors = 0 then -- do starting checks
-- check length of last work to avoid padding hack
select length(last_word(inString)) into lengthLastWord from dual;
if lengthLastWord > 20 then
return 0; -- invalid input if padded movie
end if;
-- simple check test to find only print cases.
-- CODE REMOVED
end if;
if inWords = 0 then
return 1; -- at end of string. Fully traced
elsif inCursors>maxCursors then ---- To Deep --- Need to Backtrack to Avoid Open Cursors -----
return totalWordsToSkip+backtrack/10; -- backtrack X movie names;
else -- still can trace forwards
open aMovie_cur;
loop
whichMovie:=whichMovie+1;
FETCH aMovie_cur INTO aMovie_rec;
EXIT WHEN aMovie_cur%NOTFOUND;
update MovieWords
set used='Y'
where id = aMovie_rec.id; -- set word as used
update coding_challenge_solution
set used_movies = used_movies || aMovie_rec.fullName || '\\'
where solution_id = solutionID;
-- check if we have gone farther than before for tracking purposes.
select least_inwords into leastInWords from coding_challenge_solution
where solution_id = solutionID;
-- if longer then update tracking column
if leastInWords is null or leastInWords > inWords then
update coding_challenge_solution
set least_inwords=inWords,
last_movie=aMovie_rec.fullName
where solution_id = solutionID;
end if;
if aMovie_rec.words=inWords then
return 1; -- Done. Match the last words.
end if;
for wordsToSkip in reverse 1..(aMovie_rec.words-1) -- check for next overlapping movie, starting w/ the least overlap
loop
newString := TRIM(SKIP_WORDS(substr(inString, 1, 32767), wordsToSkip));
newFirstWord := TRIM( FIRST_WORD(newString) );
isDone := CHECK_MOVIE_STRING2(newString, newFirstWord, (inWords - wordsToSkip), inCursors+1, totalWordsToSkip+wordsToSkip, solutionID);
if isDone=0
then null; -- failed, try another movie
elsif isDone=1 then
return 1; -- valid solution pass back
elsif isDone=trunc(isDone) then -- no decimals, so backtrack point already found
if inCursors=0 then -- back all the way, need to skip ahead
newString:=trim(Skip_Words(substr(inString, 1, 32767),isDone)); -- jump forward the words sent back by isDone
newFirstWord:=trim(First_Word(newString));
return CHECK_MOVIE_STRING2(newString, newFirstWord,count_Words(newString),0,0, solutionID);
else
return isDone; -- keep going back to beginning
end if;
else -- keep finding backtrack point
update MovieWords
set used='N'
where id = aMovie_rec.id; -- set word as not used, so can reuse
update coding_challenge_solution
set used_movies = replace(used_movies, aMovie_rec.fullName || '\\', '')
where solution_id = solutionID;
return (isDone-wordsToSkip-0.1); -- backtracking, don't count current words, and remove one decimal to indicate one backtrack step
end if;
end loop;
update MovieWords
set used='N'
where id = aMovie_rec.id; -- set word as not used. Was a dead end.
update coding_challenge_solution
set used_movies = replace(used_movies, aMovie_rec.fullName || '\\', '')
where solution_id = solutionID;
end loop;
close aMovie_cur;
end if;
return 0;
end;
erikpeterson- Mensajes : 25
Fecha de inscripción : 06/01/2016
Re: Validator Code Made Public
I think the validator is failing to handle multiple words overlapping.
Given the following titles (all of them in the dataset):
1) adventures of juku the dog
2) the dogs of war
3) dogs of war
Joining 1 and 2 results in: "adventures of juku the dogs of war"
The validator finds "adventures of juku the dog" and then proceeds to look for a string starting with "dog".
This, results in finding "dogs of war" instead of "the dogs of war".
Hence, if trying to use "dogs of war" later (most of my results), the validator return invalid when it is not!
Given the following titles (all of them in the dataset):
1) adventures of juku the dog
2) the dogs of war
3) dogs of war
Joining 1 and 2 results in: "adventures of juku the dogs of war"
The validator finds "adventures of juku the dog" and then proceeds to look for a string starting with "dog".
This, results in finding "dogs of war" instead of "the dogs of war".
Hence, if trying to use "dogs of war" later (most of my results), the validator return invalid when it is not!
masp- Mensajes : 15
Fecha de inscripción : 07/01/2016
Re: Validator Code Made Public
Wow! That's a tough testcase. How could the validator know if you're using "the dogs of war" insetad of "dogs of war" or viceversa? I mean, it could fail in both ways, thinking you're using one when really you're using the other one, and removing one posibile title.
casvel- Mensajes : 2
Fecha de inscripción : 20/11/2015
Re: Validator Code Made Public
Actually the reason the validator failed in this case was not because multiple words where overlapping. That it does handle.
It is because the two occasions when it was used in MASP code was so far apart that checkpoints I use for the recursive solution prevent it from returning far enough for this.
I did a specific fix for this case. If someone else has a similar case, please let me know.
It is because the two occasions when it was used in MASP code was so far apart that checkpoints I use for the recursive solution prevent it from returning far enough for this.
I did a specific fix for this case. If someone else has a similar case, please let me know.
erikpeterson- Mensajes : 25
Fecha de inscripción : 06/01/2016
Re: Validator Code Made Public
Here's a similar case:
death race 2
death race
race 2
So if I use "death race 2 guns ..." and so on, then I can't use "death race" again, even though the "death race 2" was a single movie, not the combination of "death race" and "race 2"
BTW, the validator I sent you does not have this problem (it searches through the whole space, and it doesn't use recursion).
death race 2
death race
race 2
So if I use "death race 2 guns ..." and so on, then I can't use "death race" again, even though the "death race 2" was a single movie, not the combination of "death race" and "race 2"
BTW, the validator I sent you does not have this problem (it searches through the whole space, and it doesn't use recursion).
mraggi- Mensajes : 18
Fecha de inscripción : 16/12/2015
Re: Validator Code Made Public
mraggi. Did you get this double case in one of your solutions?
erikpeterson- Mensajes : 25
Fecha de inscripción : 06/01/2016
Re: Validator Code Made Public
Yes, I did indeed. I sent the test case to Antonio Hernandez, but I'll send it to you too by email.
mraggi- Mensajes : 18
Fecha de inscripción : 16/12/2015
Página 1 de 1.
Permisos de este foro:
No puedes responder a temas en este foro.