Index: class/security/pipeline.class.php =================================================================== --- class/security/pipeline.class.php (revision 2305) +++ class/security/pipeline.class.php (working copy) @@ -9,7 +9,7 @@ */ - include_once( PLOG_CLASS_PATH."class/object/object.class.php" ); + include_once( PLOG_CLASS_PATH."class/object/object.class.php" ); include_once( PLOG_CLASS_PATH."class/dao/bloginfo.class.php" ); include_once( PLOG_CLASS_PATH."class/config/config.class.php" ); include_once( PLOG_CLASS_PATH."class/security/nullpipelinefilter.class.php" ); @@ -39,14 +39,14 @@ * The out of the box implementation of the Pipeline comes with a null filter (a filter that does nothing -- go figure :)) * and a filter that implements a Bayesian filter for advanced spam protection. See the BayesianFilter class for more information. */ - class Pipeline extends Object - { + class Pipeline extends Object + { - /** + /** * HTTP request that will be used if the filter is doing * some content filtering */ - var $_httpRequest; + var $_httpRequest; /** * the BlogInfo object that has information about the blog @@ -66,9 +66,9 @@ * @param blogInfo The BlogInfo object with information about the blog * that is currently executing this pipeline */ - function Pipeline( $httpRequest, $blogInfo = null ) + function Pipeline( $httpRequest, $blogInfo = null ) { - $this->Object(); + $this->Object(); $this->_httpRequest = $httpRequest; $this->_blogInfo = $blogInfo; @@ -122,10 +122,10 @@ { global $_pLogPipelineRegisteredFilters; - $pipelineRequest = new PipelineRequest( $this->_httpRequest, $this->_blogInfo ); + $pipelineRequest = new PipelineRequest( $this->_httpRequest, $this->_blogInfo ); - foreach( $_pLogPipelineRegisteredFilters as $filterClass ) { - array_push( $this->_filters, new $filterClass( $pipelineRequest )); + foreach( $_pLogPipelineRegisteredFilters as $filterClass ) { + array_push( $this->_filters, new $filterClass( $pipelineRequest )); } return true; @@ -139,58 
+139,55 @@ { global $_pLogPipelineRegisteredFilters; - // check if the pipeline is enabled + // check if the pipeline is enabled $config =& Config::getConfig(); if( $config->getValue( "security_pipeline_enabled" ) == false ) { - // pipeline is disabled, so everything's fine - return new PipelineResult( true ); + // pipeline is disabled, so everything's fine + return new PipelineResult( true ); } - // boolean to indicate whether we should run the bayesian filter - // This is a hack. We don't want to run the bayesian filter on - // a message that will be rejected by another filter. The only way - // to know this is to run after all of the other filters have run. - // - // Ideally, once a message is rejected, we would want to let all - // of the filters know about it. This way they could do something - // interesting (i.e. train the message as spam, report the ip address - // and urls to dnsbl services) - - // default it to false. - $runBayesianFilter = false; - - // if enabled, then check all the filters - foreach( $_pLogPipelineRegisteredFilters as $filterClass ) { - if ( strcmp( $filterClass, "BayesianFilter" ) == 0 ) { - $runBayesianFilter = true; - } - // create an instance of the filter - $pipelineRequest = new PipelineRequest( $this->_httpRequest, $this->_blogInfo ); + // Assume that this will be successful + $this->_result = new PipelineResult( true ); - $filter = new $filterClass( $pipelineRequest ); - // and execute it... - $result = $filter->filter(); + // if enabled, then check all the filters + foreach( $_pLogPipelineRegisteredFilters as $filterClass ) { + // create an instance of the filter + $pipelineRequest = new PipelineRequest( $this->_httpRequest, $this->_blogInfo ); + $filter = new $filterClass( $pipelineRequest ); + // and execute it... 
+ $result = $filter->filter(); // if there was an error, we better say so now // and quite, making sure that we're keeping the // error code - if( !$result->isValid()) { - $this->_result = $result; - return $result; + if( !$result->isValid()) { + // Save off the result + $this->_result = $result; + + // break out of this loop + break; } } - - if ( $runBayesianFilter ) - { - // create an instance of the filter - $pipelineRequest = new PipelineRequest( $this->_httpRequest, $this->_blogInfo ); - - $filter = new BayesianFilter( $pipelineRequest ); - // and execute it... - $result = $filter->filter(); + + // If one of the filters returns that this was not a valid result + if ( !$this->_result->isValid() ) { + // Now rerun through all of the filters so they can clean up + // if they have saved anything persistently + // This also gives filters a chance to do anything else they + // want to do (e.g. report ip address to dns blacklist) + + foreach( $_pLogPipelineRegisteredFilters as $filterClass ) { + // create an instance of the filter + $pipelineRequest = new PipelineRequest( $this->_httpRequest, + $this->_blogInfo, + true ); + $filter = new $filterClass( $pipelineRequest ); + // and execute it... 
+ $result = $filter->filter(); + // if there was an error, we want to keep going + } } - $this->_result = $result; - return $result; + return $this->_result ; } } ?> Index: class/security/commentfilter.class.php =================================================================== --- class/security/commentfilter.class.php (revision 2300) +++ class/security/commentfilter.class.php (working copy) @@ -40,6 +40,10 @@ if( $request->getValue( "op" ) != "AddComment" ) return new PipelineResult(); + // if this is already rejected, there is no reason to do anything here + if ( $this->_pipelineRequest->getRejectedState() ) + return new PipelineResult(); + // get the value of the maximum size of a comment, in bytes $config =& Config::getConfig(); $maxSize = $config->getValue( "maximum_comment_size" ); Index: class/security/pipelinerequest.class.php =================================================================== --- class/security/pipelinerequest.class.php (revision 2300) +++ class/security/pipelinerequest.class.php (working copy) @@ -16,6 +16,7 @@ var $_httpRequest; var $_blogInfo; + var $_requestRejected; /** * Constructor. 
@@ -24,16 +25,17 @@ * @param blogInfo A BlogInfo object with information about the blog * currently executing the request */ - function PipelineRequest( $httpRequest, $blogInfo ) + function PipelineRequest( $httpRequest, $blogInfo, $rejected = false ) { $this->Object(); if( is_array($httpRequest)) $this->_httpRequest = new Properties( $httpRequest ); else - $this->_httpRequest = $httpRequest; + $this->_httpRequest = $httpRequest; - $this->_blogInfo = $blogInfo; + $this->_blogInfo = $blogInfo; + $this->_requestRejected = $rejected; } /** @@ -52,5 +54,15 @@ { return $this->_httpRequest; } + + /** + * @return Returns a boolean that indicates if this pipeline request has + * already been rejected + */ + function getRejectedState() + { + return $this->_requestRejected; + } + } ?> Index: class/security/bayesianfilter.class.php =================================================================== --- class/security/bayesianfilter.class.php (revision 2300) +++ class/security/bayesianfilter.class.php (working copy) @@ -1,7 +1,7 @@ PipelineFilter( $pipelineRequest ); + $this->PipelineFilter( $pipelineRequest ); } /** @@ -48,21 +48,22 @@ */ function filter() { - $config =& Config::getConfig(); - - if (!$config->getValue("bayesian_filter_enabled")) - { - return new PipelineResult(true); - } - - // get some info + $config =& Config::getConfig(); + + if (!$config->getValue("bayesian_filter_enabled")) + { + return new PipelineResult(true); + } + + // get some info $blogInfo = $this->_pipelineRequest->getBlogInfo(); $request = $this->_pipelineRequest->getHttpRequest(); - + $previoslyRejected = $this->_pipelineRequest->getRejectedState(); + // we only have to filter the contents if the user is posting a comment // so there's no point in doing anything else if that's not the case if( $request->getValue( "op" ) != "AddComment" ) { - $result = new PipelineResult(); + $result = new PipelineResult(); return $result; } @@ -75,7 +76,7 @@ $articleId = $request->getValue( "articleId" ); 
$parentId = $request->getValue( "parentId" ); if( $parentId == "" ) - $parentId = 0; + $parentId = 0; $spamicity = $this->getSpamProbability($blogInfo->getId(), $commentTopic, $commentText, $userName, $userEmail, $userUrl); @@ -90,27 +91,45 @@ // still be added but marked as spam and so on... sometimes breaking a few // rules makes things easier :) if( $config->getValue( "bayesian_filter_spam_comments_action" ) == BAYESIAN_FILTER_KEEP_COMMENT_ACTION ) { - $comments = new ArticleComments(); - $clientIp = Client::getIp(); - $comment = new UserComment( $articleId, $parentId, $commentTopic, $commentText, - null, $userName, $userEmail, $userUrl, $clientIp, - 0, COMMENT_STATUS_SPAM ); + $comments = new ArticleComments(); + $clientIp = Client::getIp(); + $comment = new UserComment( $articleId, $parentId, $commentTopic, $commentText, + null, $userName, $userEmail, $userUrl, $clientIp, + 0, COMMENT_STATUS_SPAM ); $comments->addComment( $comment ); } else { - // nothing to do here, simply throw the comment away + // nothing to do here, simply throw the comment away } - $spam = true; + $spam = true; } else { $result = new PipelineResult(true); - $spam = false; + $spam = false; } - - // train the filter with the message, be it spam or not... - BayesianFilterCore::train( $blogInfo->getId(), $commentTopic, $commentText, $userName, $userEmail, - $userUrl, $spam ); + + if ( !$previoslyRejected ) + { + // train the filter with the message, be it spam or not... + BayesianFilterCore::train( $blogInfo->getId(), $commentTopic, $commentText, $userName, $userEmail, + $userUrl, $spam ); + } + else + { + // This is a rejected message. 
If we think that this is non-spam, + // we want to untrain it and then retrain it as spam + if ( !$spam ) + { + // Un-train this non-spam + BayesianFilterCore::untrain( $blogInfo->getId(), $commentTopic, $commentText, $userName, $userEmail, + $userUrl, $spam ); + + // train this as spam + BayesianFilterCore::train( $blogInfo->getId(), $commentTopic, $commentText, $userName, $userEmail, + $userUrl, true ); + } + } //print "

" . number_format($spamicity * 100, 0) . "% of spamicity

"; return $result; @@ -139,11 +158,11 @@ /** * @private */ - function _getMostSignificantTokens($blogId, $tokens) + function _getMostSignificantTokens($blogId, $tokens) { - $config =& Config::getConfig(); - - $bayesianFilterInfos = new BayesianFilterInfos(); + $config =& Config::getConfig(); + + $bayesianFilterInfos = new BayesianFilterInfos(); $bayesianFilterInfo = $bayesianFilterInfos->getBlogBayesianFilterInfo($blogId); $totalSpam = $bayesianFilterInfo->getTotalSpam(); @@ -153,7 +172,7 @@ foreach ($tokens as $token) { - $bayesianTokens->updateOccurrences($blogId, $token, 0, 0, $totalSpam, $totalNonSpam, false); + $bayesianTokens->updateOccurrences($blogId, $token, 0, 0, $totalSpam, $totalNonSpam, false); } $tokens = $bayesianTokens->getBayesianTokensFromArray($blogId, $tokens); @@ -161,9 +180,9 @@ foreach ($tokens as $token) { - if ($token->isSignificant() && $token->isValid()) - { - array_push($tempArray, abs($token->getProb() - 0.5)); + if ($token->isSignificant() && $token->isValid()) + { + array_push($tempArray, abs($token->getProb() - 0.5)); } } @@ -198,7 +217,7 @@ $productProb *= $token->getProb(); $productNoProb *= (1 - $token->getProb()); } - + return $productProb / ($productProb + $productNoProb); } }