#!/usr/bin/perl

#   checkdwn.pl - Detects common errors in DWN
#   Copyright (c) 2002,3  Thomas Bliesener <nospam@melix.com.mx>
#                 2002,3  Martin Schulze <joey@infodrom.org>
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc.,59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

use strict;
use warnings;

my ($line) = 0;
my ($spaces);
my ($marker);
my ($last) = "";
my ($free) = 0;
my ($verbose) = 0;
my ($msg);
my (@dwn);

if ($ARGV[0] eq "-v" or $ARGV[0] eq "--verbose") {
	$verbose = 1;
	shift;
}

@dwn = <>;

print "\n";
while ($_ = $dwn[$line]){
	$line++;

# Search XXX
	if (/(.*)XXX/) {	
		$marker = "^^^";
		$msg = 'XXX';
		&print_error;
	}
	
# Search for double words in a line
	if (/(.*?)\b(\w+)\s+\2\b/ig) {
		if ($2 !~ /(der|die|das)/) {
			$marker = "^"x(length($2))." "."^"x(length($2));
			$msg = 'Double words';
			&print_error;
		}	
	}
	
# Search for double words separated by a newline
	if (/()(^\S+?)\b.*\b(\S+?$)/i) {
		if (($last eq $2) and ($2 !~ /(der|die|das)/)) {
			$marker = "^"x(length($last));
			$msg = 'Double words (two lines)';
			&print_error;
		}
		$last = $3;
	}

# Search for "http://www.debian.org" instead of $(HOME)	
	if (m#(.*?)http://www.debian.org#g) { 
		$marker = "^"x(length("http://www.debian.org"));
		$msg = 'http://www.debian.org instead of $(HOME)';
		&print_error;
	}	

# Missing space before/after an HTML tag?
	#opening tag
	if (m#(.*?\w+)<\w+#g) {
		$marker = "^";
		$msg = 'Missing space before opening HTML tag';
		&print_error;
	}

	#closing tag
	if (m#(.*?</\w+>)\w+#g) {
		$marker = "^";
		$msg = 'Missing space after closing HTML tag';
		&print_error;
	}	

# "Konquerer" instead of Konqueror?
	if (m#(.*Konquer)er#) {
		$marker = "^";
		$msg = 'Konquerer instead of Konqueror';
		&print_error;
	}	

# "DSFG" instead of DFSG?
       if (m#(.*)?DSFG#) {
		$marker = "^^^^";
		$msg = 'DSFG" instead of DFSG';
		&print_error;
       }

# "advise" instead of advice
       if (m#\b(.*advi)se\b#i) {
		$marker = "^";
		$msg ='advise instead of advice';
		&print_error;
       }

# "licence" instead of license
       if (m#\b(.*licen)ce.*\b#i) {
		$marker = "^";
		$msg = 'licence instead of license';
		&print_error;
       }

# "emphasise" instead of emphasize
       if (m#\b(.*emphasi)ze.*\b#i) {
		$marker = "^";
		$msg = 'emphasise instead of emphasize';
		&print_error;
       }

# "plesae" instead of please
       if (m#\b(.*ple)sae.*\b#i) {
		$marker = "^^";
		$msg = 'plesae instead of please';
		&print_error;
	}

# Useless backslash
	if (m#(.*?)\\[^\n]#) {
		$marker = "^";
		$msg = 'Useless backslash';
		&print_error;
	}	

# Missing closing quotation mark for wml header
	if (/(^#use wml::debian::weeklynews::header.*?)[^"]\n/) {
		$marker = "^";
		$msg = 'Missing closing quotation mark for wml header';
		&print_error;
 	}

# Multiple punctuation marks
	if (m#(.*?[^.])[.!?]{2}(?!\.)#) {
		$marker = "^^";
		$msg = 'Multiple punctuation marks';
		&print_error;
	}

# bare ampersand
	if (m%(.*?)(\&(?!(amp|nbsp|mdash|quot|lt|gt|aacute|euro|auml|uuml|ouml|euml|#\d{3,5});))%) {
		$marker = "^";
		$msg = 'Bare ampersand';
		&print_error;
	}

# *E*nglish, *G*erman ...
	if (/(.*)[ ^](english|german|french|dutch|italian|swedish|finnish|american|canadian|australian|chinese|danish|portuguese|hungarian|indian|irish|austrian|swiss|polish|norwegian|belgian|spanish|russian)[ \n\\]/) {
		$marker = " ^";
		$msg = 'English, German...';
		&print_error;
	}

# Free software
	if (m#(.*[ ^])(free software)#) {
		$marker = "^    ^";
		$msg = 'Free software';
		&print_error;
	}
	
	#separated by \n
	if ($free && /^software()/) {
		$marker = "^";
		$msg = 'Free software (two lines)';
		&print_error;
	}
	if (/.*[ ^]free$/i) {$free = 1}
	else {$free = 0;}

# Open <strong> tag
	if (/^(<p>)<strong>/ and not /<\/strong>/ and $dwn[$line] !~ /.*<\/strong>/) {
		$marker = "^^^^^^^^";
		$msg = 'Open <strong> tag';
		&print_error;
	}

# There is no such thing
       if (m#\b(.*split)ted\b#i) {
		$marker = "^^^";
 		$msg = 'splitted instead of split';
		&print_error;
	}

# Broken link
	if (m#(.*(\Q$(HOME)\E))[^/]#) {
		$marker = "^";
		$msg = 'Broken link';
		&print_error;
	}

# Missing &nbsp; before MB
	if (/(.*)(\d+\s*(GB|MB|%))/) {
		$marker = "^"x(length($2));
		$msg = 'Missing &nbsp; before GB, MB or %';
		&print_error;
	}

# Missing trailing backslash
	if (/(.*href="[^"]+">$)/) {
		$marker = "^";
		$msg = 'Missing trailing backslash';
		&print_error;
	}

# Broken URL
	if (/(href="[^"]+)[^"]$/) {
		$marker = "^";
		$msg = 'Broken URL';
		&print_error;
	}

# Debain
	if (/(.*)Debain/) {
		$marker = "^^^^^^";
		$msg = 'Debain';
		&print_error;
	}

# Triple consonant
	if (/(.*)([b-z])\2\2[aeiou ]/) {
		$marker = "^^^";
		$msg = 'Triple consonant';
		&print_error;
	}

# Capital letter, Security Updates
	if (/(^    )[a-z]/) {
		$marker = "^";
		$msg = 'Capital letter';
		&print_error;
	}

# Capital letter, New and Orphaned packages
	if (/(^     ?-- )[a-z]/) {
		$marker = "^";
		$msg = 'Capital letter';
		&print_error;
	}

# Missing period, Security Updates
	if (/(^    [\w ]+)[^.]$/) {
		$marker = "^";
		$msg = 'Missing period';
		&print_error;
	}

# Missing period, New packages
	if (/(^    -- [\w ]+[^.])<\/li>$/) {
		$marker = "^";
		$msg = 'Missing period';
		&print_error;
	}

# Missing period, Orphaned packages
	if (/(^     -- [\w ]+)[^.]$/) {
		$marker = "^";
		$msg = 'Missing period';
		&print_error;
	}
}

#Print the line and show the mistake
sub print_error {
	if ($verbose == 1) {print "\033[31;1m$msg\033[0m\n"}
	$spaces = " "x(length("$line: $1"));
	print "$line: $_$spaces\033[31;1m$marker\033[0m\n";
}	
