spip_screenshot_remove.pl 2.65 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
#!/usr/bin/perl
use strict;
use Data::Dumper;

#
#  Remove already created screenshots from a spip xml dump
#
#  Copyright (C) 2006 Bruno Coudoin.
#
#  spip_screenshot_remove is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License
#  version 2 published by the Free Software Foundation.
#
#  spip_screenshot_remove is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
#  As a special exception to the GNU General Public License, if you
#  distribute this file as part of a program that contains a
#  configuration script generated by Autoconf, you may include it under
#  the same distribution terms that you use for the rest of that program.
#
#  Authors: Bruno Coudoin <bruno.coudoin@free.fr>
#

# -----------------------------------------------------------------------------------------
# USAGE:
# This tool requires the spip xml database as the first parameter.
# The new base is written to stdout.
#------------------------------------------------------------------------------------------


use warnings;    # lexically scoped: applies to all the code below

# -----------------------------------------------------------------------------------------
# Copy a spip xml dump to STDOUT, dropping every <article>...</article>
# element that looks generated, i.e. whose date fields all carry the same
# value.  Everything outside articles is copied unchanged.
#
# Exit status: 0 on success, 1 on a usage error; dies (non-zero) when the
# dump cannot be opened or closed.
# -----------------------------------------------------------------------------------------

# Date fields inspected inside each article.
my @date_fields = qw(date date_redac date_modif);

if (!defined $ARGV[0] || !-f $ARGV[0]) {
    # Diagnostics go to STDERR: STDOUT is reserved for the filtered dump.
    print STDERR "Usage: spip_screenshot_remove <spip xml dump>\n";
    exit 1;
}

open(my $spip, '<', $ARGV[0])
    or die "spip_screenshot_remove: cannot open '$ARGV[0]': $!\n";

my $generated = 0;     # true while inside an article whose dates all agree so far
my $first_date;        # first date value seen in the current article (undef: none yet)
my $pending   = '';    # text read but not yet written to STDOUT

while (my $line = <$spip>) {
    chomp $line;

    if ($line =~ /<article>/) {
        # Flush whatever precedes the article so that it survives even if
        # the article itself is dropped.  A lone blank separator line stays
        # in the buffer and therefore shares the fate of the article.
        if ($pending ne "\n") {
            print STDOUT $pending;
            $pending = '';
        }
        $generated  = 1;
        $first_date = undef;
        $pending .= "$line\n";
    }
    elsif ($line =~ m{</article>}) {
        # Only articles proven to be hand-written are emitted.
        print STDOUT $pending . "$line\n" if !$generated;
        $generated  = 0;
        $first_date = undef;
        $pending    = '';
    }
    else {
        $pending .= "$line\n";
    }

    next if !$generated;

    # Inside a candidate article: compare every date field found on this
    # line against the first date seen.  Dates are compared as strings; a
    # numeric comparison would only look at the leading year.
    # NOTE(review): an article without any date field is still treated as
    # generated, as in the original script -- confirm this is intended.
    for my $field (@date_fields) {
        next if $line !~ /<$field>/;

        my ($value) = $line =~ m{<$field>([0-9\-: ]+)</$field>};

        if (!defined $value) {
            # Unparseable date: we cannot prove the article is generated,
            # so keep it rather than risk deleting real content.
            $generated = 0;
            last;
        }

        if (!defined $first_date) {
            $first_date = $value;
        }
        elsif ($value ne $first_date) {
            # Two different dates: this is a genuine article.
            $generated = 0;
            last;
        }
    }
}

# Trailing text after the last article (or an unterminated article).
print STDOUT $pending;

close($spip)
    or die "spip_screenshot_remove: error closing '$ARGV[0]': $!\n";

exit 0;