Script usado en la migración del directorio de MediaWiki a Moin

Categorías: Wiki

Un script usado en la migración de MediaWiki a Moin

<?php


// Connection settings for the MySQL server that holds the MediaWiki tables.
// NOTE(review): the mysql_* functions used below belong to the original MySQL
// extension, which was removed in PHP 7.0; this script needs PHP 5 or a port
// to mysqli/PDO.
$host = "localhost";       // address of the MySQL server
$usr = "offray";           // username
$passwd = "";              // password
$db = "wikisoftlibre";     // database name

// Open a persistent connection; stop with the server's error text on failure.
$link = mysql_pconnect($host, $usr, $passwd);
if (!$link) {
    die(mysql_error());
}

// Make $db the default database for the queries issued later in the script.
if (!mysql_select_db($db)) {
    die("Could not select database");
}

// Delimiters of the target wiki's link markup. They are module-level globals
// that the link-conversion functions pull in with `global`.
$WIKI_LINK_START        = '[';   // opens an internal (wiki) link
$WIKI_LINK_END          = ']';   // closes an internal (wiki) link
$EXTERNAL_LINK_START    = '[';   // opens an external link
$EXTERNAL_LINK_END      = ']';   // closes an external link
$EXTERNAL_LINK_DIVIDER  = ' ';   // separates an external link's target from its label

// Titles that must NOT be exported. The page-collection loop further down
// skips every row whose cur_title appears in this list (via in_array()).
// NOTE(review): the entries look like MediaWiki's built-in interface-message
// and log page names rather than user-written articles -- confirm against the
// MediaWiki version that produced the database before adding or removing any.
$not_pages = array("Block_log", "Deletion_log", "Protection_log", "Upload_log", "1movedto2", "1movedto2_redir", "About", "Aboutpage","Aboutwikipedia", "Accesskey-compareselectedversions", "Accesskey-minoredit", "Accesskey-preview", "Accesskey-save", "Accesskey-search", "Accmailtext", "Accmailtitle", "Acct_creation_throttle_hit", "Actioncomplete", "Addedwatch", "Addedwatchtext", "Addsection", "Administrators", "Affirmation", "All", "Allmessages", "Allmessagestext", "Allpages", "Alphaindexline", "Alreadyloggedin", "Alreadyrolled", "Ancientpages", "And", "Anontalk", "Anontalkpagetext", "Anonymous", "Article", "Articleexists", "Articlepage", "Asksql", "Asksqltext", "Autoblocker", "Badarticleerror", "Badfilename", "Badfiletype", "Badipaddress", "Badquery", "Badquerytext", "Badretype", "Badtitle", "Badtitletext", "Blanknamespace", "Blockedtext", "Blockedtitle", "Blockip", "Blockipsuccesssub", "Blockipsuccesstext", "Blockiptext", "Blocklink", "Blocklistline", "Blocklogentry", "Blocklogpage", "Blocklogtext", "Bold_sample", "Bold_tip", "Booksources", "Booksourcetext", "Brokenredirects", "Brokenredirectstext", "Bugreports", "Bugreportspage", "Bureaucratlog", "Bureaucratlogentry", "Bureaucrattext", "Bureaucrattitle", "Bydate", "Byname", "Bysize", "Cachederror", "Cancel", "Cannotdelete", "Cantrollback", "Categories", "Categoriespagetext", "Category", "Category_header", "Categoryarticlecount", "Changepassword", "Changes", "Clearyourcache", "Columns", "Commentedit", "Compareselectedversions", "Confirm", "Confirmcheck", "Confirmdelete", "Confirmdeletetext", "Confirmprotect", "Confirmprotecttext", "Confirmunprotect", "Confirmunprotecttext", "Contextchars", "Contextlines", "Contribslink", "Contribsub", "Contributions", "Copyright", "Copyrightpage", "Copyrightpagename", "Copyrightwarning", "Couldntremove", "Createaccount", "Createaccountmail", "Creditspage", "Cur", "Currentevents", "Currentrev", "Databaseerror", "Dateformat", "Dberrortext", "Dberrortextcl", "Deadendpages", "Debug", 
"Defaultns", "Defemailsubject", "Delete", "Deletecomment", "Deletedarticle", "Deletedtext", "Deleteimg", "Deleteimgcompletely", "Deletepage", "Deletesub", "Deletethispage", "Deletionlog", "Dellogpage", "Dellogpagetext", "Developerspheading", "Developertext", "Developertitle", "Diff", "Difference", "Disambiguations", "Disambiguationspage", "Disambiguationstext", "Disclaimerpage", "Disclaimers", "Doubleredirects", "Doubleredirectstext", "Edit", "Editcomment", "Editconflict", "Editcurrent", "Edithelp", "Edithelppage", "Editing", "Editingold", "Editsection", "Editthispage", "Emailflag", "Emailforlost", "Emailfrom", "Emailmessage", "Emailpage", "Emailpagetext", "Emailsend", "Emailsent", "Emailsenttext", "Emailsubject", "Emailto", "Emailuser", "Emptyfile", "Enterlockreason", "Error", "Errorpagetitle", "Exbeforeblank", "Exblank", "Excontent", "Explainconflict", "Export", "Exportcuronly", "Exporttext", "Extlink_sample", "Extlink_tip", "Faq", "Faqpage", "Feedlinks", "Filecopyerror", "Filedeleteerror", "Filedesc", "Fileexists", "Filename", "Filenotfound", "Filerenameerror", "Filesource", "Filestatus", "Fileuploaded", "Formerror", "Fromwikipedia", "Getimagelist", "Go", "Googlesearch", "Guesstimezone", "Headline_sample", "Headline_tip", "Help", "Helppage", "Hide", "Hidetoc", "Hist", "Histlegend", "History", "History_copyright", "History_short", "Historywarning", "Hr_tip", "Ignorewarning", "Illegalfilename", "Ilshowmatch", "Ilsubmit", "Image_sample", "Image_tip", "Imagelinks", "Imagelist", "Imagelisttext", "Imagepage", "Imagereverted", "Imgdelete", "Imgdesc", "Imghistlegend", "Imghistory", "Imglegend", "Import", "Importfailed", "Importhistoryconflict", "Importnotext", "Importsuccess", "Importtext", "Info_short", "Infobox", "Infobox_alert", "Infosubtitle", "Internalerror", "Intl", "Ip_range_invalid", "Ipaddress", "Ipb_expiry_invalid", "Ipbexpiry", "Ipblocklist", "Ipbreason", "Ipbsubmit", "Ipusubmit", "Ipusuccess", "Isbn", "Isredirect", "Italic_sample", "Italic_tip", 
"Iteminvalidname", "Largefile", "Last", "Lastmodified", "Lastmodifiedby", "Lineno", "Link_sample", "Link_tip", "Linklistsub", "Linkshere", "Linkstoimage", "Linktrail", "Listadmins", "Listform", "Listusers", "Loadhist", "Loadingrev", "Localtime", "Lockbtn", "Lockconfirm", "Lockdb", "Lockdbsuccesssub", "Lockdbsuccesstext", "Lockdbtext", "Locknoconfirm", "Login", "Loginend", "Loginerror", "Loginpagetitle", "Loginproblem", "Loginprompt", "Loginreqtext", "Loginreqtitle", "Loginsuccess", "Loginsuccesstitle", "Logout", "Logouttext", "Logouttitle", "Lonelypages", "Longpages", "Longpagewarning", "Mailerror", "Mailmypassword", "Mailnologin", "Mailnologintext", "Mainpage", "Mainpagedocfooter", "Mainpagetext", "Maintenance", "Maintenancebacklink", "Maintnancepagetext", "Makesysop", "Makesysopfail", "Makesysopname", "Makesysopok", "Makesysopsubmit", "Makesysoptext", "Makesysoptitle", "Matchtotals", "Math", "Math_bad_output", "Math_bad_tmpdir", "Math_failure", "Math_image_error", "Math_lexing_error", "Math_notexvc", "Math_sample", "Math_syntax_error", "Math_tip", "Math_unknown_error", "Math_unknown_function", "Media_sample", "Media_tip", "Minlength", "Minoredit", "Minoreditletter", "Mispeelings", "Mispeelingspage", "Mispeelingstext", "Missingarticle", "Missingimage", "Missinglanguagelinks", "Missinglanguagelinksbutton", "Missinglanguagelinkstext", "Monobook.css", "Monobook.js", "Moredotdotdot", "Move", "Movearticle", "Movedto", "Movenologin", "Movenologintext", "Movepage", "Movepagebtn", "Movepagetalktext", "Movepagetext", "Movetalk", "Movethispage", "Mycontris", "Mypage", "Mytalk", "Navigation", "Nbytes", "Nchanges", "Newarticle", "Newarticletext", "Newmessages", "Newmessageslink", "Newpage", "Newpageletter", "Newpages", "Newpassword", "Newtitle", "Newusersonly","Newwindow", "Next", "Nextn", "Nextpage", "Nlinks", "Noaffirmation", "Noarticletext", "Noblockreason", "Noconnect", "Nocontribs", "Nocookieslogin", "Nocookiesnew", "Nocreativecommons", "Nocredits", "Nodb", 
"Nodublincore", "Noemail", "Noemailtext", "Noemailtitle", "Nogomatch", "Nohistory", "Nolinkshere", "Nolinkstoimage", "Noname", "Nonefound", "Nospecialpagetext", "Nosuchaction", "Nosuchactiontext", "Nosuchspecialpage", "Nosuchuser", "Notacceptable", "Notanarticle", "Notargettext", "Notargettitle", "Note", "Notextmatches", "Notitlematches", "Notloggedin", "Nowatchlist", "Nowiki_sample", "Nowiki_tip", "Nstab-category", "Nstab-help", "Nstab-image", "Nstab-main", "Nstab-media", "Nstab-mediawiki", "Nstab-special", "Nstab-template", "Nstab-user", "Nstab-wp", "Numauthors", "Numedits", "Numtalkauthors", "Numtalkedits", "Numwatchers", "Nviews", "Ok", "Oldpassword", "Orig", "Orphans", "Othercontribs", "Otherlanguages", "Others", "Pagemovedsub", "Pagemovedtext", "Pagetitle", "Passwordremindertext", "Passwordremindertitle", "Passwordsent", "Perfcached", "Perfdisabled", "Perfdisabledsub", "Personaltools", "Popularpages", "Portal", "Portal-url", "Postcomment", "Poweredby", "Powersearch", "Powersearchtext", "Preferences", "Prefs-help-userdata", "Prefs-misc", "Prefs-personal", "Prefs-rc", "Prefslogintext", "Prefsnologin", "Prefsnologintext", "Prefsreset", "Preview", "Previewconflict", "Previewnote", "Prevn", "Printableversion", "Printsubtitle", "Protect", "Protectcomment", "Protectedarticle", "Protectedpage", "Protectedpagewarning", "Protectedtext", "Protectlogpage", "Protectlogtext", "Protectpage", "Protectreason", "Protectsub", "Protectthispage", "Proxyblocker", "Proxyblockreason", "Proxyblocksuccess", "Qbbrowse", "Qbedit", "Qbfind", "Qbmyoptions", "Qbpageinfo", "Qbpageoptions", "Qbsettings", "Qbsettingsnote", "Qbspecialpages", "Querybtn", "Querysuccessful", "Randompage", "Range_block_disabled", "Rchide", "Rclinks", "Rclistfrom", "Rcliu", "Rcloaderr", "Rclsub", "Rcnote", "Rcnotefrom", "Readonly", "Readonlytext", "Readonlywarning", "Recentchanges", "Recentchangescount", "Recentchangeslinked", "Recentchangestext", "Redirectedfrom", "Remembermypassword", "Removechecked", 
"Removedwatch", "Removedwatchtext", "Removingchecked", "Resetprefs", "Restorelink", "Resultsperpage", "Retrievedfrom", "Returnto", "Retypenew", "Reupload", "Reuploaddesc", "Reverted", "Revertimg", "Revertpage", "Revhistory", "Revisionasof", "Revnotfound", "Revnotfoundtext", "Rfcurl", "Rights", "Rollback", "Rollback_short", "Rollbackfailed", "Rollbacklink", "Rows", "Savearticle", "Savedprefs", "Savefile", "Saveprefs", "Search", "Searchdisabled", "Searchhelppage", "Searchingwikipedia", "Searchquery", "Searchresults", "Searchresultshead", "Searchresulttext", "Sectionedit", "Selectnewerversionfordiff", "Selectolderversionfordiff", "Selectonly", "Selflinks", "Selflinkstext", "Seriousxhtmlerrors", "Servertime", "Set_rights_fail", "Set_user_rights", "Setbureaucratflag", "Sharedupload", "Shortpages", "Show", "Showhideminor", "Showingresults", "Showingresultsnum", "Showlast", "Showpreview", "Showtoc", "Sig_tip", "Sitenotice", "Sitestats", "Sitestatstext", "Sitesubtitle", "Sitesupport", "Sitetitle", "Siteuser", "Siteusers", "Skin", "Spamprotectiontext", "Spamprotectiontitle", "Specialpage", "Specialpages", "Spheading", "Sqlislogged", "Sqlquery", "Statistics", "Storedversion", "Stubthreshold", "Subcategories", "Subcategorycount", "Subject", "Subjectpage", "Successfulupload", "Summary", "Sysopspheading", "Sysoptext", "Sysoptitle", "Tableform", "Talk", "Talkexists", "Talkpage", "Talkpagemoved", "Talkpagenotmoved", "Talkpagetext", "Templatesused", "Textboxsize", "Textmatches", "Thisisdeleted", "Thumbnail-more", "Timezonelegend", "Timezoneoffset", "Timezonetext", "Titlematches", "Toc", "Toolbox", "Tooltip-compareselectedversions", "Tooltip-minoredit", "Tooltip-preview", "Tooltip-save", "Tooltip-search", "Uclinks", "Ucnote", "Uctop", "Unblockip", "Unblockiptext", "Unblocklink", "Unblocklogentry", "Uncategorizedpages", "Undelete", "Undelete_short", "Undeletearticle", "Undeletebtn", "Undeletedarticle", "Undeletedtext", "Undeletehistory", "Undeletepage", "Undeletepagetext", 
"Undeleterevision", "Undeleterevisions", "Unexpected", "Unlockbtn", "Unlockconfirm", "Unlockdb", "Unlockdbsuccesssub", "Unlockdbsuccesstext", "Unlockdbtext", "Unprotect", "Unprotectcomment", "Unprotectedarticle", "Unprotectsub", "Unprotectthispage", "Unusedimages", "Unusedimagestext", "Unwatch", "Unwatchthispage", "Updated", "Upload", "Uploadbtn", "Uploadcorrupt", "Uploaddisabled", "Uploadedfiles", "Uploadedimage", "Uploaderror", "Uploadfile", "Uploadlink", "Uploadlog", "Uploadlogpage", "Uploadlogpagetext", "Uploadnologin", "Uploadnologintext", "Uploadtext", "Uploadwarning", "Usenewcategorypage", "User_rights_set", "Usercssjsyoucanpreview", "Usercsspreview", "Userexists", "Userjspreview", "Userlogin", "Userlogout", "Usermailererror", "Userpage", "Userstats", "Userstatstext", "Version", "Viewcount", "Viewprevnext", "Viewsource", "Viewtalkpage", "Wantedpages", "Watch", "Watchdetails", "Watcheditlist", "Watchlist", "Watchlistcontains", "Watchlistsub", "Watchmethod-list", "Watchmethod-recent", "Watchnochange", "Watchnologin", "Watchnologintext", "Watchthis", "Watchthispage", "Welcomecreation", "Whatlinkshere", "Whitelistacctext", "Whitelistacctitle", "Whitelistedittext", "Whitelistedittitle", "Whitelistreadtext", "Whitelistreadtitle", "Wikipediapage", "Wikititlesuffix", "Wlnote", "Wlsaved","Wlshowlast","Wrong_wfQuery_params","Wrongpassword","Yourdiff","Youremail","Yourname","Yournick","Yourpassword","Yourpasswordagain","Yourrealname","Yourtext");


// Collect the title and text of every page to export into two parallel
// arrays, $title and $text (same index = same page).
//
// Both arrays are initialised up front so that an empty result set leaves
// them as empty arrays instead of undefined variables; the export loop
// calls count($title), which fails on an undefined variable.
$title = array();
$text = array();

// Rows last saved by the "MediaWiki default" user are excluded by the query;
// rows whose title is listed in $not_pages are excluded below.
$sql = "SELECT cur_title,cur_text FROM cur where cur_user_text not like \"MediaWiki default\" order by cur_title;";
$query = mysql_query($sql) or die(mysql_error());
while ($row = mysql_fetch_object($query)) {
        // Strict comparison: titles and list entries are both strings, so no
        // numeric-string juggling should take part in the match.
        if (!in_array($row->cur_title, $not_pages, true)) {
                $title[] = $row->cur_title;
                $text[] = $row->cur_text;
        }
}
mysql_free_result($query);

// Number of collected pages (kept in $a, as before).
$a = count($title);

// Name of the output directory, created relative to the current working
// directory.
$dir = "mediawiki_pages";

// Always start from an empty output directory: remove what a previous run
// left behind, then create the directory afresh.
if (file_exists($dir)) {
   rmdirr($dir);
}
if (!mkdir($dir)) {
   die("Could not create output directory $dir\n");
}

// The export loop writes with paths relative to the output directory, so a
// failure to enter it must stop the script -- and say why.
chdir("./$dir") or die("Could not change into output directory $dir\n");


/**
 * preg_replace_callback() helper for moin_quote_page_name(): turns one run
 * of unsafe bytes into "(" + two lowercase hex digits per byte + ")".
 *
 * @param array $match  match array; $match[0] is the run of unsafe bytes
 * @return string       the parenthesised hex encoding of that run
 */
function moin_quote_run($match) {
        return '(' . bin2hex($match[0]) . ')';
}

/**
 * Encodes a page name for use as a directory name.
 *
 * Bytes in [a-zA-Z0-9_] are kept as they are; every maximal run of other
 * bytes is replaced by its hex encoding in parentheses, e.g. "A/B" becomes
 * "A(2f)B". This replaces the former byte-by-byte loop built on ereg(),
 * which no longer exists as of PHP 7.0, and produces the same output.
 *
 * @param string $name  page name (already converted to its final encoding)
 * @return string       the quoted name
 */
function moin_quote_page_name($name) {
        return preg_replace_callback('/[^a-zA-Z0-9_]+/', 'moin_quote_run', $name);
}

// Write one directory per collected page, relative to the output directory
// (the current working directory at this point):
//
//   <quoted title>/current             contains the text "00000001"
//   <quoted title>/revisions/00000001  contains the page text
//
// Paths are built explicitly instead of chdir()-ing into each page directory:
// with chdir(), one failed mkdir()/chdir() followed by the two chdir("..")
// calls would leave the output tree and scatter files outside it.

// Guard against $title never having been set (no pages collected).
$page_count = isset($title) ? count($title) : 0;

for ($a = 0; $a < $page_count; $a++) {
        echo "$title[$a]\n";

        // Spaces become underscores, then the title is converted to UTF-8 and
        // quoted. NOTE(review): utf8_encode() assumes the database returns
        // ISO-8859-1 text and is deprecated as of PHP 8.2 -- confirm the
        // source encoding before porting.
        $title[$a] = moin_quote_page_name(utf8_encode(str_replace(" ", "_", $title[$a])));

        $page_dir = $title[$a];
        $revisions_dir = $page_dir . "/revisions";

        if (is_dir($page_dir)) {
                // Two source titles quoted to the same directory name; the
                // later page overwrites the earlier one, as it did before.
                trigger_error("Page directory $page_dir already exists; overwriting", E_USER_WARNING);
        }
        if (!is_dir($revisions_dir) && !mkdir($revisions_dir, 0777, true)) {
                die("Could not create $revisions_dir\n");
        }

        // "current" names the revision file that holds the page text.
        $file = fopen($page_dir . "/current", "w") or die("Could not write $page_dir/current\n");
        fputs($file, "00000001");
        fclose($file);

        $file = fopen($revisions_dir . "/00000001", "w") or die("Could not write $revisions_dir/00000001\n");

        // Syntax conversion is intentionally not applied (change_syntax() is
        // not called); the text is copied line by line with trailing
        // whitespace removed and "\n" line endings.
        foreach (explode("\n", $text[$a]) as $line) {
                fputs($file, rtrim($line) . "\n");
        }
        fclose($file);
}

// Leave the output directory again.
chdir("..");
###End of Main

/**
 * Runs the line-level markup converters over a page.
 *
 * Each element of the input is treated as one line of text. For every line,
 * in this order: a leading "*" sends it through bullets(), a leading "#"
 * through numberedList(), a leading "==...==" through heading(), a
 * "[[...]]" anywhere through wikiLinks(), and finally every <br>, <br/>
 * (any letter case) is replaced by "[[BR]]".
 *
 * @param array $textString  lines of the page, indexed 0..n-1
 * @return array             the same lines after conversion
 */
function change_syntax ($textString) {
   $lineTotal = count($textString);
   $idx = 0;

   while ($idx < $lineTotal) {
      # bullet list item
      if (substr($textString[$idx], 0, 1) == '*') {
         $textString[$idx] = bullets($textString[$idx]);
      }

      # numbered list item
      if (preg_match("/^#/", $textString[$idx])) {
         $textString[$idx] = numberedList($textString[$idx]);
      }

      # heading
      if (preg_match("/^==.+==/", $textString[$idx])) {
         $textString[$idx] = heading($textString[$idx]);
      }

      # internal wiki link
      if (preg_match("/\[\[.+\]\]/", $textString[$idx])) {
         $textString[$idx] = wikiLinks($textString[$idx]);
      }

      # explicit line break; has to run after the wiki-link step because the
      # replacement text itself uses double square brackets
      if (preg_match("/\<br\/{0,1}\>/i", $textString[$idx])) {
         $textString[$idx] = preg_replace("/\\<br\/{0,1}\>/i", "[[BR]]", $textString[$idx]);
      }

      $idx++;
   }

   return $textString;
}



#custom plugin
#function fileShare($string) {
#   $fileshare = substr($string, strpos($string, "\\\\"));
#   $fileshare = preg_replace("/<\/fileshare>/","",$fileshare);
#   $string = "[file:" .$fileshare ."]";
#   return $string;
#}

/**
 * Converts one MediaWiki heading line ("== Title ==") to the target form,
 * which uses one "=" fewer on each side ("= Title =").
 *
 * Differences from the previous implementation:
 *  - the title is taken as everything between the opening run of "=" and
 *    the closing run at the end of the line, so a "=" inside the title
 *    (e.g. "== a=b ==") no longer truncates it;
 *  - a line without a closing marker keeps its title instead of losing it;
 *  - surrounding whitespace is trimmed, so the result has exactly one space
 *    between markers and title;
 *  - a level-1 heading ("= Title =") keeps one "=" instead of none;
 *  - a line that does not start with "=" is returned unchanged.
 *
 * @param string $string  one line of wiki text
 * @return string         the converted heading line
 */
function heading($string){
   # trailing whitespace/line-ending characters must not hide the closing marker
   $line = rtrim($string);

   # number of leading '=' characters = MediaWiki heading level
   $headingLevel = strspn($line, '=');
   if ($headingLevel === 0) {
      # not a heading at all
      return $string;
   }

   # drop the opening marker, then the closing marker, then the padding
   $theHeading = trim(rtrim(substr($line, $headingLevel), '='));

   # one '=' fewer than MediaWiki, but never fewer than one
   $theSyntax = str_repeat('=', max(1, $headingLevel - 1));

   return $theSyntax . ' ' . $theHeading . ' ' . $theSyntax;
}


/**
 * Converts a MediaWiki bullet line to an indented bullet: a run of N leading
 * "*" becomes N spaces followed by a single "*" ("** x" -> "  * x").
 *
 * The depth is now counted directly from the leading asterisks. The previous
 * loop compared its counter with the length of the string it was shortening,
 * so a line made only of asterisks ("**") came out one level too shallow,
 * and a line not starting with "*" could be mangled.
 *
 * @param string $string  one line of wiki text
 * @return string         the converted line; unchanged if it does not start
 *                        with "*"
 */
function bullets ($string) {
        $depth = strspn($string, '*');
        if ($depth === 0) {
                return $string;
        }

        # N spaces of indentation, one bullet marker, then the rest of the line
        return str_repeat(' ', $depth) . '*' . substr($string, $depth);
}

/**
 * Converts a MediaWiki numbered-list line to an indented "1." item: a run
 * of N leading "#" becomes N spaces followed by "1." ("## x" -> "  1. x").
 *
 * Previously the "^#" test also matched lines starting with "##", so the
 * second-level branch could never run and "## x" became " 1.# x". The depth
 * is now taken from the number of leading "#", which also covers levels
 * deeper than two.
 *
 * Any line starting with "#" is treated as a list item; this function does
 * not distinguish other uses of a leading "#".
 *
 * @param string $string  one line of wiki text
 * @return string         the converted line; unchanged if it does not start
 *                        with "#"
 */
function numberedList ($string) {
        $depth = strspn($string, '#');
        if ($depth === 0) {
                return $string;
        }

        return str_repeat(' ', $depth) . '1.' . substr($string, $depth);
}


/**
 * Handles MediaWiki internal links ("[[Target]]" / "[[Target|label]]").
 *
 * By default the line is returned unchanged: link conversion was switched
 * off in this script on purpose. Before this fix, switching it off left a
 * loop that never modified its input and therefore never terminated for any
 * line containing "[[" and "]]".
 *
 * With $convert = true every complete "[[...]]" is rewritten, using the
 * global delimiters $WIKI_LINK_START / $WIKI_LINK_END:
 *   [[Target]]        ->  START "Target" END
 *   [[Target|label]]  ->  START :Target: label END   (label trimmed)
 * Text outside links, and an unterminated "[[", is copied through as is.
 *
 * @param string $string   one line of wiki text
 * @param bool   $convert  true to rewrite links, false (default) to leave
 *                         them untouched
 * @return string          the resulting line
 */
function wikiLinks ($string, $convert = false) {
   global $WIKI_LINK_START;
   global $WIKI_LINK_END;

   if (!$convert) {
      return $string;
   }

   $result = "";
   $offset = 0;   # first byte of $string not yet copied to $result

   # Each pass consumes one complete link and moves $offset past it, so the
   # loop always makes progress.
   while (($open = strpos($string, "[[", $offset)) !== false
          && ($close = strpos($string, "]]", $open + 2)) !== false) {
      # isolate the link body between the brackets
      $link = substr($string, $open + 2, $close - $open - 2);

      # strict test: a pipe at position 0 is still a pipe
      $dividerPosition = strpos($link, "|");
      if ($dividerPosition === false) {
         $link = $WIKI_LINK_START ."\"". $link ."\"" .$WIKI_LINK_END;
      }
      else {
         $wikilink = substr($link, 0, $dividerPosition);
         $label = trim(substr($link, $dividerPosition + 1));

         $link = $WIKI_LINK_START .":" .$wikilink .": " .$label .$WIKI_LINK_END;
      }

      $result .= substr($string, $offset, $open - $offset) . $link;
      $offset = $close + 2;
   }

   # whatever follows the last converted link
   return $result . substr($string, $offset);
}


/**
 * Rewrites the label divider of single-bracket external links:
 * "[target|label]" or "[target | label]" becomes "[target<divider>label]",
 * where <divider> is the global $EXTERNAL_LINK_DIVIDER (a single space if
 * that global is not set).
 *
 * The previous version could not run: preg_match() was called without a
 * subject, and the unescaped "|" in the pattern is a regex alternation, not
 * a literal pipe. The replacement is also limited to the inside of a single
 * "[...]" now, so double-bracket wiki links and pipes elsewhere on the line
 * are left alone.
 *
 * @param string $string  one line of wiki text
 * @return string         the line with external-link dividers rewritten
 */
function externalLinks($string){
   global $EXTERNAL_LINK_DIVIDER;

   $divider = isset($EXTERNAL_LINK_DIVIDER) ? $EXTERNAL_LINK_DIVIDER : " ";

   # "\" and "$" are special in a preg_replace() replacement string
   $replacement = '[${1}' . addcslashes($divider, '\\$');

   # "[" not preceded or followed by another "[", then the link target, then
   # the pipe with optional whitespace on either side
   $result = preg_replace('/(?<!\[)\[([^\[\]|]+?)\s*\|\s*/', $replacement, $string);

   # preg_replace() returns null on a regex engine error; keep the input then
   return $result === null ? $string : $result;
}

/**
 * Recursively deletes a directory and everything inside it.
 *
 * Entries are listed with scandir() rather than glob("$dir/*"): the glob
 * pattern skips names starting with a dot, which left hidden files behind
 * and made the final rmdir() fail. Symbolic links are removed as links and
 * never followed, so nothing outside $dir is deleted.
 *
 * @param string $dir  path of the directory to remove
 * @return bool        true if $dir was removed; false if it is not a real
 *                     directory, could not be read, or could not be removed
 */
function rmdirr($dir) {
   if (is_link($dir) || !is_dir($dir)) {
       return false;
   }

   $entries = scandir($dir);
   if ($entries === false) {
       return false;
   }

   foreach ($entries as $entry) {
       if ($entry === '.' || $entry === '..') {
           continue;
       }
       $path = $dir . "/" . $entry;
       if (is_dir($path) && !is_link($path)) {
           rmdirr($path);
       } else {
           unlink($path);
       }
   }

   return rmdir($dir);
}

?>

MediaWikiConverter (last edited 2008-04-20 14:40:15 by localhost)