Удзельнік:Zedlik/Код робата/bot.commons.CommonsManager
Код клясы bot.commons.CommonsManager
Файл CommonsManager.java
/*
* Copyright (c) 2008 zedlik.
* jz53@zedlik.com
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the code author nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY ZEDLIK ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL ZEDLIK BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package bot.commons;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.swing.plaf.basic.BasicInternalFrameTitlePane.MaximizeAction;
import bot.interwiki.InterwikiManager;
import bot.interwiki.InterwikiParser;
import net.sourceforge.jwbf.bots.MediaWikiBot;
import net.sourceforge.jwbf.contentRep.mw.SimpleArticle;
public class CommonsManager
{
// View selectors accepted by getCommonsTemplate(): the default view adds no
// extra template parameter, the other two add a "|выгляд=..." parameter.
public static final int COMMONS_TEMPLATE_DEFAULT_VIEW = 0;
public static final int COMMONS_TEMPLATE_VIEW_MINI = 1;
public static final int COMMONS_TEMPLATE_VIEW_FULL = 2;
// Bot for the wiki being edited: used to list page titles and to read and write page content.
private MediaWikiBot b;
// Log file writer; assigned in addCommons() and closed there, so it is null before that call.
private BufferedWriter out;
// Upper bound on the number of pages this instance may modify (Integer.MAX_VALUE unless changed).
private int articlesToChangeCount;
// Number of pages modified so far; incremented after each successful writeArticle() call.
private int articlesChanged;
// Commons title inserted by the most recent edit; quoted in the edit summary.
private String lastCommonsLinkUsed;
// Helper that resolves interwiki links and extracts Commons titles from the linked articles.
private InterwikiManager interwikiManager;
/**
 * Creates a manager that edits pages through the given bot.
 * The change limit starts at {@code Integer.MAX_VALUE}, i.e. effectively unlimited.
 *
 * @param b bot used for listing, reading and writing pages
 */
public CommonsManager(MediaWikiBot b)
{
    this.interwikiManager = new InterwikiManager();
    this.lastCommonsLinkUsed = "";
    this.articlesChanged = 0;
    this.articlesToChangeCount = Integer.MAX_VALUE;
    this.b = b;
}
/**
 * Sets the maximum number of pages this manager is allowed to modify.
 *
 * @param count new upper bound; processing stops once this many pages have been changed
 */
public void setMaxArticlesToChangeCount(int count)
{
    this.articlesToChangeCount = count;
}
/**
 * Processes articles starting from the first page title.
 *
 * @throws Exception propagated from {@link #addCommons(String, boolean)}
 */
public void addArticleCommons() throws Exception
{
    final String fromFirstPage = "";
    final boolean categoriesOnly = false;
    addCommons(fromFirstPage, categoriesOnly);
}
/**
 * Processes category pages starting from the first page title.
 *
 * @throws Exception propagated from {@link #addCommons(String, boolean)}
 */
public void addCategoryCommons() throws Exception
{
    final String fromFirstPage = "";
    final boolean categoriesOnly = true;
    addCommons(fromFirstPage, categoriesOnly);
}
/**
 * Processes articles starting from the given page title.
 *
 * @param startArticle title to start from; an empty string means "from the beginning"
 * @throws Exception propagated from {@link #addCommons(String, boolean)}
 */
public void addArticleCommons(String startArticle) throws Exception
{
    final boolean categoriesOnly = false;
    addCommons(startArticle, categoriesOnly);
}
/**
 * Processes category pages starting from the given page title.
 *
 * @param startArticle title to start from; an empty string means "from the beginning"
 * @throws Exception propagated from {@link #addCommons(String, boolean)}
 */
public void addCategoryCommons(String startArticle) throws Exception
{
    final boolean categoriesOnly = true;
    addCommons(startArticle, categoriesOnly);
}
/**
 * Walks the page titles of one namespace and adds a Commons template to every
 * page that lacks one, until the configured change limit is reached.
 * <p>
 * Progress is logged to the file {@code p:/commons.txt}, which is (re)created on
 * each call. The writer is stored in the {@code out} field because the per-page
 * helpers write to it as well; it is now closed in a {@code finally} block so the
 * file handle is released even when reading or writing a page fails.
 *
 * @param startArticle title to start from; {@code null} or an empty string means
 *                     "from the beginning"
 * @param isCategory   {@code true} to process namespace 14 (categories),
 *                     {@code false} to process namespace 0 (articles)
 * @throws Exception if listing, reading or writing pages fails, if the log file
 *                   cannot be written, or if the thread is interrupted while sleeping
 */
public void addCommons(String startArticle, boolean isCategory) throws Exception
{
    // The bot is handed null rather than "" when no start title was requested.
    String articleToStart = null;
    if (startArticle != null && startArticle.length() > 0)
    {
        articleToStart = startArticle;
    }

    // 14 = categories, 0 = articles.
    int[] namespace = { isCategory ? 14 : 0 };

    Iterator<String> articles = b.getAllPageTitles(articleToStart, null, true, true, namespace).iterator();
    out = new BufferedWriter(new FileWriter("p:/commons.txt"));
    try
    {
        while (articles.hasNext() && (articlesChanged < articlesToChangeCount))
        {
            String articleName = replaceHTMLCodesInTitle(articles.next());
            addArticleCommonsIfNeeded(articleName, isCategory);
            // Pause between pages, then push the log lines to disk.
            Thread.sleep(400);
            out.flush();
        }
    }
    finally
    {
        out.close();
    }
}
/**
 * Decodes decimal numeric character references of one to five digits
 * (the form {@code &#NNNNN;}) in a page title.
 * <p>
 * The title is scanned once, left to right:
 * <ul>
 * <li>the decoded character is inserted literally, so references that decode to
 *     {@code $} or a backslash no longer make the regex replacement throw;</li>
 * <li>values above 65535 are emitted as a surrogate pair instead of being
 *     truncated by a {@code char} cast;</li>
 * <li>text produced by decoding is not scanned again, so an escaped ampersand
 *     followed by {@code #36;} yields the literal text {@code &#36;} rather than
 *     being decoded a second time.</li>
 * </ul>
 *
 * @param articleName title that may contain numeric character references
 * @return the title with every such reference replaced by its character
 */
protected String replaceHTMLCodesInTitle(String articleName)
{
    Matcher m = Pattern.compile("\\&\\#([0-9]{1,5})\\;").matcher(articleName);
    StringBuffer decoded = new StringBuffer();
    while (m.find())
    {
        int codePoint = Integer.parseInt(m.group(1));
        String character = new String(Character.toChars(codePoint));
        // quoteReplacement keeps '$' and '\' from being read as group references or escapes.
        m.appendReplacement(decoded, Matcher.quoteReplacement(character));
    }
    m.appendTail(decoded);
    return decoded.toString();
}
/**
 * Adds a Commons template to one page if it has none yet and a suitable
 * Commons page can be found through the page's interwiki links.
 * <p>
 * Steps: read the page; stop if it already carries a Commons template; look up a
 * Commons title via interwiki; for articles, try to derive a Commons category
 * from that title; pick whichever candidate actually has content on Commons
 * (derived category first); insert the template, log the outcome to the console
 * and to the log writer, and save the page.
 *
 * @param articleName title of the page to process
 * @param isCategory  {@code true} if the page is a category page
 * @throws Exception if reading or writing a page, or writing the log, fails
 */
protected void addArticleCommonsIfNeeded(String articleName, boolean isCategory) throws Exception
{
    final String contents = new SimpleArticle(b.readContent(articleName)).getText();
    if (commonsExists(contents))
    {
        // Page already links to Commons; nothing to do.
        return;
    }

    System.out.println(" processing " + articleName);

    final String fromInterwiki = getCommonsFromInterwiki(contents);
    // Categories use the interwiki result directly; articles try to find a Commons category.
    final String derivedCategory = isCategory ? "" : getCommonsCategoryFromCommons(fromInterwiki);
    final boolean derivedDiffers = derivedCategory.length() > 0 && !derivedCategory.equals(fromInterwiki);

    // Prefer the derived category when it exists on Commons, otherwise fall back
    // to the interwiki title (if it exists). Both missing leaves the target empty.
    String target = "";
    if (derivedDiffers && commonsArticleExists(derivedCategory))
    {
        target = derivedCategory;
    }
    else if ((derivedDiffers || fromInterwiki.length() > 0) && commonsArticleExists(fromInterwiki))
    {
        target = fromInterwiki;
    }

    if (articlesChanged >= articlesToChangeCount || target.length() == 0)
    {
        return;
    }

    final String updated = isCategory
            ? addCommonsToCategory(target, contents)
            : addCommonsToArticle(target, contents);

    if (contents.equals(updated))
    {
        // No insertion point was found, so the text is unchanged.
        final String warning = "[!!!] No ext links: " + articleName + ". Category: " + target;
        System.out.println(warning);
        out.write(warning + "\r\n");
        return;
    }

    final String added = "[+]" + articleName + ": + commons (" + target + ")";
    out.write(added + "\r\n");
    System.out.println(added);

    if (derivedCategory.length() == 0 && !isCategory)
    {
        // No category could be derived for this article; flag it for manual review.
        final String hint = " ---> check commons category for: " + articleName;
        System.out.println(hint);
        out.write(hint + "\r\n");
    }

    lastCommonsLinkUsed = target;
    writeArticle(updated, articleName);
    articlesChanged++;
}
/**
 * Checks whether a page on Wikimedia Commons has any text.
 * A new bot for the Commons endpoint is created on every call.
 *
 * @param commonsArticleTitle title of the Commons page to look up
 * @return {@code true} if the page text read from Commons is non-empty
 * @throws Exception if the page cannot be read
 */
protected boolean commonsArticleExists(String commonsArticleTitle) throws Exception
{
    final MediaWikiBot commonsBot = new MediaWikiBot("http://commons.wikimedia.org/w/");
    final String pageText = new SimpleArticle(commonsBot.readContent(commonsArticleTitle)).getText();
    return pageText.length() != 0;
}
/**
 * Inserts a mini-view Commons template into article text.
 * <p>
 * If the text has a "Вонкавыя спасылкі" (external links) heading, the template is
 * placed on the first line under it. Otherwise a new heading plus template is
 * inserted in front of the first category link. If neither anchor is found the
 * text is returned unchanged, which callers can detect by comparing with the input.
 * <p>
 * Changes from the previous version:
 * <ul>
 * <li>the template is passed through {@code Matcher.quoteReplacement}, so a
 *     Commons title containing {@code $} or a backslash is inserted literally
 *     instead of being read as a group reference;</li>
 * <li>{@code UNICODE_CASE} is added, since {@code CASE_INSENSITIVE} on its own only
 *     folds US-ASCII and never matched differently-cased Cyrillic text;</li>
 * <li>the nested quantifiers {@code (.*){1,10}} and {@code (.*){1,100}} are
 *     replaced by the equivalent {@code .*} to avoid needless backtracking.</li>
 * </ul>
 *
 * @param commonsTitle Commons page title to put in the template
 * @param contents     current wikitext of the article
 * @return the wikitext with the template inserted, or {@code contents} unchanged
 */
public String addCommonsToArticle(String commonsTitle, String contents)
{
    final String template = Matcher.quoteReplacement(getCommonsTemplate(commonsTitle, COMMONS_TEMPLATE_VIEW_MINI));
    final int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;

    // Group 1: the heading plus following line breaks; group 2: the first line under it.
    Pattern p = Pattern.compile("(\\=\\=(?:[ ]{0,4})Вонкавыя спасылкі(?:[ ]{0,4})\\=\\=(?:[\\r\\n]{0,6}))(.*)", flags);
    Matcher m = p.matcher(contents);
    if (m.find())
    {
        return m.replaceFirst("$1" + template + "\r\n$2");
    }

    // No such section: create one right before the first category link.
    p = Pattern.compile("\\[\\[(?:Катэгорыя|Category)\\:.*\\]\\]", flags);
    m = p.matcher(contents);
    if (m.find())
    {
        return m.replaceFirst("== Вонкавыя спасылкі ==\r\n" + template + "\r\n\r\n$0");
    }
    return contents;
}
/**
 * Puts a full-view Commons template on its own line at the very top of a category page.
 *
 * @param commonsTitle Commons page title to put in the template
 * @param contents     current wikitext of the category page
 * @return the template, a CRLF line break, then the original text
 */
public String addCommonsToCategory(String commonsTitle, String contents)
{
    final String banner = getCommonsTemplate(commonsTitle, COMMONS_TEMPLATE_VIEW_FULL);
    return banner + "\r\n" + contents;
}
/**
 * Builds the wikitext of a {@code {{Commons|...}}} template call.
 *
 * @param commonsTitle Commons page title used as the first template argument
 * @param templateView one of the {@code COMMONS_TEMPLATE_*} constants; the mini and
 *                     full views add a "выгляд" parameter, any other value adds nothing
 * @return the template wikitext
 */
protected String getCommonsTemplate(String commonsTitle, int templateView)
{
    final String viewSuffix;
    if (templateView == COMMONS_TEMPLATE_VIEW_MINI)
    {
        viewSuffix = "|выгляд=міні";
    }
    else if (templateView == COMMONS_TEMPLATE_VIEW_FULL)
    {
        viewSuffix = "|выгляд=поўны";
    }
    else
    {
        // Default view and unrecognized values: no extra parameter.
        viewSuffix = "";
    }
    return "{{Commons|" + commonsTitle + viewSuffix + "}}";
}
/**
 * Tells whether the page text already contains a Commons template call,
 * i.e. the text "{{commons|" or "{{commons}}" in any letter case.
 *
 * @param contents wikitext of the page
 * @return {@code true} if such a template call is present
 */
protected boolean commonsExists(String contents)
{
    final String lowered = contents.toLowerCase(new Locale("be"));
    if (lowered.contains("{{commons|"))
    {
        return true;
    }
    return lowered.contains("{{commons}}");
}
/**
 * Looks for a Commons title by following the page's interwiki links.
 * The languages "en", "de", "pl" and "ru" are tried in that order and the first
 * non-empty Commons title reported by the interwiki manager wins.
 *
 * @param contents wikitext of the local page
 * @return the Commons title found, or an empty string if none of the languages yields one
 * @throws Exception if the interwiki manager fails
 */
protected String getCommonsFromInterwiki(String contents) throws Exception
{
    final String[] languages = { "en", "de", "pl", "ru" };
    for (int k = 0; k < languages.length; k++)
    {
        final String language = languages[k];
        final String foreignText = interwikiManager.getInterwikiArticleContents(contents, language);
        final String foreignTitle = interwikiManager.getInterwikiArticleTitle(contents, language);
        final String commonsTitle = interwikiManager.getInterwikiCommonsTitle(foreignText, foreignTitle);
        if (commonsTitle.length() > 0)
        {
            return commonsTitle;
        }
    }
    return "";
}
/**
 * Tries to turn a Commons page title into a Commons category title.
 * <p>
 * A title already starting with "Category:" is returned as is, and an empty title
 * gives an empty result. Otherwise the Commons page is read. If its text contains
 * a redirect or category-redirect marker, a warning is printed and written to the
 * log writer. The page's category links then decide the result: a single category
 * is used directly; with several, the one equal to the title (ignoring case) is used.
 *
 * @param commonsTitle Commons page title, possibly empty
 * @return a title prefixed with "Category:", or an empty string if none was determined
 * @throws Exception if the Commons page cannot be read or the log cannot be written
 */
protected String getCommonsCategoryFromCommons(String commonsTitle) throws Exception
{
    if (commonsTitle.startsWith("Category:"))
    {
        return commonsTitle;
    }
    if (commonsTitle.length() == 0)
    {
        return "";
    }

    final MediaWikiBot commonsBot = new MediaWikiBot("http://commons.wikimedia.org/w/");
    final String pageText = new SimpleArticle(commonsBot.readContent(commonsTitle)).getText();

    // Case-sensitive markers indicating the page points somewhere else.
    final String[] redirectMarkers = {
        "{{Category redirect", "{{category redirect",
        "{{Seecat", "{{seecat",
        "{{See cat", "{{see cat",
        "{{Categoryredirect", "{{categoryredirect",
        "{{Catredirect", "{{catredirect",
        "{{Cat redirect", "{{cat redirect",
        "{{CatRed", "{{catRed",
        "#REDIRECT", "#Redirect", "#redirect"
    };
    boolean redirectFound = false;
    for (int k = 0; k < redirectMarkers.length && !redirectFound; k++)
    {
        redirectFound = pageText.contains(redirectMarkers[k]);
    }
    if (redirectFound)
    {
        // Only reported; processing continues with the page's own categories.
        System.out.println("[!] Check: category redirect found");
        out.write("[!] Check: category redirect found. Commons title: " + commonsTitle + "\r\n");
    }

    final List categories = getArticleCategoriesList(pageText);
    if (categories.size() == 1)
    {
        return "Category:" + (String) categories.get(0);
    }
    // Zero categories: the loop is skipped. Several: pick the one named like the page.
    for (Object entry : categories)
    {
        final String candidate = (String) entry;
        if (candidate.equalsIgnoreCase(commonsTitle))
        {
            return "Category:" + candidate;
        }
    }
    return "";
}
/**
 * Extracts the distinct category names from the {@code [[Category:Name]]} and
 * {@code [[Category:Name|sort key]]} links in a page, in order of first appearance.
 * The "Category" prefix is matched case-insensitively; names are returned as written.
 * <p>
 * Changes from the previous version:
 * <ul>
 * <li>links with an empty name ({@code [[Category:]]}) are skipped; they used to
 *     leave the capture group unset and cause a NullPointerException;</li>
 * <li>names longer than 100 characters are captured whole; previously only the
 *     tail past the first 100 characters was returned;</li>
 * <li>the text is scanned once with {@code find()} instead of restarting the search
 *     at an offset that only advanced by the length of the last name;</li>
 * <li>the nested quantifier was removed to avoid exponential backtracking on
 *     unterminated links.</li>
 * </ul>
 *
 * @param contents wikitext to scan
 * @return category names without the "Category:" prefix; empty if there are none
 */
private List<String> getArticleCategoriesList(String contents)
{
    List<String> categoryList = new ArrayList<String>();
    // Group 1: the category name; an optional "|sort key" part is matched but not captured.
    Pattern p = Pattern.compile("\\[\\[Category\\:([^|\\]]+)(?:\\|.*?)?\\]\\]", Pattern.CASE_INSENSITIVE);
    Matcher m = p.matcher(contents);
    while (m.find())
    {
        String categoryName = m.group(1);
        if (!categoryList.contains(categoryName))
        {
            categoryList.add(categoryName);
        }
    }
    return categoryList;
}
/**
 * Saves new text for a page as a minor edit and then pauses for two seconds.
 * The edit summary names the Commons page that was linked and the interwiki
 * article the link was taken from, as last reported by the interwiki manager.
 *
 * @param articleContents new wikitext of the page
 * @param articeTitle     title of the page to write
 * @throws Exception if the write fails or the thread is interrupted while sleeping
 */
private void writeArticle(String articleContents, String articeTitle) throws Exception
{
    final SimpleArticle page = new SimpleArticle();
    page.setText(articleContents);
    page.setLabel(articeTitle);

    final String commonsLink = "[[:commons:" + lastCommonsLinkUsed + "]]";
    final String sourceLink = "[[:" + interwikiManager.getLastInterwikiUsed() + ":"
            + interwikiManager.getLastInterwikiLinkUsed() + "]]";
    final String summary = "Робат дадаў шаблён Commons (спасылка на " + commonsLink
            + "; крыніца: " + sourceLink + ")";

    page.setEditSummary(summary);
    page.setMinorEdit(true);
    System.out.println(summary);
    b.writeContent(page);
    // Pause after each edit before continuing.
    Thread.sleep(2000);
}
}