Удзельнік:Zedlik/Код робата/bot.commons.CommonsManager

Код клясы bot.commons.CommonsManager
Файл CommonsManager.java

/*
 * Copyright (c) 2008 zedlik.
 * jz53@zedlik.com
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the code author nor the 
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ZEDLIK ''AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL ZEDLIK BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package bot.commons;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.swing.plaf.basic.BasicInternalFrameTitlePane.MaximizeAction;

import bot.interwiki.InterwikiManager;
import bot.interwiki.InterwikiParser;

import net.sourceforge.jwbf.bots.MediaWikiBot;
import net.sourceforge.jwbf.contentRep.mw.SimpleArticle;

/**
 * Walks the pages of the local wiki and adds a <code>{{Commons|...}}</code>
 * template to every page that does not have one yet.
 * <p>
 * The Commons title is looked up through the interwiki links of the page
 * (see {@link #getCommonsFromInterwiki(String)}); a page is only edited when
 * the chosen Commons page has non-empty text. Every decision is printed to the
 * console and, while {@link #addCommons(String, boolean)} is running, also
 * written to a log file.
 * <p>
 * Instances keep mutable state (edit counter, last link used, log writer) and
 * are not written for concurrent use.
 */
public class CommonsManager 
{
	public static final int COMMONS_TEMPLATE_DEFAULT_VIEW = 0;
	public static final int COMMONS_TEMPLATE_VIEW_MINI = 1;
	public static final int COMMONS_TEMPLATE_VIEW_FULL = 2;
	
	/** Base URL used for all read requests against Wikimedia Commons. */
	private static final String COMMONS_API_URL = "http://commons.wikimedia.org/w/";
	
	/** File that receives the run log while {@link #addCommons(String, boolean)} is active. */
	private static final String LOG_FILE_PATH = "p:/commons.txt";
	
	/** Namespace number passed to the page listing for articles. */
	private static final int ARTICLE_NAMESPACE = 0;
	
	/** Namespace number passed to the page listing for categories. */
	private static final int CATEGORY_NAMESPACE = 14;
	
	/** Interwiki languages consulted for a Commons link, in order of preference. */
	private static final String[] INTERWIKI_LANGUAGES = { "en", "de", "pl", "ru" };
	
	/** Lower-case markers whose presence in a Commons page triggers a "check redirect" warning. */
	private static final String[] REDIRECT_MARKERS = {
		"{{category redirect",
		"{{seecat",
		"{{see cat",
		"{{categoryredirect",
		"{{catredirect",
		"{{cat redirect",
		"{{catred",
		"#redirect"
	};
	
	/** Decimal numeric character reference such as <code>&amp;#1234;</code>. */
	private static final Pattern NUMERIC_ENTITY = Pattern.compile("&#([0-9]{1,5});");
	
	/**
	 * Group 1: the external-links heading plus following line breaks;
	 * group 2: the rest of the first line after them.
	 * UNICODE_CASE is required for CASE_INSENSITIVE to apply to the Cyrillic heading.
	 */
	private static final Pattern EXTERNAL_LINKS_HEADING = Pattern.compile(
			"(==[ ]{0,4}Вонкавыя спасылкі[ ]{0,4}==[\\r\\n]{0,6})(.*)",
			Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
	
	/** First category link of a local page, up to the last <code>]]</code> on its line. */
	private static final Pattern LOCAL_CATEGORY_LINK = Pattern.compile(
			"\\[\\[(?:Катэгорыя|Category):.*\\]\\]",
			Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
	
	/** Category link on a Commons page; group 1 is the category name without the sort key. */
	private static final Pattern COMMONS_CATEGORY_LINK = Pattern.compile(
			"\\[\\[Category:([^|\\]]*)(?:\\|.*?)?\\]\\]",
			Pattern.CASE_INSENSITIVE);
	
	private MediaWikiBot b;
	private BufferedWriter out;
	private int articlesToChangeCount;
	private int articlesChanged;
	private String lastCommonsLinkUsed;
	
	private InterwikiManager interwikiManager;
	
	/** Bot used for reading Commons pages; created on first use and then reused. */
	private MediaWikiBot commonsBot;
	
	
	/**
	 * Creates a manager that reads and writes local pages through the given bot.
	 * The number of pages that may be changed is unlimited until
	 * {@link #setMaxArticlesToChangeCount(int)} is called.
	 *
	 * @param b bot connected to the wiki whose pages are to be edited
	 */
	public CommonsManager(MediaWikiBot b)
	{
		this.b = b;
		articlesToChangeCount = Integer.MAX_VALUE;
		articlesChanged = 0;
		lastCommonsLinkUsed = "";
		
		interwikiManager = new InterwikiManager();
	}
	
	/**
	 * Limits how many pages this manager may change over its lifetime.
	 *
	 * @param count maximum number of pages to edit
	 */
	public void setMaxArticlesToChangeCount(int count)
	{
		articlesToChangeCount = count;
	}
	
	
	/** Processes articles, starting from the beginning of the page list. */
	public void addArticleCommons() throws Exception
	{
		addCommons("", false);
	}
	
	/** Processes categories, starting from the beginning of the page list. */
	public void addCategoryCommons() throws Exception
	{
		addCommons("", true);
	}
	
	/**
	 * Processes articles, starting the page list at the given title.
	 *
	 * @param startArticle title to start from; empty means "from the beginning"
	 */
	public void addArticleCommons(String startArticle) throws Exception
	{
		addCommons(startArticle, false);
	}
	
	/**
	 * Processes categories, starting the page list at the given title.
	 *
	 * @param startArticle title to start from; empty means "from the beginning"
	 */
	public void addCategoryCommons(String startArticle) throws Exception
	{
		addCommons(startArticle, true);
	}
	
	/**
	 * Iterates over the pages of one namespace and adds the Commons template
	 * where needed, until the page list ends or the edit limit is reached.
	 * <p>
	 * The log file is opened for the duration of the run and is always closed,
	 * also when a page fails with an exception.
	 *
	 * @param startArticle title to start from; null or empty means "from the beginning"
	 * @param isCategory   true to process the category namespace, false for articles
	 * @throws Exception on any read, write or log failure, or if the thread is interrupted
	 */
	public void addCommons(String startArticle, boolean isCategory) throws Exception
	{
		String articleToStart = null;
		if (startArticle != null && startArticle.length() > 0)
		{
			articleToStart = startArticle;
		}
		
		int[] namespace = { isCategory ? CATEGORY_NAMESPACE : ARTICLE_NAMESPACE };
		
		Iterator<String> articles = b.getAllPageTitles(articleToStart, null, true, true, namespace).iterator();
		
		out = new BufferedWriter(new FileWriter(LOG_FILE_PATH));
		boolean completed = false;
		try
		{
			while (articles.hasNext() && (articlesChanged < articlesToChangeCount))
			{
				String articleName = replaceHTMLCodesInTitle(articles.next());
				
				addArticleCommonsIfNeeded(articleName, isCategory);
				// pause between pages (presumably to throttle requests to the wiki)
				Thread.sleep(400);
				// flush after every page so the log survives an aborted run
				out.flush();
			}
			completed = true;
		}
		finally
		{
			BufferedWriter writer = out;
			out = null;
			try
			{
				writer.close();
			}
			catch (java.io.IOException e)
			{
				// a close failure must not hide the exception that aborted the run
				if (completed)
				{
					throw e;
				}
			}
		}
	}
	
	/**
	 * Replaces decimal numeric character references (<code>&amp;#NNN;</code>)
	 * in a title by the characters they denote. The title is rescanned from the
	 * start after every replacement until no reference is left.
	 *
	 * @param articleName title as delivered by the page listing
	 * @return the title with all numeric references decoded
	 */
	protected String replaceHTMLCodesInTitle(String articleName)
	{
		String decoded = articleName;
		Matcher m = NUMERIC_ENTITY.matcher(decoded);
		
		while (m.find())
		{
			int codePoint = Integer.parseInt(m.group(1));
			// toChars yields a surrogate pair for values above 0xFFFF instead of truncating
			String character = new String(Character.toChars(codePoint));
			// quote the replacement: a decoded '$' or '\' must be inserted literally
			decoded = m.replaceFirst(Matcher.quoteReplacement(character));
			m.reset(decoded);
		}
		
		return decoded;
	}
	
	/**
	 * Reads one local page and, if it has no Commons template yet, looks up a
	 * Commons title, inserts the template and saves the page.
	 * <p>
	 * Nothing is written when no existing Commons page is found, when the edit
	 * limit has been reached, or when the template could not be placed (the
	 * page text came back unchanged); the last case is logged.
	 *
	 * @param articleName title of the local page
	 * @param isCategory  true if the page is a category, false if it is an article
	 * @throws Exception on any read, write or log failure
	 */
	protected void addArticleCommonsIfNeeded(String articleName, boolean isCategory) throws Exception
	{
		SimpleArticle sa = new SimpleArticle(b.readContent(articleName));
		String contents = sa.getText();
		
		if (commonsExists(contents))
		{
			return;
		}
		
		System.out.println("   processing " + articleName);
		String commonsTitle = getCommonsFromInterwiki(contents);
		// the category lookup on Commons is only done for articles
		String commonsCategory = isCategory ? "" : getCommonsCategoryFromCommons(commonsTitle);
		String commonsTitleToUse = chooseCommonsTitle(commonsTitle, commonsCategory);
		
		if (articlesChanged >= articlesToChangeCount || commonsTitleToUse.length() == 0)
		{
			return;
		}
		
		String contentsCommons = isCategory
				? addCommonsToCategory(commonsTitleToUse, contents)
				: addCommonsToArticle(commonsTitleToUse, contents);
		
		if (contents.equals(contentsCommons))
		{
			// no anchor (external links heading or category link) was found
			report("[!!!] No ext links: " + articleName + ". Category: " + commonsTitleToUse);
			return;
		}
		
		report("[+]" + articleName + ": + commons (" + commonsTitleToUse + ")");
		if (commonsCategory.length() == 0 && !isCategory)
		{
			report("   ---> check commons category for: " + articleName);
		}
		
		lastCommonsLinkUsed = commonsTitleToUse;
		writeArticle(contentsCommons, articleName);
		articlesChanged++;
	}
	
	/**
	 * Picks the Commons title to link to: the category found on Commons if it
	 * differs from the interwiki title and exists, otherwise the interwiki
	 * title if that exists, otherwise the empty string.
	 */
	private String chooseCommonsTitle(String commonsTitle, String commonsCategory) throws Exception
	{
		boolean hasDistinctCategory =
				commonsCategory.length() > 0 && !commonsCategory.equals(commonsTitle);
		
		if (hasDistinctCategory && commonsArticleExists(commonsCategory))
		{
			return commonsCategory;
		}
		
		if ((hasDistinctCategory || commonsTitle.length() > 0) && commonsArticleExists(commonsTitle))
		{
			return commonsTitle;
		}
		
		return "";
	}
	
	/**
	 * Tells whether a page with the given title has non-empty text on Commons.
	 *
	 * @param commonsArticleTitle full Commons title, including any namespace prefix
	 * @return true if the page text is not empty
	 * @throws Exception if the page cannot be read
	 */
	protected boolean commonsArticleExists(String commonsArticleTitle) throws Exception
	{
		SimpleArticle commonsPage = new SimpleArticle(getCommonsBot().readContent(commonsArticleTitle));
		return commonsPage.getText().length() > 0;
	}
	
	/**
	 * Inserts the mini Commons template into an article: directly under the
	 * external-links heading if there is one, otherwise in a newly created
	 * external-links section placed before the first category link. If the
	 * article has neither, the text is returned unchanged.
	 *
	 * @param commonsTitle Commons title to link to
	 * @param contents     current wikitext of the article
	 * @return the wikitext with the template added, or {@code contents} unchanged
	 */
	public String addCommonsToArticle(String commonsTitle, String contents)
	{
		// quoted so that '$' or '\' in a Commons title is not read as a group reference
		String template = Matcher.quoteReplacement(
				getCommonsTemplate(commonsTitle, COMMONS_TEMPLATE_VIEW_MINI));
		
		Matcher heading = EXTERNAL_LINKS_HEADING.matcher(contents);
		if (heading.find())
		{
			return heading.replaceFirst("$1" + template + "\r\n$2");
		}
		
		Matcher category = LOCAL_CATEGORY_LINK.matcher(contents);
		if (category.find())
		{
			return category.replaceFirst("== Вонкавыя спасылкі ==\r\n" + template + "\r\n\r\n$0");
		}
		
		return contents;
	}
	
	/**
	 * Puts the full-view Commons template on its own line at the top of a category page.
	 *
	 * @param commonsTitle Commons title to link to
	 * @param contents     current wikitext of the category page
	 * @return the wikitext with the template prepended
	 */
	public String addCommonsToCategory(String commonsTitle, String contents)
	{
		return getCommonsTemplate(commonsTitle, COMMONS_TEMPLATE_VIEW_FULL) + "\r\n" + contents;
	}
	
	/**
	 * Builds the wikitext of the Commons template.
	 *
	 * @param commonsTitle Commons title to link to
	 * @param templateView one of the <code>COMMONS_TEMPLATE_*</code> constants;
	 *                     any other value gives the template without a view parameter
	 * @return the template call, e.g. <code>{{Commons|Title|выгляд=міні}}</code>
	 */
	protected String getCommonsTemplate(String commonsTitle, int templateView)
	{
		String templateParameters;
		
		switch (templateView)
		{
			case COMMONS_TEMPLATE_VIEW_MINI:
				templateParameters = "|выгляд=міні";
				break;
			case COMMONS_TEMPLATE_VIEW_FULL:
				templateParameters = "|выгляд=поўны";
				break;
			case COMMONS_TEMPLATE_DEFAULT_VIEW:
			default:
				templateParameters = "";
				break;
		}
		
		return "{{Commons|" + commonsTitle + templateParameters + "}}";
	}
	
	/**
	 * Tells whether the wikitext already contains a Commons template, i.e.
	 * <code>{{commons|</code> or <code>{{commons}}</code> in any letter case.
	 *
	 * @param contents wikitext of the page
	 * @return true if a Commons template is present
	 */
	protected boolean commonsExists(String contents)
	{
		String lowered = contents.toLowerCase(new Locale("be"));
		return lowered.contains("{{commons|") || lowered.contains("{{commons}}");
	}
	
	/**
	 * Looks for a Commons title through the interwiki links of the page. The
	 * languages are tried in the order en, de, pl, ru and the first non-empty
	 * result wins.
	 *
	 * @param contents wikitext of the local page
	 * @return the Commons title, or the empty string if no language yields one
	 * @throws Exception if an interwiki page cannot be read
	 */
	protected String getCommonsFromInterwiki(String contents) throws Exception
	{
		for (int i = 0; i < INTERWIKI_LANGUAGES.length; i++)
		{
			String language = INTERWIKI_LANGUAGES[i];
			
			// contents first, then title: the same call order for every language
			String interwikiArticle = interwikiManager.getInterwikiArticleContents(contents, language);
			String interwikiArticleTitle = interwikiManager.getInterwikiArticleTitle(contents, language);
			String interwikiCommonsTitle =
					interwikiManager.getInterwikiCommonsTitle(interwikiArticle, interwikiArticleTitle);
			
			if (interwikiCommonsTitle.length() > 0)
			{
				return interwikiCommonsTitle;
			}
		}
		
		return "";
	}
	
	
	/**
	 * Derives a Commons category for a Commons title.
	 * <p>
	 * A title that already starts with <code>Category:</code> is returned as
	 * is. Otherwise the Commons page is read: if it belongs to exactly one
	 * category, that category is returned; if it belongs to several, the one
	 * whose name equals the title (ignoring case) is returned. A warning is
	 * logged when the page text contains a redirect marker (matched in any
	 * letter case).
	 *
	 * @param commonsTitle Commons title, possibly empty
	 * @return the category title including the <code>Category:</code> prefix,
	 *         or the empty string if none could be determined
	 * @throws Exception if the Commons page cannot be read or the log cannot be written
	 */
	protected String getCommonsCategoryFromCommons(String commonsTitle) throws Exception
	{
		if (commonsTitle.startsWith("Category:"))
		{
			return commonsTitle;
		}
		
		if (commonsTitle.length() == 0)
		{
			return "";
		}
		
		SimpleArticle commonsPage = new SimpleArticle(getCommonsBot().readContent(commonsTitle));
		String commonsArticleContents = commonsPage.getText();
		
		if (containsRedirectMarker(commonsArticleContents))
		{
			System.out.println("[!] Check: category redirect found");
			logToFile("[!] Check: category redirect found. Commons title: " + commonsTitle);
		}
		
		List<String> categories = getArticleCategoriesList(commonsArticleContents);
		
		if (categories.size() == 1)
		{
			return "Category:" + categories.get(0);
		}
		
		for (String category : categories)
		{
			if (category.equalsIgnoreCase(commonsTitle))
			{
				return "Category:" + category;
			}
		}
		
		return "";
	}
	
	/** Tells whether the text contains any of the {@link #REDIRECT_MARKERS}, ignoring case. */
	private boolean containsRedirectMarker(String contents)
	{
		String lowered = contents.toLowerCase(Locale.ENGLISH);
		for (int i = 0; i < REDIRECT_MARKERS.length; i++)
		{
			if (lowered.contains(REDIRECT_MARKERS[i]))
			{
				return true;
			}
		}
		return false;
	}
	
	/**
	 * Collects the distinct category names linked from the given wikitext, in
	 * order of first appearance and without sort keys. Links with an empty
	 * category name are ignored.
	 */
	private List<String> getArticleCategoriesList(String contents)
	{
		List<String> categoryList = new ArrayList<String>();
		
		Matcher m = COMMONS_CATEGORY_LINK.matcher(contents);
		while (m.find())
		{
			String categoryName = m.group(1);
			if (categoryName.length() > 0 && !categoryList.contains(categoryName))
			{
				categoryList.add(categoryName);
			}
		}
		
		return categoryList;
	}
	
	
	/**
	 * Saves the page as a minor edit. The edit summary names the Commons link
	 * that was added and the interwiki page it was taken from. Sleeps two
	 * seconds after the write.
	 */
	private void writeArticle(String articleContents, String articleTitle) throws Exception
	{
		SimpleArticle sa = new SimpleArticle();
		sa.setText(articleContents);
		sa.setLabel(articleTitle);
		
		String linkTo = "[[:commons:" + lastCommonsLinkUsed + "]]";
		String linkUrl = "[[:" + interwikiManager.getLastInterwikiUsed() + ":" + interwikiManager.getLastInterwikiLinkUsed() + "]]";
		
		String summary = "Робат дадаў шаблён Commons (спасылка на " + linkTo + "; крыніца: " + linkUrl + ")";
		
		sa.setEditSummary(summary);
		sa.setMinorEdit(true);
		
		System.out.println(summary);
		b.writeContent(sa);
		
		Thread.sleep(2000);
	}
	
	/** Returns the shared bot for reading Commons, creating it on first use. */
	private MediaWikiBot getCommonsBot() throws Exception
	{
		if (commonsBot == null)
		{
			commonsBot = new MediaWikiBot(COMMONS_API_URL);
		}
		return commonsBot;
	}
	
	/** Prints the line to the console and appends it to the log file. */
	private void report(String line) throws java.io.IOException
	{
		System.out.println(line);
		logToFile(line);
	}
	
	/**
	 * Appends the line to the log file. Does nothing when no log is open, so
	 * the protected methods can be used outside of a run.
	 */
	private void logToFile(String line) throws java.io.IOException
	{
		if (out != null)
		{
			out.write(line + "\r\n");
		}
	}
	
}