/*
 * $Id: RSSWebSpiderRobot.java,v 1.12 2006/02/04 16:18:29 rampil Exp $
 * Copyright (c) 2005 LOGICAL-PARADOX.ORG
 */
package org.logical_paradox.rss.robot;
import java.rmi.RemoteException;
import java.util.Calendar;
import java.util.Date;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.logical_paradox.common.cli.Argument;
import org.logical_paradox.common.cli.ArgumentParser;
import org.logical_paradox.rss.ModuleException;
import org.logical_paradox.rss.RSSConstant;
import org.logical_paradox.rss.RSSProperties;
import org.logical_paradox.rss.RSSSubModule;
import org.logical_paradox.rss.http.RSSHttpURLConnectionFactory;
import org.logical_paradox.rss.http.contents.ContentsAnalyzer;
import org.logical_paradox.rss.robot.event.RSSWSAgentEvent;
import org.logical_paradox.rss.robot.event.RSSWSAgentListener;

/**
 * RSS web spider robot.
 *
 * <p>A robot owns one {@link RSSWebNavigator} and a fixed-size set of
 * {@link RSSWebSpiderAgent} threads. {@link #startModule()} initialises the
 * robot, starts every agent, polls the navigator until the crawl is finished
 * (or has been stalled on suspended sites for too long), asks the agents to
 * shut down, joins them and finally releases the navigator through
 * {@link #close()}.</p>
 *
 * <p>The robot registers itself as listener on every agent it creates and
 * forwards fetched contents to the navigator in
 * {@link #onClose(RSSWSAgentEvent)}.</p>
 *
 * @author satoshi akabane@logical-paradox.org
 * @version $Revision: 1.12 $
 */
public class RSSWebSpiderRobot implements RSSWSAgentListener, RSSSubModule {
	/** Logger. */
	private static final Log log = LogFactory.getLog(RSSWebSpiderRobot.class);

	/** Robot configuration file used when no <code>robot-config</code> option is supplied. */
	public static final String RSS_SPIDER_CONF_PATH = "../conf/rsswebspider.conf";

	/** Interval, in milliseconds, between two navigator status checks in {@link #startModule()}. */
	private static final long POLL_INTERVAL_MILLIS = 500;

	/**
	 * Number of consecutive polls with an empty local queue but suspended sites
	 * remaining after which the robot gives up and shuts the agents down.
	 */
	private static final int MAX_SUSPENDED_POLLS = 30;

	/** Robot identifier; obtained from the navigator during initialisation. */
	private String robotKey = null;
	/** Configuration data for the robot and its agents. */
	protected RSSWebSpiderConfig cfg = null;
	/** Route navigator that supplies URLs and receives fetched contents. */
	protected RSSWebNavigator navigator = null;
	/** Agent threads that perform the crawling. */
	protected RSSWebSpiderAgent[] agents = null;

	/**
	 * Connection timeout (seconds, going by the name) handed to each agent; overwritten
	 * from the configuration during initialisation.
	 * NOTE(review): the original comment appears to describe the default as one
	 * hour, but 360000 seconds is 100 hours - confirm the intended unit/value.
	 */
	protected int timeoutSec = 360000;

	/** Contents analyzer, instantiated reflectively during initialisation. */
	protected ContentsAnalyzer contentsAnalyzer;
	/** Path of the robot configuration file (may be <code>null</code>). */
	private String robotConfig;
	/** Path of the route navigator configuration file (may be <code>null</code>). */
	private String navigatorConfig;
	/** Path of the local contents manager configuration file (may be <code>null</code>). */
	private String localManagerConfig;

	/**
	 * Creates an uninitialised robot. All real setup happens in
	 * {@link #startModule()}.
	 */
	public RSSWebSpiderRobot() {
	}

	/**
	 * Command line entry point.
	 *
	 * <p>Recognised options are <code>robot-config</code>,
	 * <code>navigator-config</code> and <code>lcm-config</code>; anything else
	 * is ignored. If the robot fails, the error is logged and the JVM exits
	 * with status 1.</p>
	 *
	 * @param args program arguments
	 * @throws Exception if the arguments cannot be parsed
	 */
	public static void main(String[] args) throws Exception {
		RSSWebSpiderRobot robot = new RSSWebSpiderRobot();

		for(Argument argument : ArgumentParser.parse(args)) {
			if(!argument.isOption()) {
				continue;
			}
			String name = argument.getName();
			if("robot-config".equals(name)) {
				robot.robotConfig = argument.getValue();
			} else if("navigator-config".equals(name)) {
				robot.navigatorConfig = argument.getValue();
			} else if("lcm-config".equals(name)) {
				robot.localManagerConfig = argument.getValue();
			}
		}

		try {
			robot.startModule();
		} catch(Exception e) {
			log.error("{bg̏Ɏs܂DOmFĂ", e);
			System.exit(1);
		}
	}

	/**
	 * Callback invoked by an agent thread at the beginning of its loop.
	 *
	 * <p>Blocks on the navigator's monitor until the navigator hands out a URL,
	 * then configures the agent with that URL and the current timeout.</p>
	 *
	 * @param me the calling agent
	 * @return <code>true</code> if the agent was set up with a URL,
	 *         <code>false</code> if the wait was interrupted or the setup failed
	 */
	public boolean AtTheBeginningOfAgentCallback(RSSWebSpiderAgent me) {
		navigator.waitUntilEmpty();
		synchronized(navigator) {
			try {
				// Sleep on the navigator monitor until a URL becomes available;
				// onClose() wakes the waiters up when the queue is non-empty.
				String nextUrl = navigator.getNextURL();
				while(nextUrl == null) {
					navigator.wait();
					nextUrl = navigator.getNextURL();
				}
				log.trace(Thread.currentThread().getName() + " " + nextUrl);
				me.setup(nextUrl, timeoutSec);
			} catch(InterruptedException e) {
				// Keep the interrupt visible to the agent thread instead of swallowing it.
				Thread.currentThread().interrupt();
				log.debug("Agent interrupted while waiting for the next URL");
				return false;
			} catch(Exception e) {
				log.warn("Failed to hand the next URL to the agent", e);
				return false;
			}
		}

		return true;
	}

	/**
	 * Callback invoked when an agent thread reaches the end of its work.
	 *
	 * <p>According to the original documentation the return value tells the
	 * agent whether to restart (<code>true</code>) or stop
	 * (<code>false</code>); this implementation always returns
	 * <code>true</code>.</p>
	 *
	 * @param me the calling agent
	 * @return always <code>true</code>
	 */
	public boolean AtTheEndOfAgentCallback(RSSWebSpiderAgent me) {
		return true;
	}

	/**
	 * Initialises the robot: loads the configuration, sets up the HTTP stub
	 * mode, creates the contents analyzer, the navigator and the agents.
	 *
	 * @throws Exception if any step of the initialisation fails
	 */
	private void init() throws Exception {
		// Load the configuration; fall back to the default path when none was given.
		cfg = new RSSWebSpiderConfig(robotConfig != null ? robotConfig : RSS_SPIDER_CONF_PATH);

		log.info("OC^[܂");

		configureHttpStub();

		// Instantiate the contents analyzer named in the properties.
		String analyzerClassName = RSSProperties.getString(RSSConstant.RSS_PKEY_CONTENTS_ANALYZER);
		contentsAnalyzer = (ContentsAnalyzer)Class.forName(analyzerClassName).newInstance();
		log.info("Rec͊𐶐܂F" + analyzerClassName);

		// Create the route navigator.
		navigator = RSSWebNavigator.getNavigator(navigatorConfig);
		navigator.enableLocalContentsMgr(localManagerConfig);
		log.info("oHirQ[^쐬܂");

		robotKey = navigator.getRobotId();
		log.info("Robot name : " + getRobotKey());

		// Create the configured number of agents up front and listen to each of them.
		int agentCount = cfg.getAgentsPerRobot();
		agents = new RSSWebSpiderAgent[agentCount];
		for(int i = 0; i < agentCount; i++) {
			RSSWebSpiderAgent agent = new RSSWebSpiderAgent(this);
			agent.addRSSWSAgentListener(this);
			agents[i] = agent;
		}
		log.info(agents.length + "̒TG[WFg쐬܂");

		timeoutSec = cfg.getAgentConnectionTimeoutSec();

		logAllowedContentTypes();
	}

	/**
	 * Applies the HTTP stub (test mode) settings found in the properties to the
	 * HTTP connection factory. Does nothing when the stub property is absent.
	 */
	private void configureHttpStub() {
		String stubMode = RSSProperties.getString(RSSConstant.RSS_PKEY_STUB_HTTP_CONNECTION);
		if(stubMode == null) {
			return;
		}

		boolean testmode = Boolean.parseBoolean(stubMode);
		RSSHttpURLConnectionFactory.enableTestMode(testmode);

		log.info("eXgڑ[h : " + testmode);

		if(testmode) {
			String hostname = RSSProperties.getString(RSSConstant.RSS_PKEY_STUB_HTTP_HOSTNAME);
			int port = Integer.parseInt(RSSProperties.getString(RSSConstant.RSS_PKEY_STUB_HTTP_PORT));

			RSSHttpURLConnectionFactory.setLocalhostSetting(hostname, port);
			log.info("eXg̐ڑ : http://" + hostname + ":" + port);
		}
	}

	/**
	 * Writes the content types listed in the configuration to the log; a fixed
	 * marker is logged instead when the list is <code>null</code> or empty.
	 */
	private void logAllowedContentTypes() {
		String[] contentTypes = cfg.getAllowedContentType();
		StringBuilder logLine = new StringBuilder("WΏۃRec^Cv: ");

		if(contentTypes == null || contentTypes.length == 0) {
			logLine.append("S");
		} else {
			for(String contentType : contentTypes) {
				logLine.append(contentType).append(" ");
			}
		}
		log.info(logLine.toString());
	}

	/**
	 * Returns the configuration of this robot.
	 * @return the robot configuration, or <code>null</code> before initialisation
	 */
	public RSSWebSpiderConfig getSpiderConfig() {
		return cfg;
	}

	/**
	 * Returns the contents analyzer.
	 * @return the contents analyzer, or <code>null</code> before initialisation
	 */
	public ContentsAnalyzer getContentsAnalyzer() {
		return contentsAnalyzer;
	}

	/**
	 * Returns the identifier of this robot.
	 * @return the robot identifier, or <code>null</code> before initialisation
	 *         and after {@link #close()}
	 */
	public String getRobotKey() {
		return robotKey;
	}

	/**
	 * Listener callback for an agent's connect event; only writes a trace line.
	 * @param e the agent event
	 */
	public synchronized void onConnect(RSSWSAgentEvent e) {
		log.trace("ڑ܂");
	}

	/**
	 * Listener callback for an agent's close event.
	 *
	 * <p>Forwards the event's contents to the navigator and, if the navigator's
	 * queue is non-empty afterwards, wakes up every agent waiting on the
	 * navigator monitor in {@link #AtTheBeginningOfAgentCallback}. Failures are
	 * logged and not propagated.</p>
	 *
	 * @param e the agent event carrying the fetched contents
	 */
	public void onClose(RSSWSAgentEvent e) {
		log.trace("Rec̓ǂݍ݂܂");
		try {
			log.trace("Rec̓eo^Ă܂");
			navigator.sendContents(e.getContents());

			log.trace("[JTL[:" + navigator.getQueueSize());

			synchronized(navigator) {
				if(navigator.getQueueSize() > 0) {
					// Wake the agents blocked while getNextURL() returned null.
					navigator.notifyAll();
				}
			}
		} catch(Exception ex) {
			log.error("Failed to forward fetched contents to the navigator", ex);
		}
	}

	/**
	 * Releases the robot's resources: closes and drops the navigator, clears
	 * the robot identifier and drops the references to all agents.
	 *
	 * <p>The method does not stop the agent threads itself. It is safe to call
	 * more than once (it is invoked both by {@link #startModule()} and by
	 * {@link #finalize()}).</p>
	 *
	 * @throws RemoteException if closing the navigator fails
	 */
	public void close() throws RemoteException {
		if(navigator != null) {
			navigator.close();
			navigator = null;
		}
		robotKey = null;

		if(agents != null) {
			for(int i = 0; i < agents.length; i++) {
				agents[i] = null;
			}
		}
	}

	/**
	 * Finalizer; delegates to {@link #close()}.
	 * @throws RemoteException if closing the navigator fails
	 */
	public void finalize() throws RemoteException {
		close();
	}


	/**
	 * Module control interface: starts the module.
	 *
	 * <p>Initialises the robot, starts all agents and then blocks, polling the
	 * navigator every {@value #POLL_INTERVAL_MILLIS} ms, until the navigator
	 * reports that it is finished or the crawl has been stalled on suspended
	 * sites for {@value #MAX_SUSPENDED_POLLS} consecutive polls. The agents are
	 * then shut down and joined, and the robot is closed.</p>
	 *
	 * <p>NOTE(review): on a failure the agents that were already started are
	 * not shut down here.</p>
	 *
	 * @throws ModuleException wrapping any failure, including interruption of
	 *         the calling thread (whose interrupt status is restored)
	 */
	public void startModule() throws ModuleException {
		try {
			init();

			for(RSSWebSpiderAgent agent : agents) {
				agent.start();
			}

			Date begin = new Date();
			log.info("Jn: " + begin.toString());

			superviseAgents();

			// Wait for every agent thread to terminate.
			for(Thread agentThread : agents) {
				agentThread.join();
			}

			close();
			log.info("done.");

			Date finish = new Date();
			log.info("I: " + finish.toString());
			log.info("T܂");
		} catch(InterruptedException e) {
			// Preserve the interrupt for whoever controls this thread.
			Thread.currentThread().interrupt();
			log.warn("Interrupted while running the robot module", e);
			throw new ModuleException(e);
		} catch(Exception e) {
			log.error("Robot module terminated abnormally", e);
			throw new ModuleException(e);
		}
	}

	/**
	 * Polls the navigator until the crawl should end, then asks every agent to
	 * shut down.
	 *
	 * @throws Exception if the navigator or an agent fails, or if the calling
	 *         thread is interrupted while sleeping
	 */
	private void superviseAgents() throws Exception {
		int suspendedPolls = 0;

		while(true) {
			Thread.sleep(POLL_INTERVAL_MILLIS);

			if(suspendedPolls == MAX_SUSPENDED_POLLS || navigator.isFinished()) {
				// Finished, or stalled for too long: stop all agents.
				for(RSSWebSpiderAgent agent : agents) {
					agent.shutdown();
				}
				break;
			}

			if(navigator.getQueueSize() == 0 && navigator.countSuspendedSites() > 0) {
				// Nothing queued locally but some sites are still suspended:
				// count the stalled poll so the robot can give up eventually.
				suspendedPolls++;
				log.info("T}̉񕜂҂Ă܂:" + suspendedPolls);
			} else {
				// Progress is being made; reset the stall counter.
				suspendedPolls = 0;
			}
		}
	}

	/**
	 * Module control interface: suspends the module.
	 * Currently a no-op.
	 */
	public void suspendModule() throws ModuleException {
	}

	/**
	 * Module control interface: resumes a suspended module.
	 * Currently a no-op.
	 */
	public void continueModule() throws ModuleException {
	}

	/**
	 * Module control interface: stops the module.
	 *
	 * <p>The original documentation states that the module terminates safely
	 * when this method is called and that a stopped module cannot be started
	 * again without creating a new instance. Currently a no-op.</p>
	 */
	public void stopModule() throws ModuleException {
	}
}
