/*
 * $Id: RSSWebSpiderAgent.java,v 1.11 2006/02/04 16:18:29 rampil Exp $
 * Copyright (c) 2005 LOGICAL-PARADOX.ORG
 */
package org.logical_paradox.rss.robot;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.TooManyListenersException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.logical_paradox.rss.IllegalObjectStateException;
import org.logical_paradox.rss.http.RSSHttpResponse;
import org.logical_paradox.rss.http.RSSHttpURLConnection;
import org.logical_paradox.rss.http.RSSHttpURLConnectionFactory;
import org.logical_paradox.rss.http.WebContents;
import org.logical_paradox.rss.robot.event.RSSWSAgentEvent;
import org.logical_paradox.rss.robot.event.RSSWSAgentListener;

import HTTPClient.NVPair;

/**
 * RSSWebSpiderAgent
 * Web agent: a worker thread that fetches web content on behalf of a spider robot.
 * @author satoshi akabane@logical-paradox.org
 * @version $Revision: 1.11 $
 */
public class RSSWebSpiderAgent extends Thread {
	/** K[ */
	private static final Log log = LogFactory.getLog(RSSWebSpiderAgent.class);
	/** URLڑ */
	private RSSHttpURLConnection con = null;
	/** Rec */
	private String content = null;
	/** CxgXi[ */
	private RSSWSAgentListener eventListener = null;
	/** s(true: ғ / false:~) */
	private boolean running = false;
	/** ̃G[WFgǗĂeNX */
	private RSSWebSpiderRobot robot = null;

	/**
	 * RXgN^
	 * @param r e{bg
	 */
	public RSSWebSpiderAgent(RSSWebSpiderRobot r) {
		robot = r;
		running = false;
	}
	/**
	 * G[WFg̎sԂԂ
	 * @return true:쒆 / false:I
	 */
	public boolean isRunning() {
		return running;
	}
	/**
	 * G[WFg̏
	 * @param remoteHost ڑzXg
	 * @param timeoutSec ڑ^CAEgb(msec)
	 * @throws MalformedURLException URL
	 * @throws IOException ڑɎs
	 * @throws TooManyListenersException CxgXi[̓o^
	 */
	public void setup(String remoteHost, int timeoutSec) throws MalformedURLException, IOException, TooManyListenersException {
		log.trace("Xbh̃ZbgAbvĂ܂");

		// owb_̓o^
		NVPair[] nvp = {
				new NVPair("User-Agent", robot.getSpiderConfig().getHttpUserAgent()),
				new NVPair("Cache-Control", "no-cache")
		};

		RSSHttpURLConnection c = RSSHttpURLConnectionFactory.getConnection(new URL(remoteHost));
		c.setAllowUserInteraction(false);
		c.setDefaultHeaders(nvp);
		c.setTimeout(timeoutSec);
		try {
			c.removeModule(Class.forName("HTTPClient.CookieModule"));
		} catch(Exception e) {
		}
		con = c;

		log.trace("Xbh̃ZbgAbv܂");
	}
	/**
	 * ڑĂ郊[gzXg(webT[o[)Ԃ
	 * @return [gzXg
	 * @throws IllegalObjectStateException [gzXgɐڑĂȂ
	 */
	public String getRemoteHost() throws IllegalObjectStateException {
		if(con == null) {
			throw new IllegalObjectStateException();
		}
		return con.getURL().toString();
	}
	/**
	 * XbhC
	 */
	public void run() {
		running = true;

		while(running) {
			log.trace("XbhڑJn܂");

			// ڑCxgs
			sendOnConnectEvent(new RSSWSAgentEvent(this));

			boolean rc = robot.AtTheBeginningOfAgentCallback(this);
			if(rc == false) {
				// Xbh̊Jn𖞂ĂȂꍇ́CďI
				robot.AtTheEndOfAgentCallback(this);
				return;
			}

			log.trace("Xbhrun()Jn܂");
			log.info("URL: " + con.getURL().toString());

			RSSHttpResponse res = null;
			try {
				res = RSSHttpURLConnectionFactory.getResponse(con, "8859_1");
				int responseCode = res.getResponseCode();

				if(robot.getSpiderConfig().isEnabledContentType(res.getHeader("Content-Type")) == false) {
					// 擾łȂ^Cv̏ꍇ̓f[^̂Ă
					content = null;
				} else if(responseCode == java.net.HttpURLConnection.HTTP_OK) {
					// Ƀy[W̎擾łꍇ
					log.trace("XbhRec̓ǂݍ݂n߂܂");
					content = res.getText();
					log.trace("XbhڑĂ܂");
				} else if(
					responseCode == java.net.HttpURLConnection.HTTP_MOVED_PERM ||
					responseCode == java.net.HttpURLConnection.HTTP_MOVED_TEMP
				) {
					// ]ꂽꍇ
					String forwardedTo = res.getHeader("Location");
					if(forwardedTo != null) {
						log.trace("]:" + forwardedTo);
					}
				} else {
					// ȊÕXe[^X(500403Ȃ)̏ꍇ͖
					content = null;
				}
			} catch(Exception e) {
				// 炩̃G[ĐڑłȂ
				String errmsg = "failed";
				if(res != null) {
					errmsg += "(" + res.getResponseCode() + ")";
				}
				errmsg += " : " + con.getURL().toString();

				log.info(errmsg);

				content = null;
			} finally {
				// ؒfCxgs
				log.trace("XbhonClose()Cxg𔭍sĂ܂");
				try {
					WebContents webContents = robot.getContentsAnalyzer().analyze(res, content);
					// 炩̃Rec擾łC邢robots.txt̏ꍇ̓Rec𑗐MD
					if(content != null || webContents.isRobotsTxtFlg() == true) {
						sendOnCloseEvent(new RSSWSAgentEvent(this, webContents));
					}
				} catch(Exception e) {
					log.error("Reco^ɗ\ȂG[̂Ŗ܂F", e);
				}
			}
		}
	}
	/**
	 * Xbh~D
	 * XbhƂĂꍇC炪iKňSɒ~D
	 */
	public void shutdown() {
		log.info("~MM܂DXbh~܂D");
		running = false;
		interrupt();
	}
	/**
	 * CxgXi[̓o^
	 * ɓo^ĂꍇAǉēo^łȂ
	 * @param listener CxgXi[
	 */
	public void addRSSWSAgentListener(RSSWSAgentListener listener) throws TooManyListenersException {
		// CxgXi[1o^łȂ
		// ̃CxgXi[o^悤Ƃꍇ́CO𓊂
		if(eventListener != null) {
			throw new TooManyListenersException();
		}
		eventListener = listener;
	}

	/**
	 * CxgXi[̍폜
	 */
	public void removeRSSWSAgentListener() {
		eventListener = null;
	}

	/**
	 * ڑCxg̑M
	 * @param e CxgIuWFNg
	 */
	public void sendOnConnectEvent(RSSWSAgentEvent e) {
		eventListener.onConnect(e);
	}

	/**
	 * ؒfCxg̑M
	 * @param e CxgIuWFNg
	 */
	public void sendOnCloseEvent(RSSWSAgentEvent e) {
		eventListener.onClose(e);
	}
}

// end of RSSWebSpiderAgent.java
