RiksdagenDocumentApiImpl.java

/*
 * Copyright 2010 James Pether Sörling
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *	$Id$
 *  $HeadURL$
*/
package com.hack23.cia.service.external.riksdagen.impl;

import java.math.BigInteger;
import java.util.ArrayList;
import java.util.List;

import javax.xml.bind.JAXBElement;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.oxm.Unmarshaller;
import org.springframework.stereotype.Component;

import com.hack23.cia.model.external.riksdagen.documentcontent.impl.DocumentContentData;
import com.hack23.cia.model.external.riksdagen.dokumentlista.impl.DocumentContainerElement;
import com.hack23.cia.model.external.riksdagen.dokumentlista.impl.DocumentElement;
import com.hack23.cia.model.external.riksdagen.dokumentstatus.impl.DocumentStatusContainer;
import com.hack23.cia.model.external.riksdagen.dokumentstatus.impl.DocumentType;
import com.hack23.cia.service.external.common.api.ProcessDataStrategy;
import com.hack23.cia.service.external.common.api.XmlAgent;
import com.hack23.cia.service.external.riksdagen.api.DataFailureException;
import com.hack23.cia.service.external.riksdagen.api.RiksdagenDocumentApi;

/**
 * Implementation of {@link RiksdagenDocumentApi} that reads document lists,
 * document status and document content from data.riksdagen.se through the
 * injected {@link XmlAgent}.
 *
 * <p>
 * Every public operation wraps any failure in a {@link DataFailureException}
 * after logging a warning that identifies the request parameters.
 * </p>
 */
@Component
final class RiksdagenDocumentApiImpl implements RiksdagenDocumentApi {

	/** Placeholder in {@link #DOCUMENT_LIST_CHANGED_DATE} for the start of the date range. */
	private static final String CHANGED_SINCE_KEY = "${CHANGED_SINCE}";

	/** Placeholder in {@link #DOCUMENT_LIST_CHANGED_DATE} for the end of the date range. */
	private static final String CHANGED_TO_KEY = "${CHANGED_TO}";

	/** Placeholder in {@link #DOCUMENT_CONTENT} for the document id. */
	private static final String DOC_ID_KEY = "${DOC_ID}";

	/** URL template used by {@link #getDocumentContent(String)}. */
	private static final String DOCUMENT_CONTENT = "http://data.riksdagen.se/dokument/${DOC_ID}/text";

	/** URL template for the document list restricted to a from/to date range, sorted by date ascending. */
	private static final String DOCUMENT_LIST_CHANGED_DATE = "http://data.riksdagen.se/dokumentlista/?sok=&doktyp=&rm=&from=${CHANGED_SINCE}&tom=${CHANGED_TO}&ts=&bet=&tempbet=&nr=&org=&iid=&webbtv=&talare=&exakt=&planering=&sort=datum&sortorder=asc&rapport=&utformat=xml&a=";

	/** URL template for the document list restricted to a document type. */
	private static final String DOCUMENT_LIST_TYPE = "http://data.riksdagen.se/dokumentlista/?rm=&typ=${TYPE}&d=&ts=&parti=&iid=&bet=&org=&kat=&sz=200&sort=c&utformat=xml";

	/** URL template for the document list restricted to a year. */
	private static final String DOCUMENT_LIST_YEAR = "http://data.riksdagen.se/dokumentlista/?rm=${YEAR}&typ=&d=&ts=&parti=&iid=&bet=&org=&kat=&sz=200&sort=c&utformat=xml";

	/** URL template used by {@link #getDocumentStatus(String)}. */
	private static final String DOCUMENT_STATUS = "http://data.riksdagen.se/dokumentstatus/${ID_KEY}/xml";

	/** Log message used when the processing strategy fails for a single document. */
	private static final String ERROR_PROCESSING_DOCUMENT = "Error processing document :{}";

	/**
	 * Document list model identifier, passed as the third argument to
	 * {@link XmlAgent#unmarshallXml} for document list requests.
	 */
	private static final String HTTP_DOKUMENTLISTA_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL = "http://dokumentlista.riksdagen.external.model.cia.hack23.com/impl";

	/**
	 * Document status model identifier, passed as the third argument to
	 * {@link XmlAgent#unmarshallXml} for document status requests.
	 */
	private static final String HTTP_DOKUMENTSTATUS_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL = "http://dokumentstatus.riksdagen.external.model.cia.hack23.com/impl";

	/** Placeholder in {@link #DOCUMENT_STATUS} for the document id. */
	private static final String ID_KEY = "${ID_KEY}";

	/** Progress log message: documents loaded so far / hits reported by the service. */
	private static final String LOADING_DOCUMENTS = "Loading documents:{}/{}";

	/** The logger. */
	private static final Logger LOGGER = LoggerFactory.getLogger(RiksdagenDocumentApiImpl.class);

	/** Query-string fragment appended (followed by a number) to request a specific page. */
	private static final String PAGE_PROPERTY = "&p=";

	/** Warning logged when {@link #getDocumentContent(String)} fails. */
	private static final String PROBLEM_GETTING_DOCUMENT_CONTENT_FOR_ID_S_FROM_DATA_RIKSDAGEN_SE = "Problem getting document content for id:{} from data.riksdagen.se";

	/** Warning logged when {@link #getDocumentList(String, String, int)} fails. */
	private static final String PROBLEM_GETTING_DOCUMENT_LIST_CHANGED_SINCE_DATE_S_CHANGED_TO_DATE_S_FROM_DATA_RIKSDAGEN_SE = "Problem getting document list changedSinceDate:{} , changedToDate:{} from data.riksdagen.se";

	/** Warning logged when {@link #getDocumentList(DocumentType, int)} fails. */
	private static final String PROBLEM_GETTING_DOCUMENT_LIST_FOR_DOCUMENT_TYPE_S_MAX_NUMBER_PAGES_S_FROM_DATA_RIKSDAGEN_SE = "Problem getting document list for documentType:{} , maxNumberPages: {} from data.riksdagen.se";

	/** Warning logged when {@link #getDocumentList(Integer, int)} fails. */
	private static final String PROBLEM_GETTING_DOCUMENT_LIST_FOR_YEAR_S_FROM_DATA_RIKSDAGEN_SE = "Problem getting document list for year: {} from data.riksdagen.se";

	/** Warning logged when {@link #getDocumentStatus(String)} fails. */
	private static final String PROBLEM_GETTING_DOCUMENT_STATUS_ID_S_FROM_DATA_RIKSDAGEN_SE = "Problem getting document status id:{}  from data.riksdagen.se";

	/** Warning logged when {@link #processDocumentList(String, String, ProcessDataStrategy)} fails. */
	private static final String PROBLEM_PROCCESSING_DOCUMENT_BETWEEN_CHANGED_SINCE_DATE_S_AND_CHANGE_TO_DATE = "Problem proccessing document between changedSinceDate: {} and changeToDate {}";

	/** Placeholder in {@link #DOCUMENT_LIST_TYPE} for the document type value. */
	private static final String TYPE_KEY = "${TYPE}";

	/** Placeholder in {@link #DOCUMENT_LIST_YEAR} for the year. */
	private static final String YEAR_KEY = "${YEAR}";

	/** Unmarshaller used for document list responses. */
	@Autowired
	@Qualifier("riksdagenDocumentListMarshaller")
	private Unmarshaller riksdagenDocumentListMarshaller;

	/** Unmarshaller used for document status responses. */
	@Autowired
	@Qualifier("riksdagenDocumentStatusMarshaller")
	private Unmarshaller riksdagenDocumentStatusMarshaller;

	/** Agent that retrieves remote content and unmarshals XML. */
	@Autowired
	private XmlAgent xmlAgent;

	/**
	 * Instantiates a new riksdagen document api impl.
	 */
	public RiksdagenDocumentApiImpl() {
		super();
	}

	/**
	 * Turns a protocol-relative URL (one starting with {@code //}) into an
	 * absolute one by prefixing {@code http:}; any other value is returned
	 * unchanged.
	 *
	 * @param nextPage
	 *            the next-page URL as reported by the service; must not be null
	 * @return the URL with an explicit {@code http:} scheme when it was
	 *         protocol-relative, otherwise the input itself
	 */
	private static String fixBrokenUrl(final String nextPage) {
		return nextPage.startsWith("//") ? "http:" + nextPage : nextPage;
	}

	/**
	 * Applies the strategy to every document in the list.
	 *
	 * <p>
	 * A {@link RuntimeException} thrown by the strategy for one document is
	 * logged as a warning together with the document id and does not stop the
	 * remaining documents from being processed.
	 * </p>
	 *
	 * @param dokument
	 *            the documents to process
	 * @param processStrategy
	 *            the strategy invoked once per document
	 */
	private static void processAll(final List<DocumentElement> dokument,
			final ProcessDataStrategy<DocumentElement> processStrategy) {
		for (final DocumentElement documentElement : dokument) {
			try {
				processStrategy.process(documentElement);
			} catch (final RuntimeException e) {
				LOGGER.warn(ERROR_PROCESSING_DOCUMENT, documentElement.getId(), e);
			}
		}
	}

	/**
	 * Retrieves the content of a document.
	 *
	 * @param id
	 *            the document id substituted into the content URL
	 * @return a new {@link DocumentContentData} carrying the id and the
	 *         retrieved content
	 * @throws DataFailureException
	 *             if retrieving the content fails for any reason
	 */
	@Override
	public DocumentContentData getDocumentContent(final String id) throws DataFailureException {
		try {
			final String content = xmlAgent.retriveContent(DOCUMENT_CONTENT.replace(DOC_ID_KEY, id));
			return new DocumentContentData().withId(id).withContent(content);
		} catch (final Exception e) {
			LOGGER.warn(PROBLEM_GETTING_DOCUMENT_CONTENT_FOR_ID_S_FROM_DATA_RIKSDAGEN_SE, id);
			throw new DataFailureException(e);
		}
	}

	/**
	 * Loads the document list for a document type.
	 *
	 * @param documentType
	 *            the document type whose {@code value()} is substituted into
	 *            the list URL
	 * @param maxNumberPages
	 *            upper bound on the number of pages read; the first page is
	 *            always read
	 * @return the documents of all pages read
	 * @throws DataFailureException
	 *             if loading the list fails for any reason
	 */
	@Override
	public List<DocumentElement> getDocumentList(final DocumentType documentType, final int maxNumberPages)
			throws DataFailureException {
		try {
			return loadDocumentList(DOCUMENT_LIST_TYPE.replace(TYPE_KEY, documentType.value()), maxNumberPages);
		} catch (final Exception e) {
			// Arguments are handed to the logger as objects: calling toString()
			// here would throw a second NPE for a null documentType and hide
			// the DataFailureException from the caller.
			LOGGER.warn(PROBLEM_GETTING_DOCUMENT_LIST_FOR_DOCUMENT_TYPE_S_MAX_NUMBER_PAGES_S_FROM_DATA_RIKSDAGEN_SE,
					documentType, maxNumberPages);
			throw new DataFailureException(e);
		}
	}

	/**
	 * Loads the document list for a year.
	 *
	 * @param year
	 *            the year substituted into the list URL
	 * @param maxNumberPages
	 *            upper bound on the number of pages read; the first page is
	 *            always read
	 * @return the documents of all pages read
	 * @throws DataFailureException
	 *             if loading the list fails for any reason
	 */
	@Override
	public List<DocumentElement> getDocumentList(final Integer year, final int maxNumberPages)
			throws DataFailureException {
		try {
			return loadDocumentList(DOCUMENT_LIST_YEAR.replace(YEAR_KEY, year.toString()), maxNumberPages);
		} catch (final Exception e) {
			// year is passed as an object so that a null year is logged as
			// "null" instead of raising a second NPE inside this handler.
			LOGGER.warn(PROBLEM_GETTING_DOCUMENT_LIST_FOR_YEAR_S_FROM_DATA_RIKSDAGEN_SE, year);
			throw new DataFailureException(e);
		}
	}

	/**
	 * Loads the document list for a date range.
	 *
	 * @param changedSinceDate
	 *            value substituted for the {@code from} query parameter
	 * @param changedToDate
	 *            value substituted for the {@code tom} query parameter
	 * @param maxNumberPages
	 *            upper bound on the number of pages read; the first page is
	 *            always read
	 * @return the documents of all pages read
	 * @throws DataFailureException
	 *             if loading the list fails for any reason
	 */
	@Override
	public List<DocumentElement> getDocumentList(final String changedSinceDate, final String changedToDate,
			final int maxNumberPages) throws DataFailureException {
		try {
			final String url = DOCUMENT_LIST_CHANGED_DATE.replace(CHANGED_SINCE_KEY, changedSinceDate)
					.replace(CHANGED_TO_KEY, changedToDate);
			return loadDocumentList(url, maxNumberPages);
		} catch (final Exception e) {
			LOGGER.warn(PROBLEM_GETTING_DOCUMENT_LIST_CHANGED_SINCE_DATE_S_CHANGED_TO_DATE_S_FROM_DATA_RIKSDAGEN_SE,
					changedSinceDate, changedToDate);
			throw new DataFailureException(e);
		}
	}

	/**
	 * Retrieves and unmarshals the status of a document.
	 *
	 * @param id
	 *            the document id substituted into the status URL
	 * @return the unmarshalled document status container
	 * @throws DataFailureException
	 *             if retrieving or unmarshalling the status fails for any
	 *             reason
	 */
	@Override
	public DocumentStatusContainer getDocumentStatus(final String id) throws DataFailureException {
		try {
			final String url = DOCUMENT_STATUS.replace(ID_KEY, id);
			// The status unmarshaller is expected to yield a JAXBElement
			// wrapping the container; the cast cannot be checked at runtime.
			@SuppressWarnings("unchecked")
			final JAXBElement<DocumentStatusContainer> element = (JAXBElement<DocumentStatusContainer>) xmlAgent
					.unmarshallXml(riksdagenDocumentStatusMarshaller, url,
							HTTP_DOKUMENTSTATUS_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL, null, null);
			return element.getValue();
		} catch (final Exception e) {
			LOGGER.warn(PROBLEM_GETTING_DOCUMENT_STATUS_ID_S_FROM_DATA_RIKSDAGEN_SE, id);
			throw new DataFailureException(e);
		}
	}

	/**
	 * Retrieves one page of a document list and unwraps it from its
	 * {@link JAXBElement}.
	 *
	 * @param url
	 *            the URL of the page to retrieve
	 * @return the unmarshalled document list container
	 * @throws Exception
	 *             if the XML agent fails to retrieve or unmarshal the page
	 */
	@SuppressWarnings("unchecked")
	private DocumentContainerElement fetchDocumentList(final String url) throws Exception {
		return ((JAXBElement<DocumentContainerElement>) xmlAgent.unmarshallXml(riksdagenDocumentListMarshaller, url,
				HTTP_DOKUMENTLISTA_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL, null, null)).getValue();
	}

	/**
	 * Loads a document list page by page and hands every document to the
	 * strategy as soon as its page has been read.
	 *
	 * @param url
	 *            the URL of the first page
	 * @param processStrategy
	 *            the strategy invoked once per document
	 * @throws Exception
	 *             if a page cannot be retrieved or unmarshalled
	 */
	private void loadAndProcessDocumentList(final String url,
			final ProcessDataStrategy<DocumentElement> processStrategy) throws Exception {
		final DocumentContainerElement firstPage = fetchDocumentList(url);

		int resultSize = firstPage.getDokument().size();
		processAll(firstPage.getDokument(), processStrategy);

		final int totalPages = firstPage.getTotalPages().intValue();
		// NOTE(review): follow-up pages are requested as &p=1 .. &p=(totalPages-1).
		// If the service numbers pages from 1 and the un-paged request above
		// already returns page 1, this repeats the first page and never
		// requests the last one; loadDocumentList follows getNextPage()
		// instead. Confirm which behaviour is intended.
		for (int i = 1; i < totalPages; i++) {
			final DocumentContainerElement page = fetchDocumentList(url + PAGE_PROPERTY + i);
			resultSize += page.getDokument().size();
			processAll(page.getDokument(), processStrategy);
			LOGGER.info(LOADING_DOCUMENTS, resultSize, firstPage.getHits());
		}
	}

	/**
	 * Loads a document list by following the next-page link of each page.
	 *
	 * <p>
	 * The first page is always read. Further pages are read until either the
	 * total page count reported by the first page or {@code maxNumberPages} is
	 * reached.
	 * </p>
	 *
	 * @param url
	 *            the URL of the first page
	 * @param maxNumberPages
	 *            upper bound on the number of pages read
	 * @return the documents of all pages read, in page order
	 * @throws Exception
	 *             if a page cannot be retrieved or unmarshalled
	 */
	private List<DocumentElement> loadDocumentList(final String url, final int maxNumberPages) throws Exception {
		final List<DocumentElement> result = new ArrayList<>();

		DocumentContainerElement page = fetchDocumentList(url);
		result.addAll(page.getDokument());

		final int totalPages = page.getTotalPages().intValue();
		for (int i = 1; i < totalPages && i < maxNumberPages; i++) {
			page = fetchDocumentList(fixBrokenUrl(page.getNextPage()));
			result.addAll(page.getDokument());
			LOGGER.info(LOADING_DOCUMENTS, result.size(), page.getHits());
		}

		return result;
	}

	/**
	 * Loads the document list for a date range and applies the strategy to
	 * every document found.
	 *
	 * @param changedSinceDate
	 *            value substituted for the {@code from} query parameter
	 * @param changedToDate
	 *            value substituted for the {@code tom} query parameter
	 * @param processStrategy
	 *            the strategy invoked once per document
	 * @throws DataFailureException
	 *             if loading a page of the list fails for any reason
	 */
	@Override
	public void processDocumentList(final String changedSinceDate, final String changedToDate,
			final ProcessDataStrategy<DocumentElement> processStrategy) throws DataFailureException {
		try {
			final String url = DOCUMENT_LIST_CHANGED_DATE.replace(CHANGED_SINCE_KEY, changedSinceDate)
					.replace(CHANGED_TO_KEY, changedToDate);
			loadAndProcessDocumentList(url, processStrategy);
		} catch (final Exception e) {
			LOGGER.warn(PROBLEM_PROCCESSING_DOCUMENT_BETWEEN_CHANGED_SINCE_DATE_S_AND_CHANGE_TO_DATE, changedSinceDate,
					changedToDate);
			throw new DataFailureException(e);
		}
	}

}