View Javadoc
1   /*
2    * Copyright 2010 James Pether Sörling
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *   http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   *	$Id$
17   *  $HeadURL$
18  */
19  package com.hack23.cia.service.external.riksdagen.impl;
20  
21  import java.math.BigInteger;
22  import java.util.ArrayList;
23  import java.util.List;
24  
25  import javax.xml.bind.JAXBElement;
26  
27  import org.slf4j.Logger;
28  import org.slf4j.LoggerFactory;
29  import org.springframework.beans.factory.annotation.Autowired;
30  import org.springframework.beans.factory.annotation.Qualifier;
31  import org.springframework.oxm.Unmarshaller;
32  import org.springframework.stereotype.Component;
33  
34  import com.hack23.cia.model.external.riksdagen.documentcontent.impl.DocumentContentData;
35  import com.hack23.cia.model.external.riksdagen.dokumentlista.impl.DocumentContainerElement;
36  import com.hack23.cia.model.external.riksdagen.dokumentlista.impl.DocumentElement;
37  import com.hack23.cia.model.external.riksdagen.dokumentstatus.impl.DocumentStatusContainer;
38  import com.hack23.cia.model.external.riksdagen.dokumentstatus.impl.DocumentType;
39  import com.hack23.cia.service.external.common.api.ProcessDataStrategy;
40  import com.hack23.cia.service.external.common.api.XmlAgent;
41  import com.hack23.cia.service.external.riksdagen.api.DataFailureException;
42  import com.hack23.cia.service.external.riksdagen.api.RiksdagenDocumentApi;
43  
44  /**
45   * The Class RiksdagenDocumentApiImpl.
46   */
47  @Component
48  final class RiksdagenDocumentApiImpl implements RiksdagenDocumentApi {
49  
50  	/** The Constant CHANGED_SINCE_KEY. */
51  	private static final String CHANGED_SINCE_KEY = "${CHANGED_SINCE}";
52  
53  	/** The Constant CHANGED_TO_KEY. */
54  	private static final String CHANGED_TO_KEY = "${CHANGED_TO}";
55  
56  	/** The Constant DOC_ID_KEY. */
57  	private static final String DOC_ID_KEY = "${DOC_ID}";
58  
59  	/** The Constant DOCUMENT_CONTENT. */
60  	private static final String DOCUMENT_CONTENT = "http://data.riksdagen.se/dokument/${DOC_ID}/text";
61  
62  	/** The Constant DOCUMENT_LIST_CHANGED_DATE. */
63  	private static final String DOCUMENT_LIST_CHANGED_DATE = "http://data.riksdagen.se/dokumentlista/?sok=&doktyp=&rm=&from=${CHANGED_SINCE}&tom=${CHANGED_TO}&ts=&bet=&tempbet=&nr=&org=&iid=&webbtv=&talare=&exakt=&planering=&sort=datum&sortorder=asc&rapport=&utformat=xml&a=";
64  
65  	/** The Constant DOCUMENT_LIST_TYPE. */
66  	private static final String DOCUMENT_LIST_TYPE = "http://data.riksdagen.se/dokumentlista/?rm=&typ=${TYPE}&d=&ts=&parti=&iid=&bet=&org=&kat=&sz=200&sort=c&utformat=xml";
67  
68  	/** The Constant DOCUMENT_LIST_YEAR. */
69  	private static final String DOCUMENT_LIST_YEAR = "http://data.riksdagen.se/dokumentlista/?rm=${YEAR}&typ=&d=&ts=&parti=&iid=&bet=&org=&kat=&sz=200&sort=c&utformat=xml";
70  
71  	/** The Constant DOCUMENT_STATUS. */
72  	private static final String DOCUMENT_STATUS = "http://data.riksdagen.se/dokumentstatus/${ID_KEY}/xml";
73  
74  	/** The Constant ERROR_PROCESSING_DOCUMENT. */
75  	private static final String ERROR_PROCESSING_DOCUMENT = "Error processing document :{}";
76  
77  	/**
78  	 * The Constant
79  	 * HTTP_DOKUMENTLISTA_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL.
80  	 */
81  	private static final String HTTP_DOKUMENTLISTA_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL = "http://dokumentlista.riksdagen.external.model.cia.hack23.com/impl";
82  
83  	/**
84  	 * The Constant
85  	 * HTTP_DOKUMENTSTATUS_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL.
86  	 */
87  	private static final String HTTP_DOKUMENTSTATUS_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL = "http://dokumentstatus.riksdagen.external.model.cia.hack23.com/impl";
88  
89  	/** The Constant ID_KEY. */
90  	private static final String ID_KEY = "${ID_KEY}";
91  
92  	/** The Constant LOADING_DOCUMENTS. */
93  	private static final String LOADING_DOCUMENTS = "Loading documents:{}/{}";
94  
95  	/** The Constant LOGGER. */
96  	private static final Logger LOGGER = LoggerFactory.getLogger(RiksdagenDocumentApiImpl.class);
97  
98  	/** The Constant PAGE_PROPERTY. */
99  	private static final String PAGE_PROPERTY = "&p=";
100 
101 	/**
102 	 * The Constant
103 	 * PROBLEM_GETTING_DOCUMENT_CONTENT_FOR_ID_S_FROM_DATA_RIKSDAGEN_SE.
104 	 */
105 	private static final String PROBLEM_GETTING_DOCUMENT_CONTENT_FOR_ID_S_FROM_DATA_RIKSDAGEN_SE = "Problem getting document content for id:{} from data.riksdagen.se";
106 
107 	/**
108 	 * The Constant
109 	 * PROBLEM_GETTING_DOCUMENT_LIST_CHANGED_SINCE_DATE_S_CHANGED_TO_DATE_S_FROM_DATA_RIKSDAGEN_SE.
110 	 */
111 	private static final String PROBLEM_GETTING_DOCUMENT_LIST_CHANGED_SINCE_DATE_S_CHANGED_TO_DATE_S_FROM_DATA_RIKSDAGEN_SE = "Problem getting document list changedSinceDate:{} , changedToDate:{} from data.riksdagen.se";
112 
113 	/**
114 	 * The Constant
115 	 * PROBLEM_GETTING_DOCUMENT_LIST_FOR_DOCUMENT_TYPE_S_MAX_NUMBER_PAGES_S_FROM_DATA_RIKSDAGEN_SE.
116 	 */
117 	private static final String PROBLEM_GETTING_DOCUMENT_LIST_FOR_DOCUMENT_TYPE_S_MAX_NUMBER_PAGES_S_FROM_DATA_RIKSDAGEN_SE = "Problem getting document list for documentType:{} , maxNumberPages: {} from data.riksdagen.se";
118 
119 	/**
120 	 * The Constant
121 	 * PROBLEM_GETTING_DOCUMENT_LIST_FOR_YEAR_S_FROM_DATA_RIKSDAGEN_SE.
122 	 */
123 	private static final String PROBLEM_GETTING_DOCUMENT_LIST_FOR_YEAR_S_FROM_DATA_RIKSDAGEN_SE = "Problem getting document list for year: {} from data.riksdagen.se";
124 
125 	/**
126 	 * The Constant PROBLEM_GETTING_DOCUMENT_STATUS_ID_S_FROM_DATA_RIKSDAGEN_SE.
127 	 */
128 	private static final String PROBLEM_GETTING_DOCUMENT_STATUS_ID_S_FROM_DATA_RIKSDAGEN_SE = "Problem getting document status id:{}  from data.riksdagen.se";
129 
130 	/**
131 	 * The Constant
132 	 * PROBLEM_PROCCESSING_DOCUMENT_BETWEEN_CHANGED_SINCE_DATE_S_AND_CHANGE_TO_DATE.
133 	 */
134 	private static final String PROBLEM_PROCCESSING_DOCUMENT_BETWEEN_CHANGED_SINCE_DATE_S_AND_CHANGE_TO_DATE = "Problem proccessing document between changedSinceDate: {} and changeToDate {}";
135 
136 	/** The Constant TYPE_KEY. */
137 	private static final String TYPE_KEY = "${TYPE}";
138 
139 	/** The Constant YEAR_KEY. */
140 	private static final String YEAR_KEY = "${YEAR}";
141 
142 	/** The riksdagen document list marshaller. */
143 	@Autowired
144 	@Qualifier("riksdagenDocumentListMarshaller")
145 	private Unmarshaller riksdagenDocumentListMarshaller;
146 
147 	/** The riksdagen document status marshaller. */
148 	@Autowired
149 	@Qualifier("riksdagenDocumentStatusMarshaller")
150 	private Unmarshaller riksdagenDocumentStatusMarshaller;
151 
152 	/** The xml agent. */
153 	@Autowired
154 	private XmlAgent xmlAgent;
155 
156 	/**
157 	 * Instantiates a new riksdagen document api impl.
158 	 */
159 	public RiksdagenDocumentApiImpl() {
160 		super();
161 	}
162 
163 	/**
164 	 * Fix broken url.
165 	 *
166 	 * @param nextPage
167 	 *            the next page
168 	 * @return the string
169 	 */
170 	private static String fixBrokenUrl(final String nextPage) {
171 		if (nextPage.startsWith("//")) {
172 			return "http:" + nextPage;
173 		} else {
174 			return nextPage;
175 		}
176 	}
177 
178 	/**
179 	 * Process all.
180 	 *
181 	 * @param dokument
182 	 *            the dokument
183 	 * @param processStrategy
184 	 *            the process strategy
185 	 */
186 	private static void processAll(final List<DocumentElement> dokument,
187 			final ProcessDataStrategy<DocumentElement> processStrategy) {
188 		for (final DocumentElement documentElement : dokument) {
189 
190 			try {
191 				processStrategy.process(documentElement);
192 			} catch (final RuntimeException e) {
193 				LOGGER.warn(ERROR_PROCESSING_DOCUMENT, documentElement.getId(), e);
194 			}
195 		}
196 	}
197 
198 	@Override
199 	public DocumentContentData getDocumentContent(final String id) throws DataFailureException {
200 		try {
201 			return new DocumentContentData().withId(id)
202 					.withContent(xmlAgent.retriveContent(DOCUMENT_CONTENT.replace(DOC_ID_KEY, id)));
203 		} catch (final Exception e) {
204 			LOGGER.warn(PROBLEM_GETTING_DOCUMENT_CONTENT_FOR_ID_S_FROM_DATA_RIKSDAGEN_SE, id);
205 			throw new DataFailureException(e);
206 		}
207 	}
208 
209 	@Override
210 	public List<DocumentElement> getDocumentList(final DocumentType documentType, final int maxNumberPages)
211 			throws DataFailureException {
212 		try {
213 			return loadDocumentList(DOCUMENT_LIST_TYPE.replace(TYPE_KEY, documentType.value()), maxNumberPages);
214 		} catch (final Exception e) {
215 			LOGGER.warn(PROBLEM_GETTING_DOCUMENT_LIST_FOR_DOCUMENT_TYPE_S_MAX_NUMBER_PAGES_S_FROM_DATA_RIKSDAGEN_SE,
216 					documentType.toString(), Integer.toString(maxNumberPages));
217 			throw new DataFailureException(e);
218 		}
219 	}
220 
221 	@Override
222 	public List<DocumentElement> getDocumentList(final Integer year, final int maxNumberPages)
223 			throws DataFailureException {
224 		try {
225 			return loadDocumentList(DOCUMENT_LIST_YEAR.replace(YEAR_KEY, year.toString()), maxNumberPages);
226 		} catch (final Exception e) {
227 			LOGGER.warn(PROBLEM_GETTING_DOCUMENT_LIST_FOR_YEAR_S_FROM_DATA_RIKSDAGEN_SE, year.toString());
228 			throw new DataFailureException(e);
229 		}
230 	}
231 
232 	@Override
233 	public List<DocumentElement> getDocumentList(final String changedSinceDate, final String changedToDate,
234 			final int maxNumberPages) throws DataFailureException {
235 		try {
236 			return loadDocumentList(DOCUMENT_LIST_CHANGED_DATE.replace(CHANGED_SINCE_KEY, changedSinceDate)
237 					.replace(CHANGED_TO_KEY, changedToDate), maxNumberPages);
238 		} catch (final Exception e) {
239 			LOGGER.warn(PROBLEM_GETTING_DOCUMENT_LIST_CHANGED_SINCE_DATE_S_CHANGED_TO_DATE_S_FROM_DATA_RIKSDAGEN_SE,
240 					changedSinceDate, changedToDate);
241 			throw new DataFailureException(e);
242 		}
243 	}
244 
245 	@Override
246 	public DocumentStatusContainer getDocumentStatus(final String id) throws DataFailureException {
247 		try {
248 			final String url = DOCUMENT_STATUS.replace(ID_KEY, id);
249 			return ((JAXBElement<DocumentStatusContainer>) xmlAgent.unmarshallXml(riksdagenDocumentStatusMarshaller,
250 					url, HTTP_DOKUMENTSTATUS_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL, null, null)).getValue();
251 		} catch (final Exception e) {
252 			LOGGER.warn(PROBLEM_GETTING_DOCUMENT_STATUS_ID_S_FROM_DATA_RIKSDAGEN_SE, id);
253 			throw new DataFailureException(e);
254 		}
255 	}
256 
257 	/**
258 	 * Load and process document list.
259 	 *
260 	 * @param url
261 	 *            the url
262 	 * @param processStrategy
263 	 *            the process strategy
264 	 * @throws Exception
265 	 *             the exception
266 	 */
267 	private void loadAndProcessDocumentList(final String url,
268 			final ProcessDataStrategy<DocumentElement> processStrategy) throws Exception {
269 		final DocumentContainerElement dokumentLista = ((JAXBElement<DocumentContainerElement>) xmlAgent.unmarshallXml(
270 				riksdagenDocumentListMarshaller, url, HTTP_DOKUMENTLISTA_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL,
271 				null, null)).getValue();
272 
273 		int resultSize = dokumentLista.getDokument().size();
274 		processAll(dokumentLista.getDokument(), processStrategy);
275 		final BigInteger pages = dokumentLista.getTotalPages();
276 		for (int i = 1; i < pages.intValue(); i++) {
277 			final DocumentContainerElement otherPagesdokumentLista = ((JAXBElement<DocumentContainerElement>) xmlAgent
278 					.unmarshallXml(riksdagenDocumentListMarshaller, url + PAGE_PROPERTY + i,
279 							HTTP_DOKUMENTLISTA_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL, null, null)).getValue();
280 			resultSize = resultSize + otherPagesdokumentLista.getDokument().size();
281 			processAll(otherPagesdokumentLista.getDokument(), processStrategy);
282 			LOGGER.info(LOADING_DOCUMENTS, resultSize, dokumentLista.getHits());
283 		}
284 	}
285 
286 	/**
287 	 * Load document list.
288 	 *
289 	 * @param url
290 	 *            the url
291 	 * @param maxNumberPages
292 	 *            the max number pages
293 	 * @return the list
294 	 * @throws Exception
295 	 *             the exception
296 	 */
297 	private List<DocumentElement> loadDocumentList(final String url, final int maxNumberPages) throws Exception {
298 		final List<DocumentElement> result = new ArrayList<>();
299 
300 		DocumentContainerElement dokumentLista = ((JAXBElement<DocumentContainerElement>) xmlAgent.unmarshallXml(
301 				riksdagenDocumentListMarshaller, url, HTTP_DOKUMENTLISTA_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL,
302 				null, null)).getValue();
303 		result.addAll(dokumentLista.getDokument());
304 		final BigInteger pages = dokumentLista.getTotalPages();
305 		for (int i = 1; i < pages.intValue() && i < maxNumberPages; i++) {
306 			dokumentLista = ((JAXBElement<DocumentContainerElement>) xmlAgent.unmarshallXml(
307 					riksdagenDocumentListMarshaller, fixBrokenUrl(dokumentLista.getNextPage()),
308 					HTTP_DOKUMENTLISTA_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL, null, null)).getValue();
309 			result.addAll(dokumentLista.getDokument());
310 			LOGGER.info(LOADING_DOCUMENTS, result.size(), dokumentLista.getHits());
311 		}
312 
313 		return result;
314 	}
315 
316 	@Override
317 	public void processDocumentList(final String changedSinceDate, final String changedToDate,
318 			final ProcessDataStrategy<DocumentElement> processStrategy) throws DataFailureException {
319 		try {
320 			loadAndProcessDocumentList(DOCUMENT_LIST_CHANGED_DATE.replace(CHANGED_SINCE_KEY, changedSinceDate)
321 					.replace(CHANGED_TO_KEY, changedToDate), processStrategy);
322 		} catch (final Exception e) {
323 			LOGGER.warn(PROBLEM_PROCCESSING_DOCUMENT_BETWEEN_CHANGED_SINCE_DATE_S_AND_CHANGE_TO_DATE, changedSinceDate,
324 					changedToDate);
325 			throw new DataFailureException(e);
326 		}
327 	}
328 
329 }