1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package com.hack23.cia.service.external.riksdagen.impl;
20
21 import java.math.BigInteger;
22 import java.util.ArrayList;
23 import java.util.List;
24
25 import javax.xml.bind.JAXBElement;
26
27 import org.slf4j.Logger;
28 import org.slf4j.LoggerFactory;
29 import org.springframework.beans.factory.annotation.Autowired;
30 import org.springframework.beans.factory.annotation.Qualifier;
31 import org.springframework.oxm.Unmarshaller;
32 import org.springframework.stereotype.Component;
33
34 import com.hack23.cia.model.external.riksdagen.documentcontent.impl.DocumentContentData;
35 import com.hack23.cia.model.external.riksdagen.dokumentlista.impl.DocumentContainerElement;
36 import com.hack23.cia.model.external.riksdagen.dokumentlista.impl.DocumentElement;
37 import com.hack23.cia.model.external.riksdagen.dokumentstatus.impl.DocumentStatusContainer;
38 import com.hack23.cia.model.external.riksdagen.dokumentstatus.impl.DocumentType;
39 import com.hack23.cia.service.external.common.api.ProcessDataStrategy;
40 import com.hack23.cia.service.external.common.api.XmlAgent;
41 import com.hack23.cia.service.external.riksdagen.api.DataFailureException;
42 import com.hack23.cia.service.external.riksdagen.api.RiksdagenDocumentApi;
43
44
45
46
47 @Component
48 final class RiksdagenDocumentApiImpl implements RiksdagenDocumentApi {
49
50
51 private static final String CHANGED_SINCE_KEY = "${CHANGED_SINCE}";
52
53
54 private static final String CHANGED_TO_KEY = "${CHANGED_TO}";
55
56
57 private static final String DOC_ID_KEY = "${DOC_ID}";
58
59
60 private static final String DOCUMENT_CONTENT = "http://data.riksdagen.se/dokument/${DOC_ID}/text";
61
62
63 private static final String DOCUMENT_LIST_CHANGED_DATE = "http://data.riksdagen.se/dokumentlista/?sok=&doktyp=&rm=&from=${CHANGED_SINCE}&tom=${CHANGED_TO}&ts=&bet=&tempbet=&nr=&org=&iid=&webbtv=&talare=&exakt=&planering=&sort=datum&sortorder=asc&rapport=&utformat=xml&a=";
64
65
66 private static final String DOCUMENT_LIST_TYPE = "http://data.riksdagen.se/dokumentlista/?rm=&typ=${TYPE}&d=&ts=&parti=&iid=&bet=&org=&kat=&sz=200&sort=c&utformat=xml";
67
68
69 private static final String DOCUMENT_LIST_YEAR = "http://data.riksdagen.se/dokumentlista/?rm=${YEAR}&typ=&d=&ts=&parti=&iid=&bet=&org=&kat=&sz=200&sort=c&utformat=xml";
70
71
72 private static final String DOCUMENT_STATUS = "http://data.riksdagen.se/dokumentstatus/${ID_KEY}/xml";
73
74
75 private static final String ERROR_PROCESSING_DOCUMENT = "Error processing document :{}";
76
77
78
79
80
81 private static final String HTTP_DOKUMENTLISTA_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL = "http://dokumentlista.riksdagen.external.model.cia.hack23.com/impl";
82
83
84
85
86
87 private static final String HTTP_DOKUMENTSTATUS_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL = "http://dokumentstatus.riksdagen.external.model.cia.hack23.com/impl";
88
89
90 private static final String ID_KEY = "${ID_KEY}";
91
92
93 private static final String LOADING_DOCUMENTS = "Loading documents:{}/{}";
94
95
96 private static final Logger LOGGER = LoggerFactory.getLogger(RiksdagenDocumentApiImpl.class);
97
98
99 private static final String PAGE_PROPERTY = "&p=";
100
101
102
103
104
105 private static final String PROBLEM_GETTING_DOCUMENT_CONTENT_FOR_ID_S_FROM_DATA_RIKSDAGEN_SE = "Problem getting document content for id:{} from data.riksdagen.se";
106
107
108
109
110
111 private static final String PROBLEM_GETTING_DOCUMENT_LIST_CHANGED_SINCE_DATE_S_CHANGED_TO_DATE_S_FROM_DATA_RIKSDAGEN_SE = "Problem getting document list changedSinceDate:{} , changedToDate:{} from data.riksdagen.se";
112
113
114
115
116
117 private static final String PROBLEM_GETTING_DOCUMENT_LIST_FOR_DOCUMENT_TYPE_S_MAX_NUMBER_PAGES_S_FROM_DATA_RIKSDAGEN_SE = "Problem getting document list for documentType:{} , maxNumberPages: {} from data.riksdagen.se";
118
119
120
121
122
123 private static final String PROBLEM_GETTING_DOCUMENT_LIST_FOR_YEAR_S_FROM_DATA_RIKSDAGEN_SE = "Problem getting document list for year: {} from data.riksdagen.se";
124
125
126
127
128 private static final String PROBLEM_GETTING_DOCUMENT_STATUS_ID_S_FROM_DATA_RIKSDAGEN_SE = "Problem getting document status id:{} from data.riksdagen.se";
129
130
131
132
133
134 private static final String PROBLEM_PROCCESSING_DOCUMENT_BETWEEN_CHANGED_SINCE_DATE_S_AND_CHANGE_TO_DATE = "Problem proccessing document between changedSinceDate: {} and changeToDate {}";
135
136
137 private static final String TYPE_KEY = "${TYPE}";
138
139
140 private static final String YEAR_KEY = "${YEAR}";
141
142
143 @Autowired
144 @Qualifier("riksdagenDocumentListMarshaller")
145 private Unmarshaller riksdagenDocumentListMarshaller;
146
147
148 @Autowired
149 @Qualifier("riksdagenDocumentStatusMarshaller")
150 private Unmarshaller riksdagenDocumentStatusMarshaller;
151
152
153 @Autowired
154 private XmlAgent xmlAgent;
155
156
157
158
159 public RiksdagenDocumentApiImpl() {
160 super();
161 }
162
163
164
165
166
167
168
169
170 private static String fixBrokenUrl(final String nextPage) {
171 if (nextPage.startsWith("//")) {
172 return "http:" + nextPage;
173 } else {
174 return nextPage;
175 }
176 }
177
178
179
180
181
182
183
184
185
186 private static void processAll(final List<DocumentElement> dokument,
187 final ProcessDataStrategy<DocumentElement> processStrategy) {
188 for (final DocumentElement documentElement : dokument) {
189
190 try {
191 processStrategy.process(documentElement);
192 } catch (final RuntimeException e) {
193 LOGGER.warn(ERROR_PROCESSING_DOCUMENT, documentElement.getId(), e);
194 }
195 }
196 }
197
198 @Override
199 public DocumentContentData getDocumentContent(final String id) throws DataFailureException {
200 try {
201 return new DocumentContentData().withId(id)
202 .withContent(xmlAgent.retriveContent(DOCUMENT_CONTENT.replace(DOC_ID_KEY, id)));
203 } catch (final Exception e) {
204 LOGGER.warn(PROBLEM_GETTING_DOCUMENT_CONTENT_FOR_ID_S_FROM_DATA_RIKSDAGEN_SE, id);
205 throw new DataFailureException(e);
206 }
207 }
208
209 @Override
210 public List<DocumentElement> getDocumentList(final DocumentType documentType, final int maxNumberPages)
211 throws DataFailureException {
212 try {
213 return loadDocumentList(DOCUMENT_LIST_TYPE.replace(TYPE_KEY, documentType.value()), maxNumberPages);
214 } catch (final Exception e) {
215 LOGGER.warn(PROBLEM_GETTING_DOCUMENT_LIST_FOR_DOCUMENT_TYPE_S_MAX_NUMBER_PAGES_S_FROM_DATA_RIKSDAGEN_SE,
216 documentType.toString(), Integer.toString(maxNumberPages));
217 throw new DataFailureException(e);
218 }
219 }
220
221 @Override
222 public List<DocumentElement> getDocumentList(final Integer year, final int maxNumberPages)
223 throws DataFailureException {
224 try {
225 return loadDocumentList(DOCUMENT_LIST_YEAR.replace(YEAR_KEY, year.toString()), maxNumberPages);
226 } catch (final Exception e) {
227 LOGGER.warn(PROBLEM_GETTING_DOCUMENT_LIST_FOR_YEAR_S_FROM_DATA_RIKSDAGEN_SE, year.toString());
228 throw new DataFailureException(e);
229 }
230 }
231
232 @Override
233 public List<DocumentElement> getDocumentList(final String changedSinceDate, final String changedToDate,
234 final int maxNumberPages) throws DataFailureException {
235 try {
236 return loadDocumentList(DOCUMENT_LIST_CHANGED_DATE.replace(CHANGED_SINCE_KEY, changedSinceDate)
237 .replace(CHANGED_TO_KEY, changedToDate), maxNumberPages);
238 } catch (final Exception e) {
239 LOGGER.warn(PROBLEM_GETTING_DOCUMENT_LIST_CHANGED_SINCE_DATE_S_CHANGED_TO_DATE_S_FROM_DATA_RIKSDAGEN_SE,
240 changedSinceDate, changedToDate);
241 throw new DataFailureException(e);
242 }
243 }
244
245 @Override
246 public DocumentStatusContainer getDocumentStatus(final String id) throws DataFailureException {
247 try {
248 final String url = DOCUMENT_STATUS.replace(ID_KEY, id);
249 return ((JAXBElement<DocumentStatusContainer>) xmlAgent.unmarshallXml(riksdagenDocumentStatusMarshaller,
250 url, HTTP_DOKUMENTSTATUS_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL, null, null)).getValue();
251 } catch (final Exception e) {
252 LOGGER.warn(PROBLEM_GETTING_DOCUMENT_STATUS_ID_S_FROM_DATA_RIKSDAGEN_SE, id);
253 throw new DataFailureException(e);
254 }
255 }
256
257
258
259
260
261
262
263
264
265
266
267 private void loadAndProcessDocumentList(final String url,
268 final ProcessDataStrategy<DocumentElement> processStrategy) throws Exception {
269 final DocumentContainerElement dokumentLista = ((JAXBElement<DocumentContainerElement>) xmlAgent.unmarshallXml(
270 riksdagenDocumentListMarshaller, url, HTTP_DOKUMENTLISTA_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL,
271 null, null)).getValue();
272
273 int resultSize = dokumentLista.getDokument().size();
274 processAll(dokumentLista.getDokument(), processStrategy);
275 final BigInteger pages = dokumentLista.getTotalPages();
276 for (int i = 1; i < pages.intValue(); i++) {
277 final DocumentContainerElement otherPagesdokumentLista = ((JAXBElement<DocumentContainerElement>) xmlAgent
278 .unmarshallXml(riksdagenDocumentListMarshaller, url + PAGE_PROPERTY + i,
279 HTTP_DOKUMENTLISTA_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL, null, null)).getValue();
280 resultSize = resultSize + otherPagesdokumentLista.getDokument().size();
281 processAll(otherPagesdokumentLista.getDokument(), processStrategy);
282 LOGGER.info(LOADING_DOCUMENTS, resultSize, dokumentLista.getHits());
283 }
284 }
285
286
287
288
289
290
291
292
293
294
295
296
297 private List<DocumentElement> loadDocumentList(final String url, final int maxNumberPages) throws Exception {
298 final List<DocumentElement> result = new ArrayList<>();
299
300 DocumentContainerElement dokumentLista = ((JAXBElement<DocumentContainerElement>) xmlAgent.unmarshallXml(
301 riksdagenDocumentListMarshaller, url, HTTP_DOKUMENTLISTA_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL,
302 null, null)).getValue();
303 result.addAll(dokumentLista.getDokument());
304 final BigInteger pages = dokumentLista.getTotalPages();
305 for (int i = 1; i < pages.intValue() && i < maxNumberPages; i++) {
306 dokumentLista = ((JAXBElement<DocumentContainerElement>) xmlAgent.unmarshallXml(
307 riksdagenDocumentListMarshaller, fixBrokenUrl(dokumentLista.getNextPage()),
308 HTTP_DOKUMENTLISTA_RIKSDAGEN_EXTERNAL_MODEL_CIA_HACK23_COM_IMPL, null, null)).getValue();
309 result.addAll(dokumentLista.getDokument());
310 LOGGER.info(LOADING_DOCUMENTS, result.size(), dokumentLista.getHits());
311 }
312
313 return result;
314 }
315
316 @Override
317 public void processDocumentList(final String changedSinceDate, final String changedToDate,
318 final ProcessDataStrategy<DocumentElement> processStrategy) throws DataFailureException {
319 try {
320 loadAndProcessDocumentList(DOCUMENT_LIST_CHANGED_DATE.replace(CHANGED_SINCE_KEY, changedSinceDate)
321 .replace(CHANGED_TO_KEY, changedToDate), processStrategy);
322 } catch (final Exception e) {
323 LOGGER.warn(PROBLEM_PROCCESSING_DOCUMENT_BETWEEN_CHANGED_SINCE_DATE_S_AND_CHANGE_TO_DATE, changedSinceDate,
324 changedToDate);
325 throw new DataFailureException(e);
326 }
327 }
328
329 }