1
2
3
4
5
6 package com.hack23.cia.service.impl.agent.sweden;
7
8 import java.text.SimpleDateFormat;
9 import java.util.ArrayList;
10 import java.util.Date;
11 import java.util.Iterator;
12 import java.util.List;
13
14 import org.apache.commons.logging.Log;
15 import org.apache.commons.logging.LogFactory;
16
17 import com.gargoylesoftware.htmlunit.WebClient;
18 import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
19 import com.gargoylesoftware.htmlunit.html.HtmlElement;
20 import com.gargoylesoftware.htmlunit.html.HtmlPage;
21 import com.gargoylesoftware.htmlunit.html.HtmlTable;
22 import com.gargoylesoftware.htmlunit.html.HtmlTableRow;
23 import com.hack23.cia.model.sweden.impl.CommitteeReport;
24
25 /***
26 * The Class CommitteeReportAgentImpl.
27 */
28 public class CommitteeReportAgentImpl implements CommitteeReportAgent {
29
30 /***
31 * The Class DocumentAnswerPage.
32 */
33 class DocumentAnswerPage {
34
35 /*** The anchors. */
36 private List<HtmlAnchor> anchors;;
37
38 /*** The next page link row. */
39 private HtmlTableRow nextPageLinkRow = null;
40
41 /***
42 * Instantiates a new document answer page.
43 *
44 * @param page the page
45 */
46 public DocumentAnswerPage(final HtmlPage page) {
47 try {
48 final HtmlElement answerDiv = page.getHtmlElementById(ANSWER);
49 final Iterator<HtmlElement> iterator = answerDiv
50 .getHtmlElementsByTagName(TABLE).iterator();
51 if (iterator.hasNext()) {
52 final HtmlTable table = (HtmlTable) iterator.next();
53
54 anchors = table.getHtmlElementsByTagName(ANCHOR);
55
56 final List<HtmlTableRow> rows = new ArrayList<HtmlTableRow>(table
57 .getRows());
58 rows.remove(0);
59 nextPageLinkRow = rows.remove(0);
60 rows.remove(rows.size() - 1);
61 return;
62 } else {
63 LOGGER
64 .warn("Problem with page : " + page.getPage().getTitleText() + "\n\n" + page.asXml());
65 }
66 } catch (final Exception e) {
67 LOGGER.warn(e);
68 }
69 }
70
71 /***
72 * Gets the anchors.
73 *
74 * @return the anchors
75 */
76 public List<HtmlAnchor> getAnchors() {
77 return anchors;
78 }
79
80 /***
81 * Gets the next page.
82 *
83 * @return the next page
84 * @throws Exception the exception
85 */
86 public DocumentAnswerPage getNextPage() throws Exception {
87 if (nextPageLinkRow != null) {
88 final List<HtmlAnchor> anchors = nextPageLinkRow
89 .getHtmlElementsByTagName(ANCHOR);
90
91 for (final HtmlAnchor anchor : anchors) {
92 if (NEXT.equals(anchor.asText())
93 || NEXT_VERSION2.equals(anchor.asText())) {
94 return new DocumentAnswerPage((HtmlPage) anchor.click());
95 }
96 }
97 }
98 return null;
99 }
100 }
101
102 /*** The Constant ANCHOR. */
103 private static final String ANCHOR = "a";
104
105 /*** The Constant ANSWER. */
106 private static final String ANSWER = "svar";
107
108 /*** The Constant CENTER_PADDING. */
109 private static final String CENTER_PADDING = "centerPadding";
110
111 /*** The Constant CLASS. */
112 private static final String CLASS = "class";
113
114 /*** The Constant COMMITEE_REPORTS_CONTAIN. */
115 private static final String COMMITEE_REPORTS_CONTAIN = "http://www.riksdagen.se/webbnav/?nid=3120&doktyp=betankande&bet"; //$NON-NLS-1$
116
117 /*** The Constant COMMITEE_REPORTS_PERIOD_2007_08. */
118 private static final String COMMITEE_REPORTS_PERIOD_2010_11 = "http://www.riksdagen.se/webbnav/index.aspx?nid=3110&titel=&rm=2010%2F11&bet=&doktyp=bet%C3%A4nkande&org=&s=S%C3%B6k#t%22"; //$NON-NLS-1$
119
120 /*** The Constant DECISION. */
121 private static final String DECISION = "Beslut:";
122
123 /*** The Constant DIV. */
124 private static final String DIV = "div";
125
126 /*** The Constant LOGGER. */
127 private static final Log LOGGER = LogFactory
128 .getLog(CommitteeReportAgentImpl.class);
129
130 /*** The Constant NEXT. */
131 private static final String NEXT = "nästa sida >";
132
133 /*** The Constant NEXT_VERSION2. */
134 private static final String NEXT_VERSION2 = "nästa >";
135
136 /*** The Constant NORMAL. */
137 private static final String NORMAL = "normal";
138
139 /*** The Constant PARLIAMENT_DECISION. */
140 private static final String PARLIAMENT_DECISION = "Riksdagens beslut";
141
142 /*** The Constant SPAN. */
143 private static final String SPAN = "span";
144
145 /*** The Constant TABLE. */
146 private static final String TABLE = "table";
147
148 /*** The format. */
149 private final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
150
151 /*** The web client. */
152 private final WebClient webClient;
153
154 /***
155 * Instantiates a new committee report agent impl.
156 *
157 * @param webClient the web client
158 */
159 public CommitteeReportAgentImpl(final WebClient webClient) {
160 super();
161 this.webClient = webClient;
162 this.webClient.setJavaScriptEnabled(false);
163 }
164
165
166
167
168
169
170
171
172 @Override
173 public final List<CommitteeReport> getCurrentList() throws Exception {
174 final List<CommitteeReport> resultat = new ArrayList<CommitteeReport>();
175
176 DocumentAnswerPage answerPage = new DocumentAnswerPage(
177 (HtmlPage) webClient.getPage(COMMITEE_REPORTS_PERIOD_2010_11));
178
179 while (answerPage != null) {
180 for (final HtmlAnchor anchor : answerPage.getAnchors()) {
181
182 if (anchor.getHrefAttribute()
183 .contains(COMMITEE_REPORTS_CONTAIN)) {
184
185 final CommitteeReport commiteeReport = new CommitteeReport();
186 commiteeReport.setName(anchor.asText());
187 commiteeReport.setHref(anchor.getHrefAttribute());
188 resultat.add(commiteeReport);
189 }
190 }
191 if (answerPage != null) {
192 answerPage = answerPage.getNextPage();
193 }
194 }
195 LOGGER.info("CommiteeReports found : " + resultat.size());
196 return resultat;
197 }
198
199
200
201
202
203
204
205
206 @Override
207 public final Date getDecidedDateIfAny(final CommitteeReport commiteeReport)
208 throws Exception {
209 final HtmlPage page = (HtmlPage) webClient.getPage(commiteeReport.getHref());
210 final HtmlElement contentDiv = page.getDocumentElement()
211 .getElementsByAttribute(DIV, CLASS, CENTER_PADDING).iterator()
212 .next();
213
214 final List<HtmlElement> contentBlocks = contentDiv.getElementsByAttribute(
215 SPAN, CLASS, NORMAL);
216
217 LOGGER
218 .info("Checking if decision has been made " + commiteeReport.getHref());
219 for (final HtmlElement element : contentBlocks) {
220 final String str = element.asText().trim();
221 if (str.startsWith(PARLIAMENT_DECISION)) {
222
223 final int startIndex = str.indexOf(DECISION);
224
225 if (startIndex >= 0) {
226 final String dateStr = str.substring(startIndex + 8,
227 startIndex + 18).replace("/", "-");
228
229 return parseDate(dateStr);
230 } else {
231 return null;
232 }
233 }
234 }
235 return null;
236 }
237
238 /***
239 * Parses the date.
240 *
241 * @param dateStr the date str
242 * @return the date
243 */
244 private Date parseDate(final String dateStr) {
245 try {
246 return format.parse(dateStr);
247 } catch (final Exception pe) {
248 LOGGER.warn("Problem parsing date ;" + dateStr, pe);
249 }
250 return null;
251 }
252 }