1
2
3
4
5
6 package com.hack23.cia.service.impl.agent.sweden;
7
8 import gnu.trove.THashMap;
9
10 import java.util.List;
11 import java.util.Map;
12
13 import org.apache.commons.logging.Log;
14 import org.apache.commons.logging.LogFactory;
15
16 import com.gargoylesoftware.htmlunit.WebClient;
17 import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
18 import com.gargoylesoftware.htmlunit.html.HtmlPage;
19 import com.gargoylesoftware.htmlunit.html.HtmlTable;
20 import com.gargoylesoftware.htmlunit.html.HtmlTableCell;
21 import com.gargoylesoftware.htmlunit.html.HtmlTableRow;
22 import com.hack23.cia.model.sweden.impl.ParliamentMember;
23
24 /***
25 * The Class ParliamentMemberAgentImpl.
26 */
27 public class ParliamentMemberAgentImpl implements ParliamentMemberAgent {
28
29 /*** The Constant ANCHOR. */
30 private static final String ANCHOR = "a";
31
32 /*** The Constant LOGGER. */
33 private static final Log LOGGER = LogFactory
34 .getLog(ParliamentMemberAgentImpl.class);
35
36 /*** The Constant PARLIAMENT_MEMBER_ENGLISH_WIKI_LIST. */
37 private static final String PARLIAMENT_MEMBER_ENGLISH_WIKI_LIST = "http://en.wikipedia.org/wiki/List_of_members_of_the_parliament_of_Sweden,_2010%E2%80%932014"; //$NON-NLS-1$
38
39 /*** The Constant PARLIAMENT_MEMBER_LIST. */
40 private static final String PARLIAMENT_MEMBER_LIST = "http://www.riksdagen.se/webbnav/index.aspx?fnamn=&enamn=&f_ar=&kn=&party=&electoralRegion=&rdlstatus=&org=&sort=&s=1&nid=1102"; //$NON-NLS-1$
41
42 /*** The Constant PARLIAMENT_MEMBER_SWEDISH_WIKI_LIST. */
43 private static final String PARLIAMENT_MEMBER_SWEDISH_WIKI_LIST = "http://sv.wikipedia.org/wiki/Lista_%C3%B6ver_ledam%C3%B6ter_av_Sveriges_riksdag_2010%E2%80%932014"; //$NON-NLS-1$
44
45 /*** The english wiki href map. */
46 private final Map<String, String> englishWikiHrefMap = new THashMap<String, String>();
47
48 /*** The href map. */
49 private final Map<String, String> hrefMap = new THashMap<String, String>();
50
51 /*** The web client. */
52 private final WebClient webClient;
53
54 /*** The wiki href map. */
55 private final Map<String, String> wikiHrefMap = new THashMap<String, String>();
56
57 /***
58 * Instantiates a new parliament member agent impl.
59 *
60 * @param webClient the web client
61 */
62 public ParliamentMemberAgentImpl(final WebClient webClient) {
63 super();
64 this.webClient = webClient;
65 }
66
67 /***
68 * Extract name.
69 *
70 * @param anchor the anchor
71 * @return the string
72 */
73 private String extractName(final HtmlAnchor anchor) {
74 String name = anchor.asText().replace(".", "");
75
76 final String[] split = name.trim().split(" ");
77
78 if (split.length == 2) {
79 name = split[1] + ", " + split[0];
80 } else {
81 name = split[1] + " " + split[2] + ", " + split[0];
82 }
83 return name;
84 }
85
86
87
88
89
90
91
92
93 @Override
94 public final String getEnglishWikiHref(
95 final ParliamentMember parliamentMember) {
96 return englishWikiHrefMap.get(parliamentMember.getName());
97 }
98
99
100
101
102
103
104
105
106 @Override
107 public final String getHref(final ParliamentMember parliamentMember) {
108 return hrefMap.get(parliamentMember.getName());
109 }
110
111
112
113
114
115
116
117
118 @Override
119 public final String getWikiHref(final ParliamentMember parliamentMember) {
120 return wikiHrefMap.get(parliamentMember.getName());
121 }
122
123
124
125
126
127
128 @Override
129 public final void initData() {
130 try {
131 final HtmlPage htmlPage = (HtmlPage) webClient
132 .getPage(PARLIAMENT_MEMBER_LIST);
133 final List<HtmlAnchor> anchors = htmlPage.getDocumentElement()
134 .getHtmlElementsByTagName(ANCHOR);
135
136 for (final HtmlAnchor anchor : anchors) {
137 hrefMap.put(anchor.asText(), anchor.getHrefAttribute());
138 LOGGER.info("homepage:" + anchor.asText() + " - "
139 + anchor.getHrefAttribute());
140 }
141 } catch (final Exception e) {
142 LOGGER.warn("Problem Loading Parliament web site info", e);
143 }
144
145 try {
146 final HtmlPage htmlPage = (HtmlPage) webClient
147 .getPage(PARLIAMENT_MEMBER_SWEDISH_WIKI_LIST);
148 final List<HtmlTable> tables = htmlPage.getDocumentElement().getElementsByAttribute("table", "id",
149 "sortable_table_id_0");
150
151 if (tables != null) {
152
153 if (tables.iterator().hasNext()) {
154
155 final HtmlTable table = tables.iterator().next();
156
157 final List<HtmlTableRow> rows = table.getRows();
158
159 for (final HtmlTableRow row : rows) {
160 if (row.getCells().size() > 1) {
161 final HtmlTableCell cell = row.getCell(1);
162
163 final List<HtmlAnchor> anchors = cell
164 .getHtmlElementsByTagName(ANCHOR);
165
166 HtmlAnchor anchor = null;
167 if (anchors.size() > 0) {
168 if (!cell.asText().contains("ersatt av")) {
169 anchor = anchors.get(0);
170 } else {
171 anchor = anchors.get(anchors.size() - 1);
172 }
173
174 final String name = extractName(anchor);
175
176 final String href = "http://sv.wikipedia.org"
177 + anchor.getHrefAttribute();
178
179 LOGGER.info("wiki sv: " + name + " - " + href);
180 wikiHrefMap.put(name, href);
181
182 }
183 }
184 }
185 }
186 } else {
187 LOGGER.warn("Problem finding wiki links on page : " + PARLIAMENT_MEMBER_SWEDISH_WIKI_LIST );
188 }
189
190 } catch (final Exception e) {
191 LOGGER.warn("Problem Loading Parliament Swedish wiki site info", e);
192 }
193
194 try {
195 final HtmlPage htmlPage = (HtmlPage) webClient
196 .getPage(PARLIAMENT_MEMBER_ENGLISH_WIKI_LIST);
197 final List<HtmlTable> tables = htmlPage.getDocumentElement()
198 .getElementsByAttribute("table", "class", "wikitable");
199
200 final HtmlTable table = tables.get(1);
201
202 final List<HtmlTableRow> rows = table.getRows();
203
204 for (final HtmlTableRow row : rows) {
205 if (row.getCells().size() > 2) {
206 final HtmlTableCell cell = row.getCell(2);
207
208 final List<HtmlAnchor> anchors = cell
209 .getHtmlElementsByTagName(ANCHOR);
210
211 HtmlAnchor anchor = null;
212 if (anchors.size() > 0) {
213 if (!(cell.asText().contains("substituted") || cell
214 .asText().contains("replaced"))) {
215 anchor = anchors.get(0);
216 } else {
217 anchor = anchors.get(anchors.size() - 1);
218 }
219
220 final String name = extractName(anchor);
221
222 final String href = "http://en.wikipedia.org"
223 + anchor.getHrefAttribute();
224
225 LOGGER.info("wiki en: " + name + " - " + href);
226 englishWikiHrefMap.put(name, href);
227 }
228 }
229 }
230
231 } catch (final Exception e) {
232 LOGGER.warn("Problem Loading Parliament English wiki site info", e);
233 }
234 }
235 }