Coverage for ckanext/udc/tests/graph/test_mapping_helpers.py: 99%

236 statements  

« prev     ^ index     » next       coverage.py v7.7.1, created at 2026-01-19 23:48 +0000

1""" 

2Tests for graph/mapping_helpers.py - Helper functions for JSON-LD mapping. 

3 

4Tests all helper functions that are available in the mapping configuration, 

5including UUID generation, type conversions, URL quoting, and CKAN-specific mappings. 

6""" 

7import pytest 

8from datetime import datetime 

9from rdflib import Literal, XSD 

10from unittest.mock import Mock, patch 

11 

12from ckanext.udc.graph.mapping_helpers import ( 

13 generate_uuid, 

14 to_integer, 

15 to_float, 

16 to_date, 

17 to_bool, 

18 mapFromCKANLicense, 

19 split_to_uris, 

20 quote_url, 

21 mapFromCKANTags, 

22 map_from_tags_multiple_languages, 

23 map_to_multiple_languages, 

24 map_to_single_language, 

25 map_to_multiple_datasets, 

26 uuidMap, 

27 licenseMap 

28) 

29from ckanext.udc.graph.contants import EMPTY_FIELD 

30 

31 

class TestGenerateUuid:
    """Exercise generate_uuid for keyed and un-keyed calls."""

    def test_generate_uuid_without_key(self):
        """Two un-keyed calls give distinct values of length 36."""
        first = generate_uuid()
        second = generate_uuid()

        assert first != second
        # 36 is the length of the standard textual UUID format.
        assert len(first) == 36

    def test_generate_uuid_with_key(self):
        """Calling twice with one key gives equal results."""
        key = "test_key"
        initial = generate_uuid(key)
        repeated = generate_uuid(key)

        assert initial == repeated

    def test_generate_uuid_different_keys(self):
        """Distinct keys give distinct results."""
        for_key1 = generate_uuid("key1")
        for_key2 = generate_uuid("key2")

        assert for_key1 != for_key2

    def test_uuid_persistence_in_map(self):
        """A keyed call records its result in uuidMap under that key."""
        key = "persistent_key"
        generated = generate_uuid(key)

        assert key in uuidMap
        assert uuidMap[key] == generated

64 

65 

class TestTypeConversions:
    """Checks for the to_integer, to_float, to_date and to_bool helpers."""

    def test_to_integer_valid(self):
        """Positive, zero and negative numeric strings convert to ints."""
        cases = (("42", 42), ("0", 0), ("-10", -10))
        for text, expected in cases:
            assert to_integer(text) == expected

    def test_to_integer_invalid(self):
        """A non-numeric string makes to_integer raise ValueError."""
        with pytest.raises(ValueError):
            to_integer("not a number")

    def test_to_float_valid(self):
        """Decimal strings, including zero and negatives, convert to floats."""
        cases = (("3.14", 3.14), ("0.0", 0.0), ("-2.5", -2.5))
        for text, expected in cases:
            assert to_float(text) == expected

    def test_to_float_invalid(self):
        """A non-numeric string makes to_float raise ValueError."""
        with pytest.raises(ValueError):
            to_float("not a float")

    def test_to_date_valid(self):
        """A date string converts to an rdflib Literal typed as XSD.date."""
        converted = to_date("2025-01-15")

        assert isinstance(converted, Literal)
        assert converted.datatype == XSD.date

    def test_to_date_empty_string(self):
        """An empty string is mapped to EMPTY_FIELD."""
        outcome = to_date("")

        assert outcome == EMPTY_FIELD

    def test_to_date_empty_field(self):
        """EMPTY_FIELD passes through to_date unchanged."""
        outcome = to_date(EMPTY_FIELD)

        assert outcome == EMPTY_FIELD

    def test_to_bool_yes(self):
        """Any casing of 'yes' converts to the string "true"."""
        for spelling in ("yes", "Yes", "YES"):
            assert to_bool(spelling) == "true"

    def test_to_bool_no(self):
        """Any casing of 'no' converts to the string "false"."""
        for spelling in ("no", "No", "NO"):
            assert to_bool(spelling) == "false"

    def test_to_bool_other(self):
        """Values other than yes/no, including the empty string, give None."""
        for unrecognised in ("maybe", ""):
            assert to_bool(unrecognised) is None

121 

122 

class TestMapFromCKANLicense:
    """Test CKAN license mapping through mapFromCKANLicense.

    Every test patches ``ckan.model.Package.get_license_register`` so the
    register contents are fully controlled by the test.
    """

    def setup_method(self):
        """Clear the shared licenseMap before each test.

        pytest calls the xunit-style ``setup_method`` hook on plain test
        classes. A unittest-style ``setUp`` only runs on
        ``unittest.TestCase`` subclasses, so it would be silently skipped on
        this class.
        """
        licenseMap.clear()

    @patch('ckan.model.Package.get_license_register')
    def test_map_license_by_id(self, mock_get_register):
        """A license ID present in the register maps to that license's URL."""
        mock_license = Mock()
        mock_license.url = "http://creativecommons.org/licenses/by/4.0/"
        mock_get_register.return_value = {"cc-by": mock_license}

        result = mapFromCKANLicense("cc-by")

        assert len(result) == 1
        assert result[0]["@id"] == "http://creativecommons.org/licenses/by/4.0/"

    @patch('ckan.model.Package.get_license_register')
    def test_map_license_by_url(self, mock_get_register):
        """With an empty register, a URL input is returned as the @id."""
        mock_get_register.return_value = {}

        url = "http://example.com/custom-license"
        result = mapFromCKANLicense(url)

        assert len(result) == 1
        assert result[0]["@id"] == url

    @patch('ckan.model.Package.get_license_register')
    def test_map_license_without_url(self, mock_get_register):
        """A registered license whose url is None gets a UDC license URI."""
        mock_license = Mock()
        mock_license.url = None
        mock_get_register.return_value = {"custom": mock_license}

        result = mapFromCKANLicense("custom")

        assert len(result) == 1
        assert result[0]["@id"] == "http://data.urbandatacentre.ca/licenses/custom"

167 

168 

class TestSplitToUris:
    """Checks for split_to_uris."""

    def test_split_comma_separated(self):
        """A comma-separated string yields one {"@id": ...} entry per item."""
        uris = split_to_uris("csv,json,xml")

        assert len(uris) == 3
        for item in ("csv", "json", "xml"):
            assert {"@id": item} in uris

    def test_split_custom_separator(self):
        """Passing separator="|" splits a pipe-delimited string into three."""
        uris = split_to_uris("csv|json|xml", separator="|")

        assert len(uris) == 3

    def test_split_single_value(self):
        """A string without a separator yields a single entry."""
        uris = split_to_uris("csv")

        assert len(uris) == 1
        assert uris[0]["@id"] == "csv"

    def test_split_empty_string(self):
        """An empty string yields a single entry whose @id is empty."""
        uris = split_to_uris("")

        assert len(uris) == 1
        assert uris[0]["@id"] == ""

200 

201 

class TestQuoteUrl:
    """Checks for quote_url."""

    def test_quote_http_url(self):
        """Spaces in an http URL path are encoded as %20."""
        quoted = quote_url("http://example.com/path with spaces")

        assert quoted == "http://example.com/path%20with%20spaces"

    def test_quote_https_url(self):
        """Spaces in an https URL path are encoded as %20."""
        quoted = quote_url("https://example.com/path with spaces")

        assert quoted == "https://example.com/path%20with%20spaces"

    def test_quote_preserves_slashes(self):
        """A URL with only plain path segments comes back unchanged."""
        original = "http://example.com/path/to/resource"

        assert quote_url(original) == "http://example.com/path/to/resource"

    def test_quote_special_characters(self):
        """The '?' and '&' query delimiters remain present after quoting."""
        quoted = quote_url("http://example.com/data?key=value&foo=bar")

        for delimiter in ("?", "&"):
            assert delimiter in quoted

    def test_quote_unicode_characters(self):
        """A non-ASCII character is percent-encoded as its UTF-8 bytes."""
        quoted = quote_url("http://example.com/données")

        assert "donn%C3%A9es" in quoted

240 

241 

class TestMapFromCKANTags:
    """Checks for mapFromCKANTags."""

    def test_map_single_tag(self):
        """A lone tag yields a single {"@value": ...} entry."""
        mapped = mapFromCKANTags("housing")

        assert len(mapped) == 1
        assert mapped[0]["@value"] == "housing"

    def test_map_multiple_tags(self):
        """Each comma-separated tag yields its own @value entry."""
        mapped = mapFromCKANTags("housing,transport,health")

        assert len(mapped) == 3
        for tag in ("housing", "transport", "health"):
            assert {"@value": tag} in mapped

    def test_map_tags_with_whitespace(self):
        """Spaces around the commas are stripped and input order is kept."""
        mapped = mapFromCKANTags("housing , transport , health")

        for position, tag in enumerate(("housing", "transport", "health")):
            assert mapped[position]["@value"] == tag

268 

269 

class TestMapFromTagsMultipleLanguages:
    """Checks for map_from_tags_multiple_languages."""

    def test_map_multilingual_tags(self):
        """Every (language, tag) pair yields one @language/@value entry."""
        tags_by_lang = {
            "en": ["housing", "transport"],
            "fr": ["logement", "transport"],
        }

        mapped = map_from_tags_multiple_languages(tags_by_lang)

        assert len(mapped) == 4
        expected_pairs = (
            ("en", "housing"),
            ("en", "transport"),
            ("fr", "logement"),
            ("fr", "transport"),
        )
        for lang, tag in expected_pairs:
            assert {"@language": lang, "@value": tag} in mapped

    def test_map_single_language_tags(self):
        """With one language, every entry carries that language code."""
        mapped = map_from_tags_multiple_languages({"en": ["housing", "transport"]})

        assert len(mapped) == 2
        for entry in mapped:
            assert entry["@language"] == "en"

    def test_map_empty_tags(self):
        """An empty dictionary yields an empty result."""
        mapped = map_from_tags_multiple_languages({})

        assert len(mapped) == 0

302 

303 

class TestMapToMultipleLanguages:
    """Checks for map_to_multiple_languages."""

    @patch('ckanext.udc.graph.mapping_helpers.get_default_lang')
    def test_map_dict_to_languages(self, default_lang_mock):
        """A language-keyed dict yields one @language/@value entry per key."""
        translations = {"en": "English", "fr": "Français"}

        mapped = map_to_multiple_languages(translations)

        assert len(mapped) == 2
        for lang, text in (("en", "English"), ("fr", "Français")):
            assert {"@language": lang, "@value": text} in mapped

    @patch('ckanext.udc.graph.mapping_helpers.get_default_lang')
    def test_map_string_to_default_language(self, default_lang_mock):
        """A plain string is tagged with the (patched) default language."""
        default_lang_mock.return_value = "en"

        mapped = map_to_multiple_languages("English text")

        assert len(mapped) == 1
        only_entry = mapped[0]
        assert only_entry["@language"] == "en"
        assert only_entry["@value"] == "English text"

    def test_map_non_dict_non_string(self):
        """An input that is neither dict nor str yields an empty result."""
        mapped = map_to_multiple_languages(123)

        assert len(mapped) == 0

334 

335 

class TestMapToSingleLanguage:
    """Checks for map_to_single_language."""

    @patch('ckanext.udc.graph.mapping_helpers.get_default_lang')
    def test_map_dict_to_specific_language(self, default_lang_mock):
        """When the requested language is a key, its text is returned."""
        translations = {"en": "English", "fr": "Français"}

        chosen = map_to_single_language(translations, lang="fr")

        assert chosen == "Français"

    @patch('ckanext.udc.graph.mapping_helpers.get_default_lang')
    def test_map_dict_to_default_language(self, default_lang_mock):
        """A missing language falls back to the (patched) default one."""
        default_lang_mock.return_value = "en"
        translations = {"en": "English", "fr": "Français"}

        chosen = map_to_single_language(translations, lang="es")

        assert chosen == "English"

    def test_map_string_to_language(self):
        """A plain string is returned unchanged."""
        chosen = map_to_single_language("Some text", lang="en")

        assert chosen == "Some text"

    def test_map_non_dict_non_string(self):
        """An input that is neither dict nor str yields an empty string."""
        chosen = map_to_single_language(123, lang="en")

        assert chosen == ""

369 

370 

class TestMapToMultipleDatasets:
    """Checks for map_to_multiple_datasets."""

    def test_map_datasets_with_ids(self):
        """Each dataset with an id yields an entry using that id as @id."""
        first_id = "http://example.com/dataset1"
        second_id = "http://example.com/dataset2"

        mapped = map_to_multiple_datasets([{"id": first_id}, {"id": second_id}])

        assert len(mapped) == 2
        assert mapped[0]["@id"] == "http://example.com/dataset1"
        assert mapped[0]["@type"] == "dcat:Dataset"
        assert mapped[1]["@id"] == "http://example.com/dataset2"

    def test_map_datasets_without_ids(self):
        """A dataset lacking an id is left out of the result."""
        mixed = [
            {"title": "Dataset 1"},
            {"id": "http://example.com/dataset2"},
        ]

        mapped = map_to_multiple_datasets(mixed)

        assert len(mapped) == 1
        assert mapped[0]["@id"] == "http://example.com/dataset2"

    def test_map_empty_datasets(self):
        """An empty list yields an empty result."""
        mapped = map_to_multiple_datasets([])

        assert len(mapped) == 0

405 

406 

class TestIntegrationScenarios:
    """Scenario checks that chain several mapping helpers together."""

    @patch('ckan.model.Package.get_license_register')
    @patch('ckanext.udc.graph.mapping_helpers.get_default_lang')
    def test_complete_catalogue_mapping(self, default_lang_mock, register_mock):
        """Map the fields of one catalogue entry and check each result."""
        # Stacked patches are injected bottom-up: the default-language mock
        # arrives first, the license-register mock second.
        default_lang_mock.return_value = "en"
        cc_by = Mock(url="http://creativecommons.org/licenses/by/4.0/")
        register_mock.return_value = {"cc-by": cc_by}

        # Start from an empty licenseMap before mapping the entry.
        licenseMap.clear()

        titles = map_to_multiple_languages(
            {"en": "Housing Data", "fr": "Données sur le logement"}
        )
        keywords = map_from_tags_multiple_languages(
            {"en": ["housing", "urban"], "fr": ["logement", "urbain"]}
        )
        licenses = mapFromCKANLicense("cc-by")
        file_formats = split_to_uris("csv,json")
        published_on = to_date("2025-01-01")
        access_url = quote_url("http://example.com/data with spaces")

        assert len(titles) == 2
        assert len(keywords) == 4
        assert licenses[0]["@id"] == "http://creativecommons.org/licenses/by/4.0/"
        assert len(file_formats) == 2
        assert isinstance(published_on, Literal)
        assert "with%20spaces" in access_url