Coverage for ckanext/udc/tests/graph/test_integration.py: 99%

170 statements  

« prev     ^ index     » next       coverage.py v7.7.1, created at 2026-01-19 23:48 +0000

1""" 

2Integration tests for the complete graph transformation pipeline. 

3 

4Tests the end-to-end transformation from CKAN package dictionaries 

5to JSON-LD and RDF graphs using the real mapping configuration. 

6""" 

7import pytest 

8import json 

9from unittest.mock import Mock, patch 

10from rdflib import Graph, URIRef, Literal, Namespace 

11from pyld import jsonld 

12 

13from ckanext.udc.graph.template import compile_template 

14from ckanext.udc.graph.mapping_helpers import all_helpers 

15from ckanext.udc.graph.ckan_field import prepare_data_dict 

16from ckanext.udc.graph.contants import EMPTY_FIELD 

17 

18 

@pytest.fixture(autouse=True)
def mock_udc_plugin():
    """Replace the template module's ``get_plugin`` with a stub for every test.

    The stub plugin exposes an empty ``text_fields`` list.  The patch stays
    active for the duration of the test and the stub itself is yielded so a
    test can request it by name.
    """
    plugin_stub = Mock()
    plugin_stub.text_fields = []
    with patch("ckanext.udc.graph.template.get_plugin", return_value=plugin_stub):
        yield plugin_stub

27 

28 

class TestBasicTransformation:
    """Compile small, flat mappings and check the resulting fields."""

    def test_minimal_catalogue_entry(self):
        """A minimal mapping fills in ``@id``, ``@type`` and the title."""
        package = {
            "id": "dataset-001",
            "title_translated": "Test Dataset",
            "name": "test-dataset"
        }
        template = {
            "@context": {
                "dct": "http://purl.org/dc/terms/",
                "xsd": "http://www.w3.org/2001/XMLSchema#"
            },
            "@id": "http://data.urbandatacentre.ca/catalogue/{id}",
            "@type": "http://data.urbandatacentre.ca/catalogue",
            "dct:title": "{title}"
        }

        compiled = compile_template([template], all_helpers, prepare_data_dict(package))

        expected = {
            "@id": "http://data.urbandatacentre.ca/catalogue/dataset-001",
            "@type": "http://data.urbandatacentre.ca/catalogue",
            "dct:title": "Test Dataset",
        }
        for key, value in expected.items():
            assert compiled[key] == value

    @patch("ckanext.udc.graph.mapping_helpers.get_default_lang")
    def test_multilingual_catalogue_entry(self, mock_get_default_lang):
        """Translated title/description expand to one entry per language."""
        mock_get_default_lang.return_value = "en"

        def map_to_multiple_languages(val):
            # Plain (non-dict) values are tagged as English.
            if not isinstance(val, dict):
                return [{"@language": "en", "@value": val}]
            return [
                {"@language": lang, "@value": value}
                for lang, value in val.items()
            ]

        package = {
            "id": "dataset-002",
            "title_translated": {
                "en": "Housing Data",
                "fr": "Données sur le logement"
            },
            "description_translated": {
                "en": "Urban housing statistics",
                "fr": "Statistiques sur le logement urbain"
            },
            "name": "housing-data"
        }
        template = {
            "@context": {"dct": "http://purl.org/dc/terms/"},
            "@id": "http://data.urbandatacentre.ca/catalogue/{id}",
            "dct:title": "eval(map_to_multiple_languages(title))",
            "dct:description": "eval(map_to_multiple_languages(description))"
        }

        prepared = prepare_data_dict(package)
        # Expose the local helper to the template under the name the mapping uses.
        helpers = dict(all_helpers)
        helpers["map_to_multiple_languages"] = map_to_multiple_languages

        compiled = compile_template([template], helpers, prepared)

        titles = compiled["dct:title"]
        assert len(titles) == 2
        assert {"@language": "en", "@value": "Housing Data"} in titles
        assert {"@language": "fr", "@value": "Données sur le logement"} in titles
        assert len(compiled["dct:description"]) == 2

105 

106 

class TestComplexTransformation:
    """Compile mappings that contain nested objects."""

    @patch("ckanext.udc.graph.mapping_helpers.get_default_lang")
    def test_catalogue_with_publisher(self, mock_get_default_lang):
        """A nested publisher object is compiled with all of its fields."""
        mock_get_default_lang.return_value = "en"

        def map_to_multiple_languages(val):
            # Plain (non-dict) values are tagged as English.
            if not isinstance(val, dict):
                return [{"@language": "en", "@value": val}]
            return [
                {"@language": lang, "@value": value}
                for lang, value in val.items()
            ]

        package = {
            "id": "dataset-003",
            "title_translated": {"en": "Transport Data"},
            "name": "transport-data",
            "publisher": "Urban Planning Department",
            "publisher_email": "planning@city.ca",
            "publisher_id": "upd-001"
        }
        template = {
            "@context": {
                "dct": "http://purl.org/dc/terms/",
                "foaf": "http://xmlns.com/foaf/0.1/"
            },
            "@id": "http://data.urbandatacentre.ca/catalogue/{id}",
            "dct:title": "eval(map_to_multiple_languages(title))",
            "dct:publisher": [{
                "@id": "http://data.urbandatacentre.ca/org/{publisher_id}",
                "@type": "foaf:Agent",
                "foaf:name": "{publisher}",
                "foaf:mbox": "{publisher_email}"
            }]
        }

        prepared = prepare_data_dict(package)
        # Expose the local helper to the template under the name the mapping uses.
        helpers = dict(all_helpers)
        helpers["map_to_multiple_languages"] = map_to_multiple_languages

        compiled = compile_template([template], helpers, prepared)

        publishers = compiled["dct:publisher"]
        assert len(publishers) == 1
        publisher = publishers[0]
        expected = {
            "@id": "http://data.urbandatacentre.ca/org/upd-001",
            "@type": "foaf:Agent",
            "foaf:name": "Urban Planning Department",
            "foaf:mbox": "planning@city.ca",
        }
        for key, value in expected.items():
            assert publisher[key] == value

    def test_optional_fields_removed(self):
        """Fields whose source value is empty or absent are left out."""
        package = {
            "id": "test-001",
            "title": "Required Title",
            "description": "",  # present but empty
            # "notes" is intentionally absent
            "field1": "Value 1"
            # "field2" is intentionally absent
        }
        template = {
            "@id": "http://example.com/{id}",
            "required": "{title}",
            "optional1": "{description}",
            "optional2": "{notes}",
            "nested": [{
                "field1": "{field1}",
                "field2": "{field2}"
            }]
        }

        compiled = compile_template([template], all_helpers, prepare_data_dict(package))

        assert compiled["required"] == "Required Title"
        for dropped in ("optional1", "optional2"):
            assert dropped not in compiled

        nested = compiled["nested"]
        assert len(nested) == 1
        assert nested[0]["field1"] == "Value 1"
        assert "field2" not in nested[0]

196 

197 

class TestDateAndTypeTransformations:
    """Compile mappings whose values go through ``to_date`` / ``to_bool``."""

    def test_date_transformation(self):
        """A populated date field is emitted as a typed ``xsd:date`` value."""
        package = {
            "id": "test-001",
            "published_date": "2025-01-15"
        }
        template = {
            "@id": "http://example.com/{id}",
            "dct:issued": {
                "@type": "xsd:date",
                "@value": "{to_date(published_date)}"
            }
        }

        compiled = compile_template([template], all_helpers, prepare_data_dict(package))

        issued = compiled["dct:issued"][0]
        assert issued["@type"] == "xsd:date"
        # Only require that the date appears in the value's string form.
        assert "2025-01-15" in str(issued["@value"])

    def test_boolean_transformation(self):
        """The input ``"yes"`` is emitted as the string ``"true"``."""
        package = {
            "id": "test-001",
            "contains_data": "yes"
        }
        template = {
            "@id": "http://example.com/{id}",
            "hasData": {
                "@type": "xsd:boolean",
                "@value": "{to_bool(contains_data)}"
            }
        }

        compiled = compile_template([template], all_helpers, prepare_data_dict(package))

        has_data = compiled["hasData"][0]
        assert has_data["@value"] == "true"

    def test_empty_date_removed(self):
        """An empty date drops the whole typed node while the title stays."""
        package = {
            "id": "test-001",
            "title": "Test",
            "published_date": ""
        }
        template = {
            "@id": "http://example.com/{id}",
            "title": "{title}",
            "issued": {
                "@type": "xsd:date",
                "@value": "{to_date(published_date)}"
            }
        }

        compiled = compile_template([template], all_helpers, prepare_data_dict(package))

        assert "title" in compiled
        assert "issued" not in compiled

276 

277 

class TestLicenseAndFormatTransformations:
    """Compile mappings that use the license and file-format helpers."""

    @patch('ckan.model.Package.get_license_register')
    def test_license_mapping(self, mock_get_register):
        """A CKAN license id is mapped to the URL from the license register.

        The license register is mocked to contain a single ``cc-by`` entry.
        """
        # Imported locally, as in the original test, so that the module-level
        # cache can be reset around the compile step.
        from ckanext.udc.graph.mapping_helpers import licenseMap

        mock_license = Mock()
        mock_license.url = "http://creativecommons.org/licenses/by/4.0/"
        mock_get_register.return_value = {"cc-by": mock_license}

        mapping = {
            "@id": "http://example.com/{id}",
            "dct:license": "eval(mapFromCKANLicense(license_id))"
        }

        data_dict = {
            "id": "test-001",
            "license_id": "cc-by"
        }

        # Start from an empty license cache, and restore its previous content
        # when the block exits so entries derived from the mocked register do
        # not leak into other tests.
        # NOTE(review): assumes licenseMap is a dict-like mapping -- confirm.
        with patch.dict(licenseMap, clear=True):
            prepared_dict = prepare_data_dict(data_dict)
            result = compile_template(
                [mapping],
                all_helpers,
                prepared_dict
            )

        assert len(result["dct:license"]) == 1
        assert result["dct:license"][0]["@id"] == "http://creativecommons.org/licenses/by/4.0/"

    def test_format_list_transformation(self):
        """A comma-separated format string becomes one ``@id`` node per item."""
        mapping = {
            "@id": "http://example.com/{id}",
            "dct:format": "eval(split_to_uris(file_format))"
        }

        data_dict = {
            "id": "test-001",
            "file_format": "csv,json,xml"
        }

        prepared_dict = prepare_data_dict(data_dict)
        result = compile_template(
            [mapping],
            all_helpers,
            prepared_dict
        )

        formats = result["dct:format"]
        assert len(formats) == 3
        for name in ("csv", "json", "xml"):
            assert {"@id": name} in formats

335 

336 

class TestURLTransformations:
    """Compile mappings that pass a URL through ``quote_url``."""

    def test_url_quoting(self):
        """Spaces in the URL are percent-encoded; the prefix is kept."""
        package = {
            "id": "test-001",
            "location": "http://example.com/data with spaces"
        }
        template = {
            "@id": "http://example.com/{id}",
            "dcat:accessURL": {
                "@id": "eval(quote_url(location))"
            }
        }

        compiled = compile_template([template], all_helpers, prepare_data_dict(package))

        access_url = compiled["dcat:accessURL"][0]["@id"]
        assert "with%20spaces" in access_url
        assert "http://example.com/" in access_url

363 

364 

class TestTagTransformations:
    """Compile mappings that turn tags into keyword literals."""

    def test_multilingual_tags(self):
        """Per-language tag lists flatten into language-tagged values."""

        def map_from_tags_multiple_languages(tags_dict):
            # One literal per (language, tag) pair, with the tag stripped.
            return [
                {"@language": lang, "@value": tag.strip()}
                for lang, tags_list in tags_dict.items()
                for tag in tags_list
            ]

        package = {
            "id": "test-001",
            "tags_translated": {
                "en": ["housing", "transport"],
                "fr": ["logement", "transport"]
            },
            "name": "test"
        }
        template = {
            "@id": "http://example.com/{id}",
            "dcat:keyword": "eval(map_from_tags_multiple_languages(tags))"
        }

        prepared = prepare_data_dict(package)
        # Expose the local helper to the template under the name the mapping uses.
        helpers = dict(all_helpers)
        helpers["map_from_tags_multiple_languages"] = map_from_tags_multiple_languages

        compiled = compile_template([template], helpers, prepared)

        keywords = compiled["dcat:keyword"]
        assert len(keywords) == 4
        assert {"@language": "en", "@value": "housing"} in keywords
        assert {"@language": "fr", "@value": "logement"} in keywords

409 

410 

class TestRDFGeneration:
    """Feed compiled templates to rdflib and inspect the parsed graph."""

    @patch("ckanext.udc.graph.mapping_helpers.get_default_lang")
    def test_jsonld_to_rdf_conversion(self, mock_get_default_lang):
        """The compiled output parses as JSON-LD and yields the expected triples."""
        mock_get_default_lang.return_value = "en"

        package = {
            "id": "dataset-001",
            "title_translated": "Test Dataset",
            "name": "test-dataset",
            "published_date": "2025-01-01"
        }
        template = {
            "@context": {
                "dct": "http://purl.org/dc/terms/",
                "xsd": "http://www.w3.org/2001/XMLSchema#"
            },
            "@id": "http://data.urbandatacentre.ca/catalogue/{id}",
            "@type": "http://www.w3.org/ns/dcat#Dataset",
            "dct:title": "{title}",
            "dct:issued": {
                "@type": "xsd:date",
                "@value": "{to_date(published_date)}"
            }
        }

        document = compile_template([template], all_helpers, prepare_data_dict(package))

        # Load the compiled document into an RDF graph.
        graph = Graph()
        graph.parse(data=document, format="json-ld")

        dct_ns = Namespace("http://purl.org/dc/terms/")
        dcat_ns = Namespace("http://www.w3.org/ns/dcat#")
        rdf_type = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
        dataset = URIRef("http://data.urbandatacentre.ca/catalogue/dataset-001")

        # The dataset appears as the subject of at least one triple.
        assert (dataset, None, None) in graph

        # At least one title is attached to it.
        titles = list(graph.objects(dataset, dct_ns.title))
        assert len(titles) > 0

        # It is typed as a dcat:Dataset.
        declared_types = list(graph.objects(dataset, rdf_type))
        assert dcat_ns.Dataset in declared_types

467 

468 

class TestErrorHandling:
    """Compile mappings that reference unknown helpers or variables."""

    def test_invalid_eval_expression(self):
        """A field whose ``eval`` calls an unknown function is dropped."""
        package = {
            "id": "test-001"
        }
        template = {
            "@id": "http://example.com/{id}",
            "invalid": "eval(nonexistent_function())"
        }

        compiled = compile_template([template], all_helpers, prepare_data_dict(package))

        # The field that could not be evaluated must not appear in the output.
        assert "invalid" not in compiled

    def test_missing_required_variable(self):
        """An ``@id`` built from a missing variable is dropped or left unresolved.

        Either outcome is accepted: ``@id`` is absent from the output, or it
        is present and still contains the text ``missing_id``.
        """
        package = {
            "title": "Test"
        }
        template = {
            "@id": "http://example.com/{missing_id}",
            "title": "{title}"
        }

        compiled = compile_template([template], all_helpers, prepare_data_dict(package))

        # Only inspect the value when the key survived compilation.
        if "@id" in compiled:
            assert "missing_id" in compiled.get("@id", "")
510 

511 # Should remove @id if variable is missing 

512 assert "@id" not in result or "missing_id" in result.get("@id", "")