Coverage for ckanext/udc/tests/graph/test_config_validation.py: 96%

168 statements  

« prev     ^ index     » next       coverage.py v7.7.1, created at 2026-01-19 23:48 +0000

1""" 

2Tests validating the actual config.example.json mapping configuration. 

3 

4These tests validate that the real mapping configuration from config.example.json 

5works correctly with realistic CKAN data. 

6""" 

7import pytest 

8import json 

9from unittest.mock import Mock, patch 

10from pathlib import Path 

11from rdflib import Graph, URIRef, Namespace 

12 

13from ckanext.udc.graph.template import compile_template 

14from ckanext.udc.graph.mapping_helpers import all_helpers, licenseMap 

15from ckanext.udc.graph.ckan_field import prepare_data_dict 

16 

17 

# Path of the real example configuration exercised by these tests. It sits
# two directories above the directory that contains this module.
CONFIG_PATH = Path(__file__).parent.parent.parent.joinpath("config.example.json")

20 

21 

@pytest.fixture(autouse=True)
def mock_udc_plugin():
    """Replace the template module's plugin lookup with a stub for every test.

    While a test runs, ``ckanext.udc.graph.template.get_plugin`` is patched so
    that calling it returns a ``Mock`` whose ``text_fields`` attribute is
    ``['title', 'description']``.

    Yields:
        The stub plugin object returned by the patched ``get_plugin``.
    """
    plugin_stub = Mock()
    plugin_stub.text_fields = ['title', 'description']

    # The patch is active only while the dependent test body executes.
    with patch('ckanext.udc.graph.template.get_plugin', return_value=plugin_stub):
        yield plugin_stub

30 

31 

@pytest.fixture
def config():
    """Load and parse the actual config.example.json file.

    Returns:
        The decoded JSON document found at ``CONFIG_PATH``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Decode explicitly as UTF-8: without it ``open`` falls back to the
    # locale's preferred encoding, which can mis-decode or reject non-ASCII
    # text in the configuration on some platforms.
    with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
        return json.load(f)

37 

38 

@pytest.fixture
def extended_helpers():
    """Provide the helper table used when compiling the example mapping.

    Returns:
        A dict holding every entry of ``all_helpers`` plus three helpers
        defined here: ``map_to_multiple_languages``,
        ``map_from_tags_multiple_languages`` and ``map_to_multiple_datasets``.
    """

    def map_to_multiple_languages(val):
        """Convert a ``{lang: text}`` dict, or a bare value, to tagged literals."""
        if not isinstance(val, dict):
            # Anything that is not a dict is tagged as English.
            return [{"@language": "en", "@value": val}]
        literals = []
        for lang, value in val.items():
            literals.append({"@language": lang, "@value": value})
        return literals

    def map_from_tags_multiple_languages(tags_dict):
        """Flatten ``{lang: [tag, ...]}`` into tagged literals with stripped tags."""
        return [
            {"@language": lang, "@value": tag.strip()}
            for lang, tags_list in tags_dict.items()
            for tag in tags_list
        ]

    def map_to_multiple_datasets(datasets):
        """Build one ``dcat:Dataset`` node for each entry with a truthy ``id``."""
        return [
            {
                "@id": ds_id,
                "dcat:landingPage": ds_id,
                "dcat:accessURL": ds_id,
                "@type": "dcat:Dataset"
            }
            for ds_id in (ds.get("id") for ds in datasets)
            if ds_id
        ]

    # Start from the project helpers, then layer the local ones on top.
    helpers = dict(all_helpers)
    helpers.update({
        "map_to_multiple_languages": map_to_multiple_languages,
        "map_from_tags_multiple_languages": map_from_tags_multiple_languages,
        "map_to_multiple_datasets": map_to_multiple_datasets
    })
    return helpers

74 

75 

class TestConfigStructure:
    """Structural checks on the parsed config.example.json document."""

    def test_config_loads(self, config):
        """The config fixture yields a non-null dict."""
        assert config is not None
        assert isinstance(config, dict)

    def test_has_maturity_model(self, config):
        """A ``maturity_model`` list with six entries is present."""
        assert "maturity_model" in config
        levels = config["maturity_model"]
        assert isinstance(levels, list)
        assert len(levels) == 6  # 6 maturity levels

    def test_has_mappings(self, config):
        """A ``mappings`` dict is present."""
        assert "mappings" in config
        assert isinstance(config["mappings"], dict)

    def test_mappings_has_context(self, config):
        """The mappings carry an ``@context`` dict."""
        mappings = config["mappings"]
        assert "@context" in mappings
        assert isinstance(mappings["@context"], dict)

    def test_mappings_has_id(self, config):
        """The mappings carry an ``@id`` entry."""
        assert "@id" in config["mappings"]

    def test_maturity_levels_structure(self, config):
        """Every maturity level has ``title``, ``name`` and a ``fields`` list."""
        for level in config["maturity_model"]:
            for required_key in ("title", "name", "fields"):
                assert required_key in level
            assert isinstance(level["fields"], list)

111 

112 

class TestMaturityLevel1Fields:
    """Test Maturity Level 1 (Basic Information) fields mapping."""

    @patch('ckanext.udc.graph.mapping_helpers.get_default_lang')
    def test_basic_fields_transformation(self, mock_get_default_lang, config, extended_helpers):
        """Test transformation of basic maturity level 1 fields.

        Compiles the example mapping against a package dict carrying the
        basic-information fields and checks the identifier, type, title,
        theme and file-size entries of the result.
        """
        mock_get_default_lang.return_value = "en"

        data_dict = {
            "id": "housing-2025",
            "name": "housing-data-2025",
            "title_translated": {
                "en": "Housing Statistics 2025",
                "fr": "Statistiques sur le logement 2025"
            },
            "description_translated": {
                "en": "Comprehensive housing data",
                "fr": "Données complètes sur le logement"
            },
            "tags_translated": {
                "en": ["housing", "statistics"],
                "fr": ["logement", "statistiques"]
            },
            "theme": "Housing",
            "file_format": "csv,json",
            "file_size": "125.5",
            "unique_metadata_identifier": "meta-001",
            "published_date": "2025-01-01",
            "time_span_start": "2024-01-01",
            "time_span_end": "2024-12-31",
            "geo_span": "Toronto"
        }

        prepared_dict = prepare_data_dict(data_dict)
        mapping = config["mappings"]

        result = compile_template(
            [mapping],
            extended_helpers,
            prepared_dict
        )

        # Validate basic structure
        assert result["@id"] == "http://data.urbandatacentre.ca/catalogue/housing-2025"
        assert result["@type"] == "http://data.urbandatacentre.ca/catalogue"

        # Validate multilingual title: one literal per language (en + fr).
        # Counting the keys of the first literal instead would always give 2
        # ("@language" and "@value"), whatever the number of translations.
        assert "dct:title" in result
        assert len(result["dct:title"]) == 2

        # Validate theme
        assert "dcat:theme" in result

        # Validate file size
        assert "cudr:file_size" in result

168 

169 

class TestMaturityLevel2Fields:
    """Checks for the Maturity Level 2 (Access) part of the mapping."""

    @patch('ckan.model.Package.get_license_register')
    @patch('ckanext.udc.graph.mapping_helpers.get_default_lang')
    def test_access_fields_transformation(self, mock_get_default_lang, mock_get_register,
                                          config, extended_helpers):
        """Compile a package with access fields and check the resulting keys.

        The licence register and the default language are both patched; the
        module-level ``licenseMap`` is emptied before compiling.
        """
        mock_get_default_lang.return_value = "en"
        license_stub = Mock(url="http://creativecommons.org/licenses/by/4.0/")
        mock_get_register.return_value = {"cc-by": license_stub}

        licenseMap.clear()

        package = {
            "id": "dataset-002",
            "name": "test-dataset",
            "title_translated": {"en": "Test"},
            "access_category": "Open",
            "license_id": "cc-by",
            "limits_on_use": "Academic use only",
            "location": "http://example.com/data",
            "data_service": "http://ckan.example.com",
            "owner": "Data Owner Org",
            "access_steward": "John Doe",
            "access_steward_email": "john@example.com",
            "publisher": "Publishing Org",
            "publisher_email": "pub@example.com"
        }

        prepared = prepare_data_dict(package)
        result = compile_template([config["mappings"]], extended_helpers, prepared)

        # Licence: present, with exactly one entry.
        assert "dct:license" in result
        assert len(result["dct:license"]) == 1

        # Access URL, publisher and contact point must all be emitted.
        for expected_key in ("dcat:accessURL", "dct:publisher", "dcat:contactPoint"):
            assert expected_key in result

222 

223 

class TestMaturityLevel3Fields:
    """Checks for the Maturity Level 3 (Content) part of the mapping."""

    @patch('ckanext.udc.graph.mapping_helpers.get_default_lang')
    def test_content_fields_transformation(self, mock_get_default_lang, config, extended_helpers):
        """Compile a package with content fields and check the resulting keys."""
        mock_get_default_lang.return_value = "en"

        package = {
            "id": "dataset-003",
            "name": "test-dataset",
            "title_translated": {"en": "Test"},
            "accessed_date": "2025-01-15",
            "description_document": "http://example.com/docs",
            "language": "en,fr",
            "persistent_identifier": "yes",
            "global_unique_identifier": "yes",
            "file_format": "csv,json",
            "source": "http://example.com/source",
            "version": "1.0",
            "version_dataset": {"url": "http://example.com/v1", "title": "Version 1"},
            "dataset_versions": [
                {"url": "http://example.com/v2", "title": "Version 2"}
            ],
            "provenance": "Original data collected in 2024",
            "provenance_url": "http://example.com/provenance"
        }

        prepared = prepare_data_dict(package)
        result = compile_template([config["mappings"]], extended_helpers, prepared)

        # Language mapping first, then the two FAIR indicators.
        for expected_key in ("dct:language", "fair:rda-f1-01d", "fair:rda-f1-02d"):
            assert expected_key in result

267 

268 

class TestCompleteMaturityLevels:
    """Checks the mapping against a package with fields from every level."""

    @patch('ckan.model.Package.get_license_register')
    @patch('ckanext.udc.graph.mapping_helpers.get_default_lang')
    def test_complete_catalogue_entry(self, mock_get_default_lang, mock_get_register,
                                      config, extended_helpers):
        """Compile a comprehensive catalogue entry and check its key outputs.

        The licence register and the default language are both patched; the
        module-level ``licenseMap`` is emptied before compiling.
        """
        mock_get_default_lang.return_value = "en"
        license_stub = Mock(url="http://opendatacommons.org/licenses/odbl/")
        mock_get_register.return_value = {"odc-odbl": license_stub}

        licenseMap.clear()

        # One package dict carrying fields from all six maturity levels.
        package = {
            # Level 1 - Basic Information
            "id": "comprehensive-dataset-001",
            "name": "comprehensive-housing-data",
            "title_translated": {
                "en": "Comprehensive Housing Data 2025",
                "fr": "Données complètes sur le logement 2025"
            },
            "description_translated": {
                "en": "Complete housing statistics for urban areas",
                "fr": "Statistiques complètes sur le logement urbain"
            },
            "tags_translated": {
                "en": ["housing", "statistics", "urban"],
                "fr": ["logement", "statistiques", "urbain"]
            },
            "theme": "Housing",
            "file_format": "csv,json,xml",
            "file_size": "250.75",
            "unique_metadata_identifier": "meta-comprehensive-001",
            "published_date": "2025-01-01",
            "time_span_start": "2020-01-01",
            "time_span_end": "2024-12-31",
            "geo_span": "Greater Toronto Area",

            # Level 2 - Access
            "access_category": "Open",
            "license_id": "odc-odbl",
            "location": "http://data.city.ca/housing-2025",
            "publisher": "City Planning Department",
            "publisher_email": "planning@city.ca",

            # Level 3 - Content
            "language": "en,fr",
            "persistent_identifier": "yes",
            "global_unique_identifier": "yes",

            # Level 4 - Privacy
            "contains_individual_data": "no",
            "contains_identifiable_individual_data": "no",

            # Level 5 - Indigenous Data
            "contains_indigenous_data": "no",

            # Level 6 - Quality
            "number_of_rows": "10000",
            "number_of_columns": "25"
        }

        prepared = prepare_data_dict(package)
        result = compile_template([config["mappings"]], extended_helpers, prepared)

        # Core structure
        assert result["@id"] == "http://data.urbandatacentre.ca/catalogue/comprehensive-dataset-001"
        assert result["@type"] == "http://data.urbandatacentre.ca/catalogue"

        # Multilingual fields
        assert len(result["dct:title"]) == 2
        assert len(result["dct:description"]) == 2
        assert len(result["dcat:keyword"]) == 6  # 3 tags × 2 languages

        # Temporal fields, licence and quality metrics, in that order.
        for expected_key in (
            "cudr:hasTemporalStart",
            "cudr:hasTemporalEnd",
            "dct:license",
            "cudr:rows",
            "cudr:columns",
        ):
            assert expected_key in result

362 

363 

class TestRDFGraphGeneration:
    """Checks that the compiled mapping output loads into an rdflib graph."""

    @patch('ckanext.udc.graph.mapping_helpers.get_default_lang')
    def test_jsonld_to_rdf_graph(self, mock_get_default_lang, config, extended_helpers):
        """Parse the compiled output as JSON-LD and look up title and keywords."""
        mock_get_default_lang.return_value = "en"

        package = {
            "id": "test-rdf-001",
            "name": "test-dataset",
            "title_translated": {"en": "Test Dataset"},
            "description_translated": {"en": "Test Description"},
            "tags_translated": {"en": ["test", "rdf"]},
            "published_date": "2025-01-01"
        }

        prepared = prepare_data_dict(package)
        compiled = compile_template([config["mappings"]], extended_helpers, prepared)

        # Feed the compiled document to rdflib's JSON-LD parser.
        graph = Graph()
        graph.parse(data=compiled, format='json-ld')

        # Vocabularies used for the lookups below.
        DCT = Namespace("http://purl.org/dc/terms/")
        DCAT = Namespace("http://www.w3.org/ns/dcat#")

        # The dataset node must appear as the subject of at least one triple.
        subject = URIRef("http://data.urbandatacentre.ca/catalogue/test-rdf-001")
        assert (subject, None, None) in graph

        # At least one title.
        title_values = list(graph.objects(subject, DCT.title))
        assert len(title_values) > 0

        # At least the two keywords "test" and "rdf".
        keyword_values = list(graph.objects(subject, DCAT.keyword))
        assert len(keyword_values) >= 2

409 

410 

class TestOptionalFieldHandling:
    """Checks how absent or empty optional fields show up in the output."""

    @patch('ckanext.udc.graph.mapping_helpers.get_default_lang')
    def test_minimal_required_fields(self, mock_get_default_lang, config, extended_helpers):
        """Compile a package that has only ``id``, ``name`` and a title."""
        mock_get_default_lang.return_value = "en"

        # Nothing beyond the bare minimum is supplied.
        package = {
            "id": "minimal-001",
            "name": "minimal-dataset",
            "title_translated": {"en": "Minimal Dataset"}
        }

        prepared = prepare_data_dict(package)
        result = compile_template([config["mappings"]], extended_helpers, prepared)

        # Identifier and title must be there.
        assert result["@id"] == "http://data.urbandatacentre.ca/catalogue/minimal-001"
        assert "dct:title" in result

        # This only requires that at least one of these optional keys is
        # absent; it does not require all of them to be absent.
        optional_fields = [
            "dct:description", "cudr:accessCategory", "dct:license",
            "dcat:keyword", "cudr:file_size"
        ]
        absent = [field for field in optional_fields if field not in result]
        assert len(absent) > 0

    @patch('ckanext.udc.graph.mapping_helpers.get_default_lang')
    def test_empty_optional_fields_removed(self, mock_get_default_lang, config, extended_helpers):
        """Compile a package whose optional fields are empty strings."""
        mock_get_default_lang.return_value = "en"

        package = {
            "id": "test-empty-001",
            "name": "test-dataset",
            "title_translated": {"en": "Test"},
            "description_translated": "",  # Empty
            "file_size": "",  # Empty
            "published_date": ""  # Empty
        }

        prepared = prepare_data_dict(package)
        result = compile_template([config["mappings"]], extended_helpers, prepared)

        # Keys fed only by empty values must be absent from the output.
        for unexpected_key in ("dct:issued", "cudr:file_size"):
            assert unexpected_key not in result