Coverage for ckanext/udc/tests/graph/test_config_validation.py: 96%
168 statements
« prev ^ index » next coverage.py v7.7.1, created at 2026-01-19 23:48 +0000
« prev ^ index » next coverage.py v7.7.1, created at 2026-01-19 23:48 +0000
1"""
2Tests validating the actual config.example.json mapping configuration.
4These tests validate that the real mapping configuration from config.example.json
5works correctly with realistic CKAN data.
6"""
7import pytest
8import json
9from unittest.mock import Mock, patch
10from pathlib import Path
11from rdflib import Graph, URIRef, Namespace
13from ckanext.udc.graph.template import compile_template
14from ckanext.udc.graph.mapping_helpers import all_helpers, licenseMap
15from ckanext.udc.graph.ckan_field import prepare_data_dict
# Path to the example configuration exercised by these tests: it sits two
# directories above the directory that holds this test module.
CONFIG_PATH = Path(__file__).parent.parent.parent.joinpath("config.example.json")
@pytest.fixture(autouse=True)
def mock_udc_plugin():
    """Patch the template module's ``get_plugin`` with a stub for every test.

    Yields:
        The ``Mock`` handed back by the patched ``get_plugin``; its
        ``text_fields`` attribute is ``['title', 'description']``.
    """
    plugin_stub = Mock(text_fields=['title', 'description'])
    # The patch stays active for as long as the test using the fixture runs.
    with patch('ckanext.udc.graph.template.get_plugin', return_value=plugin_stub):
        yield plugin_stub
@pytest.fixture
def config():
    """Load the actual config.example.json file.

    Returns:
        The parsed JSON document found at ``CONFIG_PATH``.
    """
    # Decode explicitly as UTF-8: without ``encoding`` the locale's default
    # codec is used, which can mis-decode (or reject) non-ASCII characters
    # in the JSON file on platforms whose default is not UTF-8.
    with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
        return json.load(f)
@pytest.fixture
def extended_helpers():
    """Provide ``all_helpers`` plus three extra mapping helpers.

    Returns:
        A new dict containing every entry of ``all_helpers`` and the keys
        ``map_to_multiple_languages``, ``map_from_tags_multiple_languages``
        and ``map_to_multiple_datasets`` bound to the local functions below.
    """
    def map_to_multiple_languages(val):
        """Turn a ``{lang: text}`` dict into language-tagged literals.

        Any non-dict value is wrapped as a single literal tagged ``"en"``.
        """
        if not isinstance(val, dict):
            return [{"@language": "en", "@value": val}]
        return [{"@language": code, "@value": text} for code, text in val.items()]

    def map_from_tags_multiple_languages(tags_dict):
        """Flatten ``{lang: [tag, ...]}`` into language-tagged, stripped tags."""
        return [
            {"@language": code, "@value": label.strip()}
            for code, labels in tags_dict.items()
            for label in labels
        ]

    def map_to_multiple_datasets(datasets):
        """Build one ``dcat:Dataset`` node per entry that has a truthy ``id``."""
        dataset_ids = (entry.get("id") for entry in datasets)
        return [
            {
                "@id": dataset_id,
                "dcat:landingPage": dataset_id,
                "dcat:accessURL": dataset_id,
                "@type": "dcat:Dataset"
            }
            for dataset_id in dataset_ids
            if dataset_id
        ]

    # Start from a copy of the stock helpers so the shared mapping is not mutated.
    helpers = {**all_helpers}
    helpers["map_to_multiple_languages"] = map_to_multiple_languages
    helpers["map_from_tags_multiple_languages"] = map_from_tags_multiple_languages
    helpers["map_to_multiple_datasets"] = map_to_multiple_datasets
    return helpers
76class TestConfigStructure:
77 """Test the structure of config.example.json."""
79 def test_config_loads(self, config):
80 """Test that config.example.json loads successfully."""
81 assert config is not None
82 assert isinstance(config, dict)
84 def test_has_maturity_model(self, config):
85 """Test that config has maturity_model section."""
86 assert "maturity_model" in config
87 assert isinstance(config["maturity_model"], list)
88 assert len(config["maturity_model"]) == 6 # 6 maturity levels
90 def test_has_mappings(self, config):
91 """Test that config has mappings section."""
92 assert "mappings" in config
93 assert isinstance(config["mappings"], dict)
95 def test_mappings_has_context(self, config):
96 """Test that mappings have @context."""
97 assert "@context" in config["mappings"]
98 assert isinstance(config["mappings"]["@context"], dict)
100 def test_mappings_has_id(self, config):
101 """Test that mappings have @id."""
102 assert "@id" in config["mappings"]
104 def test_maturity_levels_structure(self, config):
105 """Test maturity level structure."""
106 for level in config["maturity_model"]:
107 assert "title" in level
108 assert "name" in level
109 assert "fields" in level
110 assert isinstance(level["fields"], list)
class TestMaturityLevel1Fields:
    """Test Maturity Level 1 (Basic Information) fields mapping."""

    @patch('ckanext.udc.graph.mapping_helpers.get_default_lang')
    def test_basic_fields_transformation(self, mock_get_default_lang, config, extended_helpers):
        """Test transformation of basic maturity level 1 fields.

        Compiles the ``mappings`` section of the config against a package
        dict holding level-1 fields and checks the identifying keys, the
        multilingual title, the theme and the file size in the output.
        """
        mock_get_default_lang.return_value = "en"

        data_dict = {
            "id": "housing-2025",
            "name": "housing-data-2025",
            "title_translated": {
                "en": "Housing Statistics 2025",
                "fr": "Statistiques sur le logement 2025"
            },
            "description_translated": {
                "en": "Comprehensive housing data",
                "fr": "Données complètes sur le logement"
            },
            "tags_translated": {
                "en": ["housing", "statistics"],
                "fr": ["logement", "statistiques"]
            },
            "theme": "Housing",
            "file_format": "csv,json",
            "file_size": "125.5",
            "unique_metadata_identifier": "meta-001",
            "published_date": "2025-01-01",
            "time_span_start": "2024-01-01",
            "time_span_end": "2024-12-31",
            "geo_span": "Toronto"
        }

        prepared_dict = prepare_data_dict(data_dict)
        mapping = config["mappings"]

        result = compile_template(
            [mapping],
            extended_helpers,
            prepared_dict
        )

        # Validate basic structure
        assert result["@id"] == "http://data.urbandatacentre.ca/catalogue/housing-2025"
        assert result["@type"] == "http://data.urbandatacentre.ca/catalogue"

        # Validate multilingual title: one entry per translation (en + fr).
        # Checking len(result["dct:title"][0]) would only count the keys of
        # the first literal ("@language"/"@value") and pass for any number
        # of translations.
        assert "dct:title" in result
        assert len(result["dct:title"]) == 2

        # Validate theme
        assert "dcat:theme" in result

        # Validate file size
        assert "cudr:file_size" in result
class TestMaturityLevel2Fields:
    """Checks for the Maturity Level 2 (Access) part of the mapping."""

    @patch('ckan.model.Package.get_license_register')
    @patch('ckanext.udc.graph.mapping_helpers.get_default_lang')
    def test_access_fields_transformation(self, mock_get_default_lang, mock_get_register,
                                          config, extended_helpers):
        """Compile access-related fields and check the expected output keys."""
        mock_get_default_lang.return_value = "en"
        # The patched license register knows a single license, "cc-by".
        cc_by = Mock(url="http://creativecommons.org/licenses/by/4.0/")
        mock_get_register.return_value = {"cc-by": cc_by}

        # Empty the shared licenseMap before compiling (presumably a lookup
        # cache filled from the license register - confirm in mapping_helpers).
        licenseMap.clear()

        package = {
            "id": "dataset-002",
            "name": "test-dataset",
            "title_translated": {"en": "Test"},
            "access_category": "Open",
            "license_id": "cc-by",
            "limits_on_use": "Academic use only",
            "location": "http://example.com/data",
            "data_service": "http://ckan.example.com",
            "owner": "Data Owner Org",
            "access_steward": "John Doe",
            "access_steward_email": "john@example.com",
            "publisher": "Publishing Org",
            "publisher_email": "pub@example.com"
        }

        prepared = prepare_data_dict(package)
        compiled = compile_template([config["mappings"]], extended_helpers, prepared)

        # Exactly one license entry is expected.
        assert "dct:license" in compiled
        assert len(compiled["dct:license"]) == 1

        # Access URL, publisher and contact point must all be emitted.
        for expected_key in ("dcat:accessURL", "dct:publisher", "dcat:contactPoint"):
            assert expected_key in compiled
class TestMaturityLevel3Fields:
    """Checks for the Maturity Level 3 (Content) part of the mapping."""

    @patch('ckanext.udc.graph.mapping_helpers.get_default_lang')
    def test_content_fields_transformation(self, mock_get_default_lang, config, extended_helpers):
        """Compile content-related fields and check language and FAIR keys."""
        mock_get_default_lang.return_value = "en"

        package = {
            "id": "dataset-003",
            "name": "test-dataset",
            "title_translated": {"en": "Test"},
            "accessed_date": "2025-01-15",
            "description_document": "http://example.com/docs",
            "language": "en,fr",
            "persistent_identifier": "yes",
            "global_unique_identifier": "yes",
            "file_format": "csv,json",
            "source": "http://example.com/source",
            "version": "1.0",
            "version_dataset": {"url": "http://example.com/v1", "title": "Version 1"},
            "dataset_versions": [
                {"url": "http://example.com/v2", "title": "Version 2"}
            ],
            "provenance": "Original data collected in 2024",
            "provenance_url": "http://example.com/provenance"
        }

        prepared = prepare_data_dict(package)
        compiled = compile_template([config["mappings"]], extended_helpers, prepared)

        # Language mapping first, then the two FAIR indicators.
        for expected_key in ("dct:language", "fair:rda-f1-01d", "fair:rda-f1-02d"):
            assert expected_key in compiled
class TestCompleteMaturityLevels:
    """Checks the mapping against one entry spanning every maturity level."""

    @patch('ckan.model.Package.get_license_register')
    @patch('ckanext.udc.graph.mapping_helpers.get_default_lang')
    def test_complete_catalogue_entry(self, mock_get_default_lang, mock_get_register,
                                      config, extended_helpers):
        """Compile a comprehensive catalogue entry and check its key outputs."""
        mock_get_default_lang.return_value = "en"
        # The patched license register knows a single license, "odc-odbl".
        odbl = Mock(url="http://opendatacommons.org/licenses/odbl/")
        mock_get_register.return_value = {"odc-odbl": odbl}

        # Empty the shared licenseMap before compiling (presumably a lookup
        # cache filled from the license register - confirm in mapping_helpers).
        licenseMap.clear()

        # One package dict carrying fields from all six maturity levels.
        package = {
            # Level 1 - Basic Information
            "id": "comprehensive-dataset-001",
            "name": "comprehensive-housing-data",
            "title_translated": {
                "en": "Comprehensive Housing Data 2025",
                "fr": "Données complètes sur le logement 2025"
            },
            "description_translated": {
                "en": "Complete housing statistics for urban areas",
                "fr": "Statistiques complètes sur le logement urbain"
            },
            "tags_translated": {
                "en": ["housing", "statistics", "urban"],
                "fr": ["logement", "statistiques", "urbain"]
            },
            "theme": "Housing",
            "file_format": "csv,json,xml",
            "file_size": "250.75",
            "unique_metadata_identifier": "meta-comprehensive-001",
            "published_date": "2025-01-01",
            "time_span_start": "2020-01-01",
            "time_span_end": "2024-12-31",
            "geo_span": "Greater Toronto Area",

            # Level 2 - Access
            "access_category": "Open",
            "license_id": "odc-odbl",
            "location": "http://data.city.ca/housing-2025",
            "publisher": "City Planning Department",
            "publisher_email": "planning@city.ca",

            # Level 3 - Content
            "language": "en,fr",
            "persistent_identifier": "yes",
            "global_unique_identifier": "yes",

            # Level 4 - Privacy
            "contains_individual_data": "no",
            "contains_identifiable_individual_data": "no",

            # Level 5 - Indigenous Data
            "contains_indigenous_data": "no",

            # Level 6 - Quality
            "number_of_rows": "10000",
            "number_of_columns": "25"
        }

        prepared = prepare_data_dict(package)
        compiled = compile_template([config["mappings"]], extended_helpers, prepared)

        # Core identity of the compiled node.
        assert compiled["@id"] == "http://data.urbandatacentre.ca/catalogue/comprehensive-dataset-001"
        assert compiled["@type"] == "http://data.urbandatacentre.ca/catalogue"

        # Multilingual fields: two translations each; keywords are
        # 3 tags x 2 languages.
        expected_lengths = {
            "dct:title": 2,
            "dct:description": 2,
            "dcat:keyword": 6,
        }
        for key, expected in expected_lengths.items():
            assert len(compiled[key]) == expected

        # Temporal fields, license and quality metrics must be present.
        for expected_key in (
            "cudr:hasTemporalStart",
            "cudr:hasTemporalEnd",
            "dct:license",
            "cudr:rows",
            "cudr:columns",
        ):
            assert expected_key in compiled
class TestRDFGraphGeneration:
    """Checks that the compiled mapping output loads into an rdflib graph."""

    @patch('ckanext.udc.graph.mapping_helpers.get_default_lang')
    def test_jsonld_to_rdf_graph(self, mock_get_default_lang, config, extended_helpers):
        """Parse the compiled output as JSON-LD and look up title and keywords."""
        mock_get_default_lang.return_value = "en"

        package = {
            "id": "test-rdf-001",
            "name": "test-dataset",
            "title_translated": {"en": "Test Dataset"},
            "description_translated": {"en": "Test Description"},
            "tags_translated": {"en": ["test", "rdf"]},
            "published_date": "2025-01-01"
        }

        prepared = prepare_data_dict(package)
        compiled = compile_template([config["mappings"]], extended_helpers, prepared)

        # Feed the compiled structure to rdflib's JSON-LD parser.
        graph = Graph()
        graph.parse(data=compiled, format='json-ld')

        dct_ns = Namespace("http://purl.org/dc/terms/")
        dcat_ns = Namespace("http://www.w3.org/ns/dcat#")
        subject = URIRef("http://data.urbandatacentre.ca/catalogue/test-rdf-001")

        # The dataset must appear as the subject of at least one triple.
        assert (subject, None, None) in graph

        # At least one title literal.
        titles = list(graph.objects(subject, dct_ns.title))
        assert titles

        # At least the two keywords "test" and "rdf".
        keywords = list(graph.objects(subject, dcat_ns.keyword))
        assert len(keywords) >= 2
class TestOptionalFieldHandling:
    """Checks how the mapping treats absent or empty optional fields."""

    @patch('ckanext.udc.graph.mapping_helpers.get_default_lang')
    def test_minimal_required_fields(self, mock_get_default_lang, config, extended_helpers):
        """Compile a package holding only id, name and a translated title."""
        mock_get_default_lang.return_value = "en"

        package = {
            "id": "minimal-001",
            "name": "minimal-dataset",
            "title_translated": {"en": "Minimal Dataset"}
        }

        prepared = prepare_data_dict(package)
        compiled = compile_template([config["mappings"]], extended_helpers, prepared)

        # Identifier and title must survive.
        assert compiled["@id"] == "http://data.urbandatacentre.ca/catalogue/minimal-001"
        assert "dct:title" in compiled

        # Only requires that at least one of these optional keys is absent,
        # not that all of them are.
        optional_fields = (
            "dct:description", "cudr:accessCategory", "dct:license",
            "dcat:keyword", "cudr:file_size",
        )
        assert any(field not in compiled for field in optional_fields)

    @patch('ckanext.udc.graph.mapping_helpers.get_default_lang')
    def test_empty_optional_fields_removed(self, mock_get_default_lang, config, extended_helpers):
        """Compile a package whose optional fields are empty strings."""
        mock_get_default_lang.return_value = "en"

        package = {
            "id": "test-empty-001",
            "name": "test-dataset",
            "title_translated": {"en": "Test"},
            "description_translated": "",  # Empty
            "file_size": "",  # Empty
            "published_date": ""  # Empty
        }

        prepared = prepare_data_dict(package)
        compiled = compile_template([config["mappings"]], extended_helpers, prepared)

        # Keys for the empty date and file size must not be emitted.
        for absent_key in ("dct:issued", "cudr:file_size"):
            assert absent_key not in compiled