Coverage for ckanext/udc/tests/graph/test_integration.py: 99%
170 statements
« prev ^ index » next coverage.py v7.7.1, created at 2026-01-19 23:48 +0000
1"""
2Integration tests for the complete graph transformation pipeline.
4Tests the end-to-end transformation from CKAN package dictionaries
5to JSON-LD and RDF graphs using the real mapping configuration.
6"""
7import pytest
8import json
9from unittest.mock import Mock, patch
10from rdflib import Graph, URIRef, Literal, Namespace
11from pyld import jsonld
13from ckanext.udc.graph.template import compile_template
14from ckanext.udc.graph.mapping_helpers import all_helpers
15from ckanext.udc.graph.ckan_field import prepare_data_dict
16from ckanext.udc.graph.contants import EMPTY_FIELD
@pytest.fixture(autouse=True)
def mock_udc_plugin():
    """Swap the template module's ``get_plugin`` for a stub during each test.

    The fixture is autouse, so it applies to every test in this module.
    The stub plugin exposes an empty ``text_fields`` list and is yielded so
    a test can request it by name and adjust it if needed.
    """
    plugin_stub = Mock()
    plugin_stub.text_fields = []
    with patch(
        'ckanext.udc.graph.template.get_plugin',
        return_value=plugin_stub,
    ):
        yield plugin_stub
class TestBasicTransformation:
    """Flat templates compiled against small package dictionaries."""

    def test_minimal_catalogue_entry(self):
        """Compile a minimal template for a package with a plain-string title.

        The package supplies ``title_translated`` while the template refers
        to ``{title}``; the assertions expect the supplied value to appear.
        """
        template = {
            "@context": {
                "dct": "http://purl.org/dc/terms/",
                "xsd": "http://www.w3.org/2001/XMLSchema#"
            },
            "@id": "http://data.urbandatacentre.ca/catalogue/{id}",
            "@type": "http://data.urbandatacentre.ca/catalogue",
            "dct:title": "{title}"
        }
        package = {
            "id": "dataset-001",
            "title_translated": "Test Dataset",
            "name": "test-dataset"
        }

        compiled = compile_template(
            [template], all_helpers, prepare_data_dict(package)
        )

        assert compiled["@id"] == "http://data.urbandatacentre.ca/catalogue/dataset-001"
        assert compiled["@type"] == "http://data.urbandatacentre.ca/catalogue"
        assert compiled["dct:title"] == "Test Dataset"

    @patch('ckanext.udc.graph.mapping_helpers.get_default_lang')
    def test_multilingual_catalogue_entry(self, mock_get_default_lang):
        """Compile a template whose title and description carry two languages."""
        mock_get_default_lang.return_value = "en"

        def map_to_multiple_languages(val):
            # Anything that is not a language->text dict is tagged as English.
            if not isinstance(val, dict):
                return [{"@language": "en", "@value": val}]
            return [
                {"@language": code, "@value": text}
                for code, text in val.items()
            ]

        template = {
            "@context": {"dct": "http://purl.org/dc/terms/"},
            "@id": "http://data.urbandatacentre.ca/catalogue/{id}",
            "dct:title": "eval(map_to_multiple_languages(title))",
            "dct:description": "eval(map_to_multiple_languages(description))"
        }
        package = {
            "id": "dataset-002",
            "title_translated": {
                "en": "Housing Data",
                "fr": "Données sur le logement"
            },
            "description_translated": {
                "en": "Urban housing statistics",
                "fr": "Statistiques sur le logement urbain"
            },
            "name": "housing-data"
        }

        prepared = prepare_data_dict(package)
        # Extend the stock helpers with the local language mapper.
        helpers = dict(all_helpers)
        helpers["map_to_multiple_languages"] = map_to_multiple_languages

        compiled = compile_template([template], helpers, prepared)

        titles = compiled["dct:title"]
        assert len(titles) == 2
        assert {"@language": "en", "@value": "Housing Data"} in titles
        assert {"@language": "fr", "@value": "Données sur le logement"} in titles
        assert len(compiled["dct:description"]) == 2
class TestComplexTransformation:
    """Templates containing nested node lists and fields that may be absent."""

    @patch('ckanext.udc.graph.mapping_helpers.get_default_lang')
    def test_catalogue_with_publisher(self, mock_get_default_lang):
        """Compile a template with a nested publisher node and check each field."""
        mock_get_default_lang.return_value = "en"

        def map_to_multiple_languages(val):
            # Anything that is not a language->text dict is tagged as English.
            if not isinstance(val, dict):
                return [{"@language": "en", "@value": val}]
            return [
                {"@language": code, "@value": text}
                for code, text in val.items()
            ]

        template = {
            "@context": {
                "dct": "http://purl.org/dc/terms/",
                "foaf": "http://xmlns.com/foaf/0.1/"
            },
            "@id": "http://data.urbandatacentre.ca/catalogue/{id}",
            "dct:title": "eval(map_to_multiple_languages(title))",
            "dct:publisher": [{
                "@id": "http://data.urbandatacentre.ca/org/{publisher_id}",
                "@type": "foaf:Agent",
                "foaf:name": "{publisher}",
                "foaf:mbox": "{publisher_email}"
            }]
        }
        package = {
            "id": "dataset-003",
            "title_translated": {"en": "Transport Data"},
            "name": "transport-data",
            "publisher": "Urban Planning Department",
            "publisher_email": "planning@city.ca",
            "publisher_id": "upd-001"
        }

        prepared = prepare_data_dict(package)
        # Extend the stock helpers with the local language mapper.
        helpers = dict(all_helpers)
        helpers["map_to_multiple_languages"] = map_to_multiple_languages

        compiled = compile_template([template], helpers, prepared)

        publishers = compiled["dct:publisher"]
        assert len(publishers) == 1
        agent = publishers[0]
        assert agent["@id"] == "http://data.urbandatacentre.ca/org/upd-001"
        assert agent["@type"] == "foaf:Agent"
        assert agent["foaf:name"] == "Urban Planning Department"
        assert agent["foaf:mbox"] == "planning@city.ca"

    def test_optional_fields_removed(self):
        """Keys whose source value is empty or absent must not appear in the output."""
        template = {
            "@id": "http://example.com/{id}",
            "required": "{title}",
            "optional1": "{description}",
            "optional2": "{notes}",
            "nested": [{
                "field1": "{field1}",
                "field2": "{field2}"
            }]
        }
        # "notes" and "field2" are deliberately left out of the package.
        package = {
            "id": "test-001",
            "title": "Required Title",
            "description": "",  # present but empty
            "field1": "Value 1"
        }

        compiled = compile_template(
            [template], all_helpers, prepare_data_dict(package)
        )

        assert compiled["required"] == "Required Title"
        assert "optional1" not in compiled
        assert "optional2" not in compiled

        nested_nodes = compiled["nested"]
        assert len(nested_nodes) == 1
        assert nested_nodes[0]["field1"] == "Value 1"
        assert "field2" not in nested_nodes[0]
class TestDateAndTypeTransformations:
    """Typed literal nodes built through the ``to_date`` and ``to_bool`` helpers."""

    def test_date_transformation(self):
        """A populated date field yields an ``xsd:date`` node carrying that date."""
        template = {
            "@id": "http://example.com/{id}",
            "dct:issued": {
                "@type": "xsd:date",
                "@value": "{to_date(published_date)}"
            }
        }
        package = {
            "id": "test-001",
            "published_date": "2025-01-15"
        }

        compiled = compile_template(
            [template], all_helpers, prepare_data_dict(package)
        )

        # The assertions index the compiled value as a one-element list.
        issued = compiled["dct:issued"][0]
        assert issued["@type"] == "xsd:date"
        assert "2025-01-15" in str(issued["@value"])

    def test_boolean_transformation(self):
        """The input ``"yes"`` is expected to compile to the string ``"true"``."""
        template = {
            "@id": "http://example.com/{id}",
            "hasData": {
                "@type": "xsd:boolean",
                "@value": "{to_bool(contains_data)}"
            }
        }
        package = {
            "id": "test-001",
            "contains_data": "yes"
        }

        compiled = compile_template(
            [template], all_helpers, prepare_data_dict(package)
        )

        flag_node = compiled["hasData"][0]
        assert flag_node["@value"] == "true"

    def test_empty_date_removed(self):
        """An empty date string drops the whole date node but keeps other keys."""
        template = {
            "@id": "http://example.com/{id}",
            "title": "{title}",
            "issued": {
                "@type": "xsd:date",
                "@value": "{to_date(published_date)}"
            }
        }
        package = {
            "id": "test-001",
            "title": "Test",
            "published_date": ""
        }

        compiled = compile_template(
            [template], all_helpers, prepare_data_dict(package)
        )

        assert "title" in compiled
        assert "issued" not in compiled
class TestLicenseAndFormatTransformations:
    """Test license and file format transformations."""

    @patch('ckan.model.Package.get_license_register')
    def test_license_mapping(self, mock_get_register):
        """Test CKAN license mapping.

        The license register is mocked to hold a single ``cc-by`` entry whose
        ``url`` is the Creative Commons BY 4.0 address; the compiled output is
        expected to reference that URL.
        """
        from ckanext.udc.graph.mapping_helpers import licenseMap

        mock_license = Mock()
        mock_license.url = "http://creativecommons.org/licenses/by/4.0/"
        mock_get_register.return_value = {"cc-by": mock_license}

        mapping = {
            "@id": "http://example.com/{id}",
            "dct:license": "eval(mapFromCKANLicense(license_id))"
        }

        data_dict = {
            "id": "test-001",
            "license_id": "cc-by"
        }

        prepared_dict = prepare_data_dict(data_dict)

        # Start from an empty module-level license map, as before, but let
        # patch.dict put the original contents back afterwards so whatever
        # this test leaves in the map cannot leak into later tests.
        with patch.dict(licenseMap, clear=True):
            result = compile_template(
                [mapping],
                all_helpers,
                prepared_dict
            )

        assert len(result["dct:license"]) == 1
        assert result["dct:license"][0]["@id"] == "http://creativecommons.org/licenses/by/4.0/"

    def test_format_list_transformation(self):
        """Test file format list transformation.

        A comma-separated ``file_format`` string is expected to compile into
        one ``{"@id": ...}`` node per format.
        """
        mapping = {
            "@id": "http://example.com/{id}",
            "dct:format": "eval(split_to_uris(file_format))"
        }

        data_dict = {
            "id": "test-001",
            "file_format": "csv,json,xml"
        }

        prepared_dict = prepare_data_dict(data_dict)
        result = compile_template(
            [mapping],
            all_helpers,
            prepared_dict
        )

        assert len(result["dct:format"]) == 3
        assert {"@id": "csv"} in result["dct:format"]
        assert {"@id": "json"} in result["dct:format"]
        assert {"@id": "xml"} in result["dct:format"]
class TestURLTransformations:
    """URL values passed through the ``quote_url`` helper."""

    def test_url_quoting(self):
        """Spaces in the URL are percent-encoded while the scheme and host remain."""
        template = {
            "@id": "http://example.com/{id}",
            "dcat:accessURL": {
                "@id": "eval(quote_url(location))"
            }
        }
        package = {
            "id": "test-001",
            "location": "http://example.com/data with spaces"
        }

        compiled = compile_template(
            [template], all_helpers, prepare_data_dict(package)
        )

        # The assertions index the compiled value as a one-element list.
        access_url = compiled["dcat:accessURL"][0]["@id"]
        assert "with%20spaces" in access_url
        assert "http://example.com/" in access_url
class TestTagTransformations:
    """Keyword generation from per-language tag lists."""

    def test_multilingual_tags(self):
        """Every tag in every language becomes one language-tagged keyword."""

        def map_from_tags_multiple_languages(tags_dict):
            # Flatten {lang: [tag, ...]} into one language-tagged value per tag.
            return [
                {"@language": code, "@value": label.strip()}
                for code, labels in tags_dict.items()
                for label in labels
            ]

        template = {
            "@id": "http://example.com/{id}",
            "dcat:keyword": "eval(map_from_tags_multiple_languages(tags))"
        }
        package = {
            "id": "test-001",
            "tags_translated": {
                "en": ["housing", "transport"],
                "fr": ["logement", "transport"]
            },
            "name": "test"
        }

        prepared = prepare_data_dict(package)
        # Extend the stock helpers with the local tag mapper.
        helpers = dict(all_helpers)
        helpers["map_from_tags_multiple_languages"] = map_from_tags_multiple_languages

        compiled = compile_template([template], helpers, prepared)

        keywords = compiled["dcat:keyword"]
        assert len(keywords) == 4
        assert {"@language": "en", "@value": "housing"} in keywords
        assert {"@language": "fr", "@value": "logement"} in keywords
class TestRDFGeneration:
    """Feeding compiled templates into rdflib to obtain real triples."""

    @patch('ckanext.udc.graph.mapping_helpers.get_default_lang')
    def test_jsonld_to_rdf_conversion(self, mock_get_default_lang):
        """The compiled JSON-LD loads into a graph with the expected triples."""
        mock_get_default_lang.return_value = "en"

        template = {
            "@context": {
                "dct": "http://purl.org/dc/terms/",
                "xsd": "http://www.w3.org/2001/XMLSchema#"
            },
            "@id": "http://data.urbandatacentre.ca/catalogue/{id}",
            "@type": "http://www.w3.org/ns/dcat#Dataset",
            "dct:title": "{title}",
            "dct:issued": {
                "@type": "xsd:date",
                "@value": "{to_date(published_date)}"
            }
        }
        package = {
            "id": "dataset-001",
            "title_translated": "Test Dataset",
            "name": "test-dataset",
            "published_date": "2025-01-01"
        }

        jsonld_doc = compile_template(
            [template], all_helpers, prepare_data_dict(package)
        )

        # Load the compiled document straight into an rdflib graph.
        graph = Graph()
        graph.parse(data=jsonld_doc, format='json-ld')

        dct_ns = Namespace("http://purl.org/dc/terms/")
        dcat_ns = Namespace("http://www.w3.org/ns/dcat#")
        rdf_type = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
        dataset_uri = URIRef("http://data.urbandatacentre.ca/catalogue/dataset-001")

        # The dataset must appear as the subject of at least one triple.
        assert (dataset_uri, None, None) in graph

        # At least one title must be attached to it.
        found_titles = list(graph.objects(dataset_uri, dct_ns.title))
        assert len(found_titles) > 0

        # It must be typed as a DCAT dataset.
        found_types = list(graph.objects(dataset_uri, rdf_type))
        assert dcat_ns.Dataset in found_types
class TestErrorHandling:
    """Templates that reference helpers or variables which do not exist."""

    def test_invalid_eval_expression(self):
        """A key whose eval expression calls an unknown helper is dropped."""
        template = {
            "@id": "http://example.com/{id}",
            "invalid": "eval(nonexistent_function())"
        }
        package = {
            "id": "test-001"
        }

        compiled = compile_template(
            [template], all_helpers, prepare_data_dict(package)
        )

        # The failing key is expected to be absent rather than raise.
        assert "invalid" not in compiled

    def test_missing_required_variable(self):
        """An ``@id`` built from an absent variable is dropped or left unresolved."""
        template = {
            "@id": "http://example.com/{missing_id}",
            "title": "{title}"
        }
        package = {
            "title": "Test"
        }

        compiled = compile_template(
            [template], all_helpers, prepare_data_dict(package)
        )

        # Accept either outcome: "@id" removed, or kept with the placeholder name.
        assert "@id" not in compiled or "missing_id" in compiled.get("@id", "")