Coverage for ckanext/udc/graph/mapping_helpers.py: 93%
120 statements
« prev ^ index » next coverage.py v7.7.1, created at 2026-01-19 23:48 +0000
« prev ^ index » next coverage.py v7.7.1, created at 2026-01-19 23:48 +0000
1# This file contains the helper functions/variables that are available to the UDC mapping config.
3import uuid
4import urllib
5import urllib.parse
6from datetime import datetime
7from typing import List, Optional
8from rdflib import Literal, XSD
9from .contants import EMPTY_FIELD
10import ckan.model as model
11from ckanext.udc.solr.config import get_default_lang
# Module-level caches shared by the helpers below.
# uuidMap: key passed to generate_uuid() -> UUID string handed out for it.
uuidMap: dict = {}
# licenseMap: CKAN license id -> license url, filled lazily by mapFromCKANLicense().
licenseMap: dict = {}
def generate_uuid(key=None):
    """
    Return a random (version 4) UUID as a string.

    Without a key, every call produces a fresh UUID. With a key, the first
    call stores a new UUID in the module-level ``uuidMap`` and every later
    call with the same key returns that stored value.
    """
    if key is None:
        return str(uuid.uuid4())

    # Only mint a UUID the first time this key is seen.
    if key not in uuidMap:
        uuidMap[key] = str(uuid.uuid4())
    return uuidMap[key]
def to_integer(val: str):
    """Convert ``val`` with the built-in ``int`` and return the result."""
    parsed = int(val)
    return parsed
def to_float(val: str):
    """Convert ``val`` with the built-in ``float`` and return the result."""
    parsed = float(val)
    return parsed
def to_date(val: str):
    """
    Wrap ``val`` in an rdflib ``Literal`` typed as ``xsd:date``.

    ``EMPTY_FIELD`` is returned when ``val`` is the empty string or already
    ``EMPTY_FIELD``, and also when the resulting literal evaluates as falsy.
    """
    is_blank = val == EMPTY_FIELD or val == ''
    if is_blank:
        return EMPTY_FIELD

    xsd_date = Literal(val + "", datatype=XSD.date)
    return xsd_date if xsd_date else EMPTY_FIELD
def to_bool(val: str):
    """
    Translate a yes/no answer into the strings "true" / "false".

    The comparison is case-insensitive. Any other value yields ``None``.
    """
    answers = {"yes": "true", "no": "false"}
    return answers.get(val.lower())
def mapFromCKANLicense(val: str):
    """
    Map a CKAN license value to a single-element json-ld ``@id`` array.

    ``val`` could be a license id or a license url. The id -> url table is
    read from CKAN's license register the first time it is needed and kept
    in the module-level ``licenseMap``.
    """
    register = model.Package.get_license_register()
    if not licenseMap:
        licenseMap.update(
            (license_id, entry.url) for license_id, entry in register.items()
        )

    known_url = licenseMap.get(val)
    if known_url:
        return [{"@id": known_url}]
    if val.startswith("http"):
        # Already a URL: use it as the identifier directly.
        return [{"@id": val}]
    # CKAN license that does not have a url
    return [{"@id": f"http://data.urbandatacentre.ca/licenses/{val}"}]
# def to_datetime(val: str):
#     return Literal(val, datatype=XSD.dateTime)
def split_to_uris(val: str, separator=","):
    """
    Split a delimited string into a json-ld array of ``@id`` references.

    Args:
        val: String holding one or more URIs joined by ``separator``.
        separator: Delimiter between URIs (defaults to a comma).

    Returns:
        A list of ``{"@id": uri}`` dicts in input order. Whitespace around
        each URI is removed and empty segments (e.g. from a trailing
        separator or an empty input) are dropped, so no blank or
        space-padded identifiers are emitted.
    """
    if not val:
        return []
    candidates = (part.strip() for part in val.split(separator))
    return [{"@id": uri} for uri in candidates if uri]
def quote_url(url: str):
    """Percent-encode a URL while leaving the ``://`` scheme separator and the characters ``/ ? & =`` untouched."""
    # Quote each side of "://" on its own so the separator itself survives.
    pieces = url.strip().split("://")
    return "://".join(
        urllib.parse.quote(piece, safe="/?&=") for piece in pieces
    )
def mapFromCKANTags(tags_str: str):
    """Turn a comma-separated tag string into a json-ld array of ``@value`` entries, trimming whitespace around each tag."""
    return [{"@value": raw_tag.strip()} for raw_tag in tags_str.split(",")]
def map_from_tags_multiple_languages(tags_dict: dict):
    """
    Flatten a per-language tag mapping into a json-ld array.

    For the tags_translated field: ``{lang: [tag, ...], ...}`` becomes one
    ``{"@language": lang, "@value": tag}`` entry per tag, with surrounding
    whitespace stripped from each tag.
    """
    return [
        {"@language": language, "@value": raw_tag.strip()}
        for language, language_tags in tags_dict.items()
        for raw_tag in language_tags
    ]
def map_to_multiple_languages(val):
    """
    Map a string or a langs dict to a json-ld language array. For custom fields.

    A plain string is tagged with the language returned by
    ``get_default_lang()``. A ``{lang: value}`` dict yields one entry per
    language. Any other input yields an empty list.
    """
    if isinstance(val, str):
        return [{"@language": get_default_lang(), "@value": val}]

    if isinstance(val, dict):
        localized = []
        for language, text in val.items():
            localized.append({"@language": language, "@value": text})
        return localized

    return []
def map_to_single_language(val, lang='en'):
    """
    Map a string or a langs dict to a single string. For custom fields.

    A plain string is returned unchanged. For a ``{lang: value}`` dict the
    entry for ``lang`` is returned when present; otherwise the entry for
    ``get_default_lang()`` is used, falling back to "". Any other input
    yields "".
    """
    if isinstance(val, str):
        return val
    if not isinstance(val, dict):
        return ""

    if lang and lang in val:
        return val[lang]
    # Requested language missing: fall back to the default language.
    return val.get(get_default_lang(), "")
def map_to_multiple_datasets(datasets: Optional[List[dict]]):
    """
    Map a list of dataset dicts to a json-ld array of dcat:Dataset nodes.

    Args:
        datasets: List of dicts; the ``"id"`` value of each dict is used as
            the node's ``@id``, ``dcat:landingPage`` and ``dcat:accessURL``.

    Returns:
        One node per entry that has a truthy ``"id"``. Entries that are not
        dicts or lack an ``"id"`` are skipped, and an empty or missing
        input yields an empty list.
    """
    if not datasets:
        return []

    result = []
    for ds in datasets:
        # Skip malformed entries rather than failing on ``.get``, matching
        # how map_dataset_versions_to_rdf treats non-dict items.
        if not isinstance(ds, dict):
            continue
        ds_id = ds.get("id")
        if ds_id:
            result.append({
                "@id": ds_id,
                "dcat:landingPage": ds_id,
                "dcat:accessURL": ds_id,
                "@type": "dcat:Dataset",
            })
    return result
def map_version_dataset_to_rdf(version_dataset: dict):
    """
    Map a single version_dataset dict to an RDF Dataset reference.

    Returns a one-element list holding a ``dcat:Dataset`` node whose ``@id``
    is the dict's ``"url"``; non-empty ``"title"`` and ``"description"``
    values are attached as dcterms title / description. An empty list is
    returned when the input is empty, not a dict, or has no url.
    """
    if not version_dataset or not isinstance(version_dataset, dict):
        return []

    url = version_dataset.get("url", "")
    if not url:
        return []

    node = {"@id": url, "@type": "dcat:Dataset"}
    optional_properties = (
        ("http://purl.org/dc/terms/title", version_dataset.get("title", "")),
        ("http://purl.org/dc/terms/description", version_dataset.get("description", "")),
    )
    # Only emit the optional properties that actually carry a value.
    for predicate, text in optional_properties:
        if text:
            node[predicate] = text
    return [node]
def map_dataset_versions_to_rdf(dataset_versions: list):
    """
    Map a list of dataset version dicts to RDF Dataset references.

    Each dict with a non-empty ``"url"`` becomes a ``dcat:Dataset`` node
    whose ``@id`` is that url; non-empty ``"title"`` / ``"description"``
    values are attached as dcterms title / description. Items that are not
    dicts or have no url are skipped. An empty or non-list input yields an
    empty list.
    """
    if not dataset_versions or not isinstance(dataset_versions, list):
        return []

    # Source field -> predicate for the optional descriptive properties.
    optional_predicates = {
        "title": "http://purl.org/dc/terms/title",
        "description": "http://purl.org/dc/terms/description",
    }

    references = []
    for entry in dataset_versions:
        url = entry.get("url", "") if isinstance(entry, dict) else ""
        if not url:
            continue

        reference = {"@id": url, "@type": "dcat:Dataset"}
        for field, predicate in optional_predicates.items():
            text = entry.get(field, "")
            if text:
                reference[predicate] = text
        references.append(reference)

    return references
# Registry of the helpers exposed to the UDC mapping config.
# Every helper is registered under its own function name.
all_helpers = {
    helper.__name__: helper
    for helper in (
        generate_uuid,
        to_integer,
        to_float,
        to_date,
        to_bool,
        split_to_uris,
        mapFromCKANLicense,
        mapFromCKANTags,
        quote_url,
        map_to_multiple_languages,
        map_to_single_language,
        map_to_multiple_datasets,
        map_from_tags_multiple_languages,
        map_version_dataset_to_rdf,
        map_dataset_versions_to_rdf,
    )
}