Coverage for ckanext/udc/graph/mapping_helpers.py: 93%

120 statements  

« prev     ^ index     » next       coverage.py v7.7.1, created at 2026-01-19 23:48 +0000

1# This file contains the helper functions/variables that are available to the UDC mapping config. 

2 

3import uuid 

4import urllib 

5import urllib.parse 

6from datetime import datetime 

7from typing import List, Optional 

8from rdflib import Literal, XSD 

9from .contants import EMPTY_FIELD 

10import ckan.model as model 

11from ckanext.udc.solr.config import get_default_lang 

12 

# Cache of key -> UUID string, filled by generate_uuid() for repeatable lookups.
uuidMap = dict()
# Cache of CKAN license_id -> license url, filled by mapFromCKANLicense().
licenseMap = dict()

15 

16 

def generate_uuid(key=None):
    """
    Return a random UUID4 as a string.

    Without a key, every call produces a fresh UUID. With a key, the UUID
    created the first time that key is seen is stored in the module-level
    ``uuidMap`` and returned again on every later call with the same key.
    """
    if key is None:
        return str(uuid.uuid4())

    # Only mint a new UUID the first time this key shows up.
    if key not in uuidMap:
        uuidMap[key] = str(uuid.uuid4())
    return uuidMap[key]

30 

31 

def to_integer(val: str):
    """Convert ``val`` with the built-in ``int`` constructor and return the result."""
    as_int = int(val)
    return as_int

34 

35 

def to_float(val: str):
    """Convert ``val`` with the built-in ``float`` constructor and return the result."""
    as_float = float(val)
    return as_float

38 

39 

def to_date(val: str):
    """
    Wrap ``val`` in an rdflib ``Literal`` typed as ``xsd:date``.

    ``EMPTY_FIELD`` and the empty string are passed through as
    ``EMPTY_FIELD``. ``EMPTY_FIELD`` is also returned when the resulting
    literal evaluates as falsy.
    """
    is_blank = val == EMPTY_FIELD or val == ''
    if is_blank:
        return EMPTY_FIELD

    xsd_date = Literal(val + "", datatype=XSD.date)
    return xsd_date if xsd_date else EMPTY_FIELD

48 

def to_bool(val: str):
    """
    Translate a yes/no answer (case-insensitive) into "true"/"false".

    Any other value yields ``None``.
    """
    answers = {'yes': "true", 'no': "false"}
    return answers.get(val.lower())

54 

def mapFromCKANLicense(val: str):
    """
    Map a CKAN license reference to a JSON-LD ``@id`` array.

    ``val`` could be a license_id or a license_url. The module-level
    ``licenseMap`` (license_id -> url) is filled from the CKAN license
    register the first time it is found empty, then reused.

    Returns a one-element list ``[{"@id": ...}]`` holding, in order of
    preference: the registered url for ``val``, ``val`` itself when it
    starts with "http", or a URI under
    ``http://data.urbandatacentre.ca/licenses/``.
    """
    # Only consult the register while the cache is still empty.
    if not licenseMap:
        register = model.Package.get_license_register()
        licenseMap.update(
            (license_id, ckan_license.url)
            for license_id, ckan_license in register.items()
        )

    license_url = licenseMap.get(val)
    if license_url:
        return [{"@id": license_url}]
    if val.startswith("http"):
        return [{"@id": val}]
    # CKAN license that does not have url
    return [{"@id": f"http://data.urbandatacentre.ca/licenses/{val}"}]

68 

69 

70 

71# def to_datetime(val: str): 

72# return Literal(val, datatype=XSD.dateTime) 

73 

74 

def split_to_uris(val: str, separator=","):
    """
    Split ``val`` on ``separator`` into a JSON-LD array of ``{"@id": uri}``.

    Whitespace around each piece is stripped and empty pieces are dropped,
    so "a, b" does not yield " b" and an empty string or a trailing
    separator does not yield an empty ``@id``.
    """
    return [
        {"@id": uri}
        for uri in (piece.strip() for piece in val.split(separator))
        if uri
    ]

77 

78 

def quote_url(url: str):
    """
    Percent-encode a URL while leaving the ``://`` scheme separator intact.

    The stripped input is split on "://", each piece is quoted with
    ``/ ? & =`` kept as-is (so path and query-string delimiters survive),
    and the pieces are joined back with "://".
    """
    pieces = url.strip().split("://")
    return "://".join(
        urllib.parse.quote(piece, safe="/?&=") for piece in pieces
    )

86 

87 

def mapFromCKANTags(tags_str: str):
    """
    Turn a comma-separated tag string into a JSON-LD array of ``@value`` nodes.

    Each tag is stripped of surrounding whitespace. Tags that are empty
    after stripping (empty input, doubled or trailing commas) are skipped
    so no empty-string literal is produced.
    """
    return [
        {"@value": tag}
        for tag in (piece.strip() for piece in tags_str.split(","))
        if tag
    ]

97 

def map_from_tags_multiple_languages(tags_dict: dict):
    """
    Flatten ``{lang: [tag, ...], ...}`` (the tags_translated field) into a
    JSON-LD array of ``{"@language": lang, "@value": tag}`` nodes.

    Every tag is stripped of surrounding whitespace.
    """
    return [
        {"@language": language, "@value": label.strip()}
        for language, labels in tags_dict.items()
        for label in labels
    ]

110 

def map_to_multiple_languages(val):
    """
    Map a string or a ``{lang: text}`` dict to a JSON-LD language array.
    For custom fields.

    A plain string is tagged with the language from ``get_default_lang()``.
    A dict yields one node per entry. Anything else yields an empty list.
    """
    if isinstance(val, str):
        # A bare string carries no language, so fall back to the default one.
        return [{"@language": get_default_lang(), "@value": val}]
    if isinstance(val, dict):
        return [
            {"@language": language, "@value": text}
            for language, text in val.items()
        ]
    return []

121 

def map_to_single_language(val, lang='en'):
    """
    Map a string or a ``{lang: text}`` dict to a single string.
    For custom fields.

    A string is returned unchanged. For a dict, the entry for ``lang`` is
    returned when present; otherwise the entry for the language from
    ``get_default_lang()``, or "" if that is missing too. Any other input
    yields "".
    """
    if isinstance(val, str):
        return val
    if not isinstance(val, dict):
        return ""

    if lang and lang in val:
        return val[lang]
    # Requested language is absent: fall back to the default language.
    return val.get(get_default_lang(), "")

136 

def map_to_multiple_datasets(datasets: List[dict]):
    """
    Map a list of dataset dicts to a JSON-LD array of dcat:Dataset nodes.

    Each entry is expected to be a dict with an "id" key. That id is used
    as the node's ``@id`` and also as its ``dcat:landingPage`` and
    ``dcat:accessURL``. Entries that are not dicts or have no truthy "id"
    are skipped, and a missing/empty ``datasets`` yields an empty list.
    """
    if not datasets:
        return []

    result = []
    for ds in datasets:
        # Skip malformed entries instead of failing on ``.get``.
        if not isinstance(ds, dict):
            continue
        ds_id = ds.get("id")
        if ds_id:
            result.append({
                "@id": ds_id,
                "dcat:landingPage": ds_id,
                "dcat:accessURL": ds_id,
                "@type": "dcat:Dataset",
            })
    return result

150 

def map_version_dataset_to_rdf(version_dataset: dict):
    """
    Map a single version_dataset dict to an RDF Dataset reference.

    Returns a one-element list holding a dcat:Dataset node whose ``@id`` is
    the dict's "url", with dct:title / dct:description added when "title" /
    "description" are non-empty. Returns an empty list when the input is
    empty, not a dict, or has no "url".
    """
    if not (version_dataset and isinstance(version_dataset, dict)):
        return []

    link = version_dataset.get("url", "")
    if not link:
        return []

    node = {"@id": link, "@type": "dcat:Dataset"}
    optional_fields = (
        ("title", "http://purl.org/dc/terms/title"),
        ("description", "http://purl.org/dc/terms/description"),
    )
    for field, predicate in optional_fields:
        text = version_dataset.get(field, "")
        if text:
            node[predicate] = text

    return [node]

174 

def map_dataset_versions_to_rdf(dataset_versions: list):
    """
    Map a list of dataset version dicts to RDF Dataset references.

    Each dict with a non-empty "url" becomes a dcat:Dataset node whose
    ``@id`` is that url, with dct:title / dct:description added when
    "title" / "description" are non-empty. Entries that are not dicts or
    lack a url are skipped. An empty or non-list input yields an empty list.
    """
    if not (dataset_versions and isinstance(dataset_versions, list)):
        return []

    optional_fields = (
        ("title", "http://purl.org/dc/terms/title"),
        ("description", "http://purl.org/dc/terms/description"),
    )

    references = []
    for entry in dataset_versions:
        if not isinstance(entry, dict):
            continue

        link = entry.get("url", "")
        if not link:
            continue

        node = {"@id": link, "@type": "dcat:Dataset"}
        for field, predicate in optional_fields:
            text = entry.get(field, "")
            if text:
                node[predicate] = text
        references.append(node)

    return references

205 

# Registry of helper name -> callable for every helper defined in this module.
all_helpers = dict(
    generate_uuid=generate_uuid,
    to_integer=to_integer,
    to_float=to_float,
    to_date=to_date,
    to_bool=to_bool,
    split_to_uris=split_to_uris,
    mapFromCKANLicense=mapFromCKANLicense,
    mapFromCKANTags=mapFromCKANTags,
    quote_url=quote_url,
    map_to_multiple_languages=map_to_multiple_languages,
    map_to_single_language=map_to_single_language,
    map_to_multiple_datasets=map_to_multiple_datasets,
    map_from_tags_multiple_languages=map_from_tags_multiple_languages,
    map_version_dataset_to_rdf=map_version_dataset_to_rdf,
    map_dataset_versions_to_rdf=map_dataset_versions_to_rdf,
)