Coverage for ckanext/udc/cli/udc.py: 61%

195 statements  

« prev     ^ index     » next       coverage.py v7.7.1, created at 2026-03-30 22:15 +0000

1import click 

2import ckan.model as model 

3import json 

4import logging 

5import re 

6from importlib import import_module 

7import os 

8import polib 

9from pathlib import Path 

10from typing import Any, Callable, Dict, Iterable, List, Optional, Set 

11 

12from ckan.lib.i18n import build_js_translations, get_ckan_i18n_dir 

13 

14 

# Root click group for this module; the commands defined further down are
# registered on it through ``@udc.command()``.
# NOTE: no docstring is added here on purpose -- click would pick it up as the
# group's help text and change the CLI output.
@click.group(short_help="UDC commands.")
def udc():
    pass

18 

19 

def _load_udc_config() -> Dict[str, Any]:
    """Return the UDC configuration as parsed JSON.

    The value stored under the ``ckanext.udc.config`` system-info key is
    decoded when it is truthy. Otherwise the ``config.example.json`` file that
    sits one directory above this module's own directory is read (UTF-8) and
    decoded instead.
    """
    stored = model.system_info.get_system_info("ckanext.udc.config")
    if not stored:
        # Nothing saved in the database: fall back to the bundled example file.
        example_file = Path(__file__).resolve().parents[1] / "config.example.json"
        with example_file.open("r", encoding="utf-8") as handle:
            return json.load(handle)

    return json.loads(stored)

28 

29 

30def _get_number_field_names(udc_config: Dict[str, Any]) -> List[str]: 

31 fields: List[str] = [] 

32 seen: Set[str] = set() 

33 

34 for level in udc_config.get("maturity_model", []): 

35 for field in level.get("fields", []): 

36 if field.get("type") != "number": 

37 continue 

38 name = field.get("name") 

39 if not name or name in seen: 

40 continue 

41 fields.append(name) 

42 seen.add(name) 

43 

44 return fields 

45 

46 

47def _normalize_scalar_number(value: Any) -> Optional[str]: 

48 if value is None: 

49 return None 

50 

51 if isinstance(value, bool): 

52 return None 

53 

54 if isinstance(value, (int, float)): 

55 text = str(value) 

56 elif isinstance(value, str): 

57 text = value.strip() 

58 if not text: 

59 return None 

60 

61 # Accept common thousands separators such as 10,740 or 1,234.56. 

62 if re.match(r"^[+-]?\d{1,3}(,\d{3})+(\.\d+)?$", text): 

63 text = text.replace(",", "") 

64 else: 

65 return None 

66 

67 try: 

68 float(text) 

69 except (TypeError, ValueError): 

70 return None 

71 

72 return text 

73 

74 

75def _parse_jsonish(value: Any) -> Any: 

76 if isinstance(value, (dict, list, int, float)): 

77 return value 

78 if not isinstance(value, str): 

79 return None 

80 

81 text = value.strip() 

82 if not text: 

83 return None 

84 

85 try: 

86 return json.loads(text) 

87 except (TypeError, ValueError): 

88 return None 

89 

90 

def _normalize_localized_number(value: Any) -> Optional[str]:
    """Collapse a per-locale mapping of numbers into one numeric string.

    *value* is run through ``_parse_jsonish``; anything that does not come back
    as a ``dict`` gives ``None``. Entries whose value is ``None`` or ``""``
    are ignored. Every remaining entry must normalize through
    ``_normalize_scalar_number``, and all of them must produce the same text.

    Returns:
        The shared normalized text, or ``None`` when there is no usable entry,
        any entry is not numeric, or the entries disagree. The comparison is
        on the normalized strings themselves.
    """
    by_locale = _parse_jsonish(value)
    if not isinstance(by_locale, dict):
        return None

    distinct: Set[str] = set()
    for entry in by_locale.values():
        if entry in (None, ""):
            continue
        number_text = _normalize_scalar_number(entry)
        if number_text is None:
            # One bad translation makes the whole mapping unusable.
            return None
        distinct.add(number_text)

    # Exactly one distinct value means every locale agreed on it.
    if len(distinct) != 1:
        return None
    return next(iter(distinct))

112 

113 

def _inspect_number_field_value(value: Any) -> Dict[str, Any]:
    """Classify a stored number-field value.

    Returns:
        A dict with the keys ``"status"`` and ``"normalized"``. ``"status"``
        is one of:

        * ``"ok"`` -- already a valid scalar number.
        * ``"fixable_scalar"`` -- a string whose stripped form differs from
          its normalized form.
        * ``"empty"`` -- ``None`` or ``""``.
        * ``"fixable_localized"`` -- a localized mapping that collapses to a
          single number.
        * ``"invalid_localized"`` -- parses to a dict but cannot be collapsed.
        * ``"invalid"`` -- anything else.

        ``"normalized"`` carries the numeric text for the ``ok`` / ``fixable_*``
        statuses and ``None`` otherwise.
    """

    def _result(status: str, normalized: Optional[str] = None) -> Dict[str, Any]:
        return {"status": status, "normalized": normalized}

    as_scalar = _normalize_scalar_number(value)
    if as_scalar is not None:
        needs_rewrite = isinstance(value, str) and value.strip() != as_scalar
        return _result("fixable_scalar" if needs_rewrite else "ok", as_scalar)

    if value in (None, ""):
        return _result("empty")

    as_localized = _normalize_localized_number(value)
    if as_localized is not None:
        return _result("fixable_localized", as_localized)

    # Tell "a localized mapping we could not collapse" apart from plain junk.
    if isinstance(_parse_jsonish(value), dict):
        return _result("invalid_localized")
    return _result("invalid")

136 

137 

def _process_number_field_migration(
    packages: Iterable[Any],
    number_fields: List[str],
    fix: bool = False,
    echo: Optional[Callable[[str], None]] = None,
) -> Dict[str, int]:
    """Scan packages for malformed number extras and optionally repair them.

    Args:
        packages: Objects exposing an ``extras`` mapping; ``id`` and ``name``
            attributes are used for messages when present.
        number_fields: Extra keys to inspect on every package.
        fix: When true, fixable values are overwritten in ``package.extras``
            with their normalized form. Nothing is committed here.
        echo: Optional callable receiving one message per reported issue.

    Returns:
        Counters keyed by ``packages_scanned``, ``packages_with_issues``,
        ``issues_found``, ``fixable``, ``fixed`` and ``invalid``.
    """
    report = echo if echo else (lambda _message: None)
    stats = dict.fromkeys(
        (
            "packages_scanned",
            "packages_with_issues",
            "issues_found",
            "fixable",
            "fixed",
            "invalid",
        ),
        0,
    )
    clean_statuses = ("ok", "empty")
    fixable_statuses = ("fixable_localized", "fixable_scalar")

    for package in packages:
        stats["packages_scanned"] += 1
        issues_in_package = 0

        for field in number_fields:
            raw_value = package.extras.get(field)
            outcome = _inspect_number_field_value(raw_value)
            status = outcome["status"]
            if status in clean_statuses:
                continue

            issues_in_package += 1
            stats["issues_found"] += 1
            package_id = getattr(package, "id", "<unknown>")
            package_name = getattr(package, "name", package_id)

            if status not in fixable_statuses:
                stats["invalid"] += 1
                report(
                    f'Invalid value on package {package_id} ({package_name}) field "{field}": {raw_value!r}'
                )
                continue

            normalized = outcome["normalized"]
            stats["fixable"] += 1
            if not fix:
                # Dry run: only announce what would change.
                report(
                    f'Would fix package {package_id} ({package_name}) field "{field}": {raw_value!r} -> {normalized!r}'
                )
                continue

            package.extras[field] = normalized
            stats["fixed"] += 1
            report(
                f'Fix package {package_id} ({package_name}) field "{field}": {raw_value!r} -> {normalized!r}'
            )

        if issues_in_package:
            stats["packages_with_issues"] += 1

    return stats

195 

@udc.command()
def move_to_catalogues():
    """
    Make all packages have type=catalogue.
    This is used when we want to rename 'dataset' to 'catalogue'.
    """
    # The docstring above doubles as the command's help text; keep it verbatim.
    matching = model.Session.query(model.Package).filter(model.Package.type == "dataset")
    updated = 0

    for package in matching:
        if package.type != 'dataset':
            continue
        click.echo(f'Update Dataset {package.id}: dataset.type: "{package.type}" -> "catalogue"')
        package.type = 'catalogue'
        updated += 1

    if updated:
        # Persist all renames in one commit.
        model.repo.commit_and_remove()
        click.echo("Done. Please restart the CKAN instance!")
    else:
        click.echo("Nothing to do!")

216 

217 

@udc.command()
@click.option(
    "--fix",
    is_flag=True,
    default=False,
    help="Normalize fixable number-field values in place.",
)
def migrate_number_fields(fix):
    """
    Check all non-deleted datasets/catalogues for malformed number extras.
    """
    # The docstring above doubles as the command's help text; keep it verbatim.
    number_fields = _get_number_field_names(_load_udc_config())
    if not number_fields:
        click.echo("No number fields found in UDC config.")
        return

    # Stream packages in batches of 100 rows.
    query = model.Session.query(model.Package)
    query = query.filter(model.Package.state != "deleted")
    query = query.filter(model.Package.type.in_(["catalogue", "dataset"]))
    packages = query.yield_per(100)

    click.echo("Checking number fields: " + ", ".join(number_fields))

    stats = _process_number_field_migration(
        packages, number_fields, fix=fix, echo=click.echo
    )

    # Only touch the database when something was actually rewritten.
    if fix and stats["fixed"]:
        model.repo.commit_and_remove()
        click.echo(
            "Applied fixes in the database. Rebuild the search index before retrying indexing."
        )

    summary_keys = (
        "packages_scanned",
        "packages_with_issues",
        "issues_found",
        "fixable",
        "fixed",
        "invalid",
    )
    click.echo("Summary: " + " ".join(f"{key}={stats[key]}" for key in summary_keys))

    if not fix and stats["issues_found"]:
        click.echo("Dry run only. Rerun with --fix to normalize the fixable values.")

267 

@udc.command()
def initdb():
    """
    Initialises the database with the required tables.
    """
    # The docstring above doubles as the command's help text; keep it verbatim.
    log = logging.getLogger(__name__)

    # Reset the scoped session and bind it to the engine before creating tables.
    model.Session.remove()
    model.Session.configure(bind=model.meta.engine)

    log.info("Initializing tables")

    from ..licenses.model import init_tables
    init_tables()

    # Companion modules whose tables are created on a best-effort basis: a
    # failure in one is reported and the remaining modules are still tried.
    libs = [
        "ckanext.udc_import_other_portals.model",
        "ckanext.udc_react.model.organization_access_request",
    ]
    for lib_str in libs:
        try:
            lib = import_module(lib_str)
            lib.init_tables()
        except Exception as exc:
            # Report the cause through logging (not a bare print) so it lands
            # in the same place as the warning it explains.
            log.warning("Cannot init DB in %s plugin: %r", lib_str, exc)

    log.info("DB tables initialized")

296 

297 

@udc.command()
@click.option("--locale", default="fr", show_default=True, help="Locale to override.")
@click.option(
    "--source",
    default=None,
    help="Path to override ckan.po (defaults to ckanext-udc i18n).",
)
@click.option(
    "--build-js",
    is_flag=True,
    default=False,
    help="Also rebuild JS translations after copying.",
)
def override_ckan_translations(locale, source, build_js):
    """
    Override CKAN core translations using a plugin-managed ckan.po file.
    """
    # The docstring above doubles as the command's help text; keep it verbatim.
    po_source = source
    if not po_source:
        # Default: <parent of this module's directory>/i18n/<locale>/LC_MESSAGES/ckan.po
        plugin_root = os.path.dirname(os.path.dirname(__file__))
        po_source = os.path.join(plugin_root, "i18n", locale, "LC_MESSAGES", "ckan.po")

    if not os.path.isfile(po_source):
        raise click.ClickException(f"Source translation not found: {po_source}")

    dest_dir = os.path.join(get_ckan_i18n_dir(), locale, "LC_MESSAGES")
    os.makedirs(dest_dir, exist_ok=True)

    # Write the catalogue as .po and as compiled .mo into the destination.
    catalogue = polib.pofile(po_source)
    catalogue.save(os.path.join(dest_dir, "ckan.po"))
    catalogue.save_as_mofile(os.path.join(dest_dir, "ckan.mo"))

    if build_js:
        build_js_translations()

    click.secho(
        f"CKAN translations overridden for locale '{locale}' in {dest_dir}",
        fg="green",
        bold=True,
    )