Coverage for ckanext/udc/cli/udc.py: 61%
195 statements
« prev ^ index » next coverage.py v7.7.1, created at 2026-03-30 22:15 +0000
« prev ^ index » next coverage.py v7.7.1, created at 2026-03-30 22:15 +0000
1import click
2import ckan.model as model
3import json
4import logging
5import re
6from importlib import import_module
7import os
8import polib
9from pathlib import Path
10from typing import Any, Callable, Dict, Iterable, List, Optional, Set
12from ckan.lib.i18n import build_js_translations, get_ckan_i18n_dir
# Root click group of this module; the sub-commands below attach themselves to
# it with ``@udc.command()``.
# NOTE(review): click uses a command function's docstring as its long help
# text, so no docstring is added here to keep the ``--help`` output unchanged.
@click.group(short_help=u"UDC commands.")
def udc():
    pass
def _load_udc_config() -> Dict[str, Any]:
    """Return the UDC configuration as a parsed JSON object.

    The ``ckanext.udc.config`` system-info entry is used when it holds a
    truthy value. Otherwise the ``config.example.json`` file that sits in the
    parent of this module's directory is read (as UTF-8) and parsed instead.
    """
    stored = model.system_info.get_system_info("ckanext.udc.config")
    if not stored:
        # Nothing stored yet: fall back to the example config on disk.
        fallback = Path(__file__).resolve().parents[1] / "config.example.json"
        with fallback.open("r", encoding="utf-8") as handle:
            return json.loads(handle.read())
    return json.loads(stored)
30def _get_number_field_names(udc_config: Dict[str, Any]) -> List[str]:
31 fields: List[str] = []
32 seen: Set[str] = set()
34 for level in udc_config.get("maturity_model", []):
35 for field in level.get("fields", []):
36 if field.get("type") != "number":
37 continue
38 name = field.get("name")
39 if not name or name in seen:
40 continue
41 fields.append(name)
42 seen.add(name)
44 return fields
47def _normalize_scalar_number(value: Any) -> Optional[str]:
48 if value is None:
49 return None
51 if isinstance(value, bool):
52 return None
54 if isinstance(value, (int, float)):
55 text = str(value)
56 elif isinstance(value, str):
57 text = value.strip()
58 if not text:
59 return None
61 # Accept common thousands separators such as 10,740 or 1,234.56.
62 if re.match(r"^[+-]?\d{1,3}(,\d{3})+(\.\d+)?$", text):
63 text = text.replace(",", "")
64 else:
65 return None
67 try:
68 float(text)
69 except (TypeError, ValueError):
70 return None
72 return text
75def _parse_jsonish(value: Any) -> Any:
76 if isinstance(value, (dict, list, int, float)):
77 return value
78 if not isinstance(value, str):
79 return None
81 text = value.strip()
82 if not text:
83 return None
85 try:
86 return json.loads(text)
87 except (TypeError, ValueError):
88 return None
def _normalize_localized_number(value: Any) -> Optional[str]:
    """Collapse a per-locale mapping of numbers into one number string.

    ``value`` is run through ``_parse_jsonish`` and must come out as a dict.
    Entries whose value is ``None`` or ``""`` are ignored. Each remaining
    entry must normalize via ``_normalize_scalar_number``, and all of them
    must normalize to the same text.

    Returns:
        The shared normalized text, or ``None`` when ``value`` is not a
        mapping, has no usable entries, contains an entry that is not a
        number, or its entries disagree.
    """
    mapping = _parse_jsonish(value)
    if not isinstance(mapping, dict):
        return None

    agreed: Optional[str] = None
    for entry in mapping.values():
        if entry in (None, ""):
            continue

        number_text = _normalize_scalar_number(entry)
        if number_text is None:
            # One unparseable translation invalidates the whole mapping.
            return None

        if agreed is None:
            agreed = number_text
        elif number_text != agreed:
            # Translations disagree, so there is no single value to keep.
            return None

    return agreed
def _inspect_number_field_value(value: Any) -> Dict[str, Any]:
    """Classify a stored number-field value.

    Returns a dict with two keys:

    * ``"status"`` is one of ``"ok"``, ``"fixable_scalar"``, ``"empty"``,
      ``"fixable_localized"``, ``"invalid_localized"`` or ``"invalid"``;
    * ``"normalized"`` is the normalized number text for ``"ok"`` and the two
      ``fixable_*`` statuses, otherwise ``None``.
    """

    def _result(status: str, normalized: Optional[str] = None) -> Dict[str, Any]:
        return {"status": status, "normalized": normalized}

    as_scalar = _normalize_scalar_number(value)
    if as_scalar is not None:
        # A string counts as fixable only when normalization changed more
        # than its surrounding whitespace.
        rewritten = isinstance(value, str) and value.strip() != as_scalar
        return _result("fixable_scalar" if rewritten else "ok", as_scalar)

    if value in (None, ""):
        return _result("empty")

    as_localized = _normalize_localized_number(value)
    if as_localized is not None:
        return _result("fixable_localized", as_localized)

    # A mapping that could not be collapsed is reported separately from
    # values that are not mappings at all.
    if isinstance(_parse_jsonish(value), dict):
        return _result("invalid_localized")

    return _result("invalid")
138def _process_number_field_migration(
139 packages: Iterable[Any],
140 number_fields: List[str],
141 fix: bool = False,
142 echo: Optional[Callable[[str], None]] = None,
143) -> Dict[str, int]:
144 emit = echo or (lambda _message: None)
145 stats = {
146 "packages_scanned": 0,
147 "packages_with_issues": 0,
148 "issues_found": 0,
149 "fixable": 0,
150 "fixed": 0,
151 "invalid": 0,
152 }
154 for package in packages:
155 stats["packages_scanned"] += 1
156 package_has_issue = False
158 for field in number_fields:
159 raw_value = package.extras.get(field)
160 inspection = _inspect_number_field_value(raw_value)
161 status = inspection["status"]
163 if status in {"ok", "empty"}:
164 continue
166 package_has_issue = True
167 stats["issues_found"] += 1
168 package_id = getattr(package, "id", "<unknown>")
169 package_name = getattr(package, "name", package_id)
171 if status in {"fixable_localized", "fixable_scalar"}:
172 normalized = inspection["normalized"]
173 stats["fixable"] += 1
174 if fix:
175 package.extras[field] = normalized
176 stats["fixed"] += 1
177 emit(
178 f'Fix package {package_id} ({package_name}) field "{field}": {raw_value!r} -> {normalized!r}'
179 )
180 else:
181 emit(
182 f'Would fix package {package_id} ({package_name}) field "{field}": {raw_value!r} -> {normalized!r}'
183 )
184 continue
186 stats["invalid"] += 1
187 emit(
188 f'Invalid value on package {package_id} ({package_name}) field "{field}": {raw_value!r}'
189 )
191 if package_has_issue:
192 stats["packages_with_issues"] += 1
194 return stats
@udc.command()
def move_to_catalogues():
    """
    Make all packages have type=catalogue.
    This is used when we want to rename 'dataset' to 'catalogue'.
    """
    # Only packages whose type is still "dataset" are selected.
    query = model.Session.query(model.Package).filter(model.Package.type == "dataset")
    updated = 0

    for dataset in query:
        if dataset.type != 'dataset':
            continue
        click.echo(f'Update Dataset {dataset.id}: dataset.type: "{dataset.type}" -> "catalogue"')
        dataset.type = 'catalogue'
        updated += 1

    if not updated:
        click.echo("Nothing to do!")
        return

    # Persist all type changes in a single commit.
    model.repo.commit_and_remove()
    click.echo("Done. Please restart the CKAN instance!")
@udc.command()
@click.option(
    "--fix",
    is_flag=True,
    default=False,
    help="Normalize fixable number-field values in place.",
)
def migrate_number_fields(fix):
    """
    Check all non-deleted datasets/catalogues for malformed number extras.
    """
    # Field names come from the UDC config; without any there is nothing to check.
    number_fields = _get_number_field_names(_load_udc_config())
    if not number_fields:
        click.echo("No number fields found in UDC config.")
        return

    # Non-deleted packages of either type, fetched with yield_per(100).
    query = model.Session.query(model.Package)
    query = query.filter(model.Package.state != "deleted")
    query = query.filter(model.Package.type.in_(["catalogue", "dataset"]))
    packages = query.yield_per(100)

    click.echo("Checking number fields: " + ", ".join(number_fields))

    stats = _process_number_field_migration(
        packages,
        number_fields,
        fix=fix,
        echo=click.echo,
    )

    # Commit only when at least one value was actually rewritten.
    if fix and stats["fixed"]:
        model.repo.commit_and_remove()
        click.echo(
            "Applied fixes in the database. Rebuild the search index before retrying indexing."
        )

    summary = (
        "Summary: "
        f'packages_scanned={stats["packages_scanned"]} '
        f'packages_with_issues={stats["packages_with_issues"]} '
        f'issues_found={stats["issues_found"]} '
        f'fixable={stats["fixable"]} '
        f'fixed={stats["fixed"]} '
        f'invalid={stats["invalid"]}'
    )
    click.echo(summary)

    if not fix and stats["issues_found"]:
        click.echo("Dry run only. Rerun with --fix to normalize the fixable values.")
@udc.command()
def initdb():
    """
    Initialises the database with the required tables.
    """
    logger = logging.getLogger(__name__)

    # Drop the current scoped session and bind a fresh one to the engine.
    model.Session.remove()
    model.Session.configure(bind=model.meta.engine)

    logger.info("Initializing tables")

    from ..licenses.model import init_tables as init_license_tables
    init_license_tables()

    # Models of other plugins are initialised on a best-effort basis: a
    # failure is printed and logged, then the next module is tried.
    plugin_model_modules = (
        "ckanext.udc_import_other_portals.model",
        "ckanext.udc_react.model.organization_access_request",
    )
    for lib_str in plugin_model_modules:
        try:
            import_module(lib_str).init_tables()
        except Exception as exc:
            print(exc)
            logger.warning(f"Cannot init DB in {lib_str} plugin")

    logger.info("DB tables initialized")
@udc.command()
@click.option("--locale", default="fr", show_default=True, help="Locale to override.")
@click.option(
    "--source",
    default=None,
    help="Path to override ckan.po (defaults to ckanext-udc i18n).",
)
@click.option(
    "--build-js",
    is_flag=True,
    default=False,
    help="Also rebuild JS translations after copying.",
)
def override_ckan_translations(locale, source, build_js):
    """
    Override CKAN core translations using a plugin-managed ckan.po file.
    """
    # Without --source, use <parent of this module's dir>/i18n/<locale>/LC_MESSAGES/ckan.po.
    if not source:
        package_root = os.path.dirname(os.path.dirname(__file__))
        source = os.path.join(package_root, "i18n", locale, "LC_MESSAGES", "ckan.po")

    if not os.path.isfile(source):
        raise click.ClickException(f"Source translation not found: {source}")

    dest_dir = os.path.join(get_ckan_i18n_dir(), locale, "LC_MESSAGES")
    os.makedirs(dest_dir, exist_ok=True)

    # Write the catalogue as .po and as compiled .mo into the target directory.
    catalogue = polib.pofile(source)
    catalogue.save(os.path.join(dest_dir, "ckan.po"))
    catalogue.save_as_mofile(os.path.join(dest_dir, "ckan.mo"))

    if build_js:
        build_js_translations()

    click.secho(
        f"CKAN translations overridden for locale '{locale}' in {dest_dir}",
        fg="green",
        bold=True,
    )