Coverage for ckanext/udc/solr/helpers.py: 70%
135 statements
« prev ^ index » next coverage.py v7.7.1, created at 2026-01-19 23:48 +0000
« prev ^ index » next coverage.py v7.7.1, created at 2026-01-19 23:48 +0000
1from __future__ import annotations
2import ckan.model as model
3import logging
4import json
5import requests
6from requests.auth import HTTPBasicAuth
8from ckan.plugins.toolkit import config
9from ckan.lib.search.common import SolrSettings
11log = logging.getLogger(__name__)
def get_solr_config():
    """
    Collects the connection settings used by every Solr helper in this module.

    Returns a 4-tuple ``(url, user, password, timeout)`` where ``url`` is the
    value from ``SolrSettings.get()`` with trailing slashes removed and
    ``timeout`` is read from the ``ckan.requests.timeout`` config option.
    """
    base_url, user, password = SolrSettings.get()
    # Falls back to 10 (seconds) when the option is not configured
    request_timeout = config.get("ckan.requests.timeout", 10)
    return base_url.rstrip("/"), user, password, request_timeout
def get_fields():
    """
    Fetches all fields from Solr.

    Sends a GET request to ``<solr_url>/schema/fields`` using the URL,
    credentials and timeout returned by ``get_solr_config()``.

    Returns:
        dict: Field definitions keyed by field name. An empty dict is
        returned when the request fails (the error is logged) or when the
        response carries no "fields" entry.
    """
    solr_url, solr_user, solr_password, timeout = get_solr_config()

    try:
        response = requests.get(
            f"{solr_url}/schema/fields",
            timeout=timeout,
            auth=HTTPBasicAuth(solr_user, solr_password),
        )
        response.raise_for_status()
        # Tolerate a missing "fields" key and nameless entries instead of
        # raising KeyError, matching get_field_types() / get_dynamic_fields().
        fields = response.json().get("fields", [])
        return {field["name"]: field for field in fields if "name" in field}

    except requests.exceptions.RequestException as e:
        log.error(f"Failed to fetch fields: {e}")
        return {}
def get_extras_fields():
    """
    Return the subset of Solr fields whose name starts with 'extras_'.

    The result is a dict keyed by field name, built from ``get_fields()``.
    """
    selected = {}
    for name, definition in get_fields().items():
        if name.startswith("extras_"):
            selected[name] = definition
    return selected
def delete_extras_fields():
    """
    Removes every Solr field whose name starts with 'extras_'.

    One ``delete-field`` command is posted per field. A failure for one field
    is logged and does not stop the remaining deletions.
    """
    doomed = get_extras_fields()
    if not doomed:
        log.info("No 'extras_*' fields found. Nothing to delete.")
        return

    base_url, user, password, request_timeout = get_solr_config()
    schema_endpoint = f"{base_url}/schema"
    json_headers = {"Content-Type": "application/json"}

    for field in doomed:
        body = json.dumps({"delete-field": {"name": field}})
        try:
            reply = requests.post(
                schema_endpoint,
                data=body,
                headers=json_headers,
                auth=HTTPBasicAuth(user, password),
                timeout=request_timeout,
            )
            reply.raise_for_status()
        except requests.exceptions.RequestException as e:
            log.error(f"Error deleting field {field}: {e}")
        else:
            log.info(f"Deleted field: {field}")
def delete_field(field_name):
    """
    Removes a single field from the Solr schema.

    Posts a ``delete-field`` command for ``field_name`` to
    ``<solr_url>/schema``. Request failures are logged, not raised.
    """
    base_url, user, password, request_timeout = get_solr_config()
    body = json.dumps({"delete-field": {"name": field_name}})

    try:
        reply = requests.post(
            f"{base_url}/schema",
            data=body,
            headers={"Content-Type": "application/json"},
            auth=HTTPBasicAuth(user, password),
            timeout=request_timeout,
        )
        reply.raise_for_status()
    except requests.exceptions.RequestException as e:
        log.error(f"Error deleting field {field_name}: {e}")
    else:
        log.info(f"Deleted field: {field_name}")
def add_field(
    field_name,
    field_type="string",
    indexed=True,
    stored=True,
    multi_valued=False,
    docValues=False,
):
    """
    Registers a new field in the Solr schema.

    Posts an ``add-field`` command to ``<solr_url>/schema`` describing the
    field's name, type and the indexed / stored / multiValued / docValues
    flags. Request failures are logged, not raised.
    """
    base_url, user, password, request_timeout = get_solr_config()

    definition = {
        "name": field_name,
        "type": field_type,
        "indexed": indexed,
        "stored": stored,
        "multiValued": multi_valued,
        "docValues": docValues,
    }
    body = json.dumps({"add-field": definition})

    try:
        reply = requests.post(
            f"{base_url}/schema",
            data=body,
            headers={"Content-Type": "application/json"},
            auth=HTTPBasicAuth(user, password),
            timeout=request_timeout,
        )
        reply.raise_for_status()
    except requests.exceptions.RequestException as e:
        log.error(f"Error adding field '{field_name}': {e}")
    else:
        log.info(f"Field '{field_name}' added successfully.")
def add_dynamic_field(
    field_pattern,
    field_type="pfloat",
    indexed=True,
    stored=True,
    multi_valued=False,
    docValues=False,
):
    """
    Registers a new dynamic field (a name pattern) in the Solr schema.

    Posts an ``add-dynamic-field`` command to ``<solr_url>/schema``. The
    ``docValues`` attribute is only included in the command when it is
    truthy. Request failures are logged, not raised.
    """
    base_url, user, password, request_timeout = get_solr_config()

    definition = {
        "name": field_pattern,
        "type": field_type,
        "indexed": indexed,
        "stored": stored,
        "multiValued": multi_valued,
    }
    if docValues:
        # Only sent when requested; otherwise the key is left out entirely
        definition["docValues"] = True
    body = json.dumps({"add-dynamic-field": definition})

    try:
        reply = requests.post(
            f"{base_url}/schema",
            data=body,
            headers={"Content-Type": "application/json"},
            auth=HTTPBasicAuth(user, password),
            timeout=request_timeout,
        )
        reply.raise_for_status()
    except requests.exceptions.RequestException as e:
        log.error(f"Error adding dynamic field '{field_pattern}': {e}")
    else:
        log.info(f"Dynamic field '{field_pattern}' added successfully.")
def get_field_types():
    """
    Looks up the field types defined in the Solr core.

    Returns a dict mapping each type name to its definition as reported by
    ``<solr_url>/schema/fieldtypes``; entries without a "name" are skipped.
    An empty dict is returned (and the error logged) when the request fails.
    """
    base_url, user, password, request_timeout = get_solr_config()
    try:
        reply = requests.get(
            f"{base_url}/schema/fieldtypes",
            timeout=request_timeout,
            auth=HTTPBasicAuth(user, password),
        )
        reply.raise_for_status()
        entries = reply.json().get("fieldTypes", [])
    except requests.exceptions.RequestException as e:
        log.error(f"Failed to fetch field types: {e}")
        return {}

    by_name = {}
    for entry in entries:
        if "name" in entry:
            by_name[entry["name"]] = entry
    return by_name
def get_dynamic_fields():
    """
    Looks up the dynamic fields defined in the Solr core.

    Returns a dict mapping each dynamic-field pattern to its definition as
    reported by ``<solr_url>/schema/dynamicfields``; entries without a "name"
    are skipped. An empty dict is returned (and the error logged) when the
    request fails.
    """
    base_url, user, password, request_timeout = get_solr_config()
    try:
        reply = requests.get(
            f"{base_url}/schema/dynamicfields",
            timeout=request_timeout,
            auth=HTTPBasicAuth(user, password),
        )
        reply.raise_for_status()
        entries = reply.json().get("dynamicFields", [])
    except requests.exceptions.RequestException as e:
        log.error(f"Failed to fetch dynamic fields: {e}")
        return {}

    by_pattern = {}
    for entry in entries:
        if "name" in entry:
            by_pattern[entry["name"]] = entry
    return by_pattern
def add_copy_field(source_field, dest_field):
    """
    Registers a copyField rule (source -> dest) in the Solr schema.

    Posts an ``add-copy-field`` command to ``<solr_url>/schema``. Request
    failures are logged, not raised.
    """
    base_url, user, password, request_timeout = get_solr_config()
    rule = {"source": source_field, "dest": dest_field}
    body = json.dumps({"add-copy-field": rule})

    try:
        reply = requests.post(
            f"{base_url}/schema",
            data=body,
            headers={"Content-Type": "application/json"},
            auth=HTTPBasicAuth(user, password),
            timeout=request_timeout,
        )
        reply.raise_for_status()
    except requests.exceptions.RequestException as e:
        log.error(f"Error adding copy field rule: {e}")
    else:
        log.info(f"Copy field rule added: {source_field} -> {dest_field}")
def delete_copy_field(field_name, dest_field=None):
    """
    Deletes copyField rule(s) in Solr.

    Args:
        field_name: Source field of the copyField rule(s) to delete.
        dest_field: Destination field of the rule to delete. When omitted
            (None), the existing rules are read from
            ``<solr_url>/schema/copyfields`` and every rule whose source is
            ``field_name`` is deleted.

    Request failures (``requests.exceptions.RequestException``) are logged
    rather than raised. A failure for one rule does not stop the deletion of
    the remaining ones.
    """
    solr_url, solr_user, solr_password, timeout = get_solr_config()
    auth = HTTPBasicAuth(solr_user, solr_password)
    headers = {"Content-Type": "application/json"}

    if dest_field is not None:
        dest_fields = [dest_field]
    else:
        # Solr's delete-copy-field command needs both source and dest, so a
        # null dest can never succeed; resolve the destinations instead.
        try:
            lookup = requests.get(
                f"{solr_url}/schema/copyfields",
                timeout=timeout,
                auth=auth,
            )
            lookup.raise_for_status()
            rules = lookup.json().get("copyFields", [])
        except requests.exceptions.RequestException as e:
            log.error(f"Error deleting copy field rule: {e}")
            return

        dest_fields = [
            rule["dest"]
            for rule in rules
            if rule.get("source") == field_name and "dest" in rule
        ]
        if not dest_fields:
            log.info(f"No copy field rules found for source: {field_name}")
            return

    for dest in dest_fields:
        payload = {"delete-copy-field": {"source": field_name, "dest": dest}}
        try:
            response = requests.post(
                f"{solr_url}/schema",
                data=json.dumps(payload),
                headers=headers,
                auth=auth,
                timeout=timeout,
            )
            response.raise_for_status()
            log.info(f"Deleted copy field rule: {field_name}")
        except requests.exceptions.RequestException as e:
            log.error(f"Error deleting copy field rule: {e}")
def ensure_language_dynamic_fields(langs):
    """
    Make sure the per-language dynamic fields exist, creating missing ones:
      *_<lang>_txt -> type text_<lang> if that field type exists, else
                      text_general; indexed, not stored, multiValued
      *_<lang>_f   -> string (facets); indexed, stored, multiValued, docValues

    Patterns that are already defined are left alone and only logged.
    """
    known_types = get_field_types()
    existing = get_dynamic_fields()

    for lang in langs:
        # Prefer a language-specific analyzer type, fall back to text_general
        text_type = f"text_{lang}"
        if text_type not in known_types:
            text_type = "text_general"

        wanted = (
            (
                f"*_{lang}_txt",
                {
                    "field_type": text_type,
                    "indexed": True,
                    "stored": False,
                    "multi_valued": True,
                },
            ),
            (
                # Facet field: exact string values, with docValues enabled
                f"*_{lang}_f",
                {
                    "field_type": "string",
                    "indexed": True,
                    "stored": True,
                    "multi_valued": True,
                    "docValues": True,
                },
            ),
        )

        for pattern, options in wanted:
            if pattern in existing:
                log.info(f"Dynamic field already present: {pattern}")
                continue
            add_dynamic_field(pattern, **options)