Coverage for ckanext/udc/solr/helpers.py: 70%

135 statements  

« prev     ^ index     » next       coverage.py v7.7.1, created at 2026-01-19 23:48 +0000

1from __future__ import annotations 

2import ckan.model as model 

3import logging 

4import json 

5import requests 

6from requests.auth import HTTPBasicAuth 

7 

8from ckan.plugins.toolkit import config 

9from ckan.lib.search.common import SolrSettings 

10 

11log = logging.getLogger(__name__) 

12 

13 

def get_solr_config():
    """
    Return the Solr connection settings used by the helpers in this module.

    Returns:
        A 4-tuple ``(url, user, password, timeout)``. ``url`` is the URL
        reported by ``SolrSettings.get()`` with trailing slashes removed and
        ``timeout`` comes from ``ckan.requests.timeout`` (10 when unset).
    """
    solr_url, solr_user, solr_password = SolrSettings.get()

    timeout = config.get("ckan.requests.timeout", 10)  # Default timeout is 10 seconds
    if isinstance(timeout, str):
        # A value taken verbatim from the ini file is text, while requests
        # expects a number (or None) for its ``timeout`` argument.
        try:
            timeout = float(timeout)
        except ValueError:
            log.warning(
                "Invalid ckan.requests.timeout value %r; falling back to 10 seconds",
                timeout,
            )
            timeout = 10

    url = solr_url.rstrip("/")
    return url, solr_user, solr_password, timeout

19 

20 

def get_fields():
    """
    Fetches all fields from Solr.

    Returns:
        A dict mapping each field name to its definition as returned by
        ``<solr_url>/schema/fields``. An empty dict is returned (and the
        error logged) when the request fails.
    """
    solr_url, solr_user, solr_password, timeout = get_solr_config()

    try:
        response = requests.get(
            f"{solr_url}/schema/fields",
            timeout=timeout,
            auth=HTTPBasicAuth(solr_user, solr_password),
        )
        response.raise_for_status()

        # Tolerate a response without a "fields" key or with nameless
        # entries, the same way get_field_types() / get_dynamic_fields() do,
        # instead of letting a KeyError escape.
        fields = response.json().get("fields", [])
        return {field["name"]: field for field in fields if "name" in field}

    except requests.exceptions.RequestException as e:
        log.error(f"Failed to fetch fields: {e}")
        return {}

47 

48 

def get_extras_fields():
    """
    Return the subset of Solr fields whose name begins with 'extras_'.

    The result has the same shape as get_fields(): field name -> definition.
    """
    prefix = "extras_"
    selected = {}
    for name, definition in get_fields().items():
        if name.startswith(prefix):
            selected[name] = definition
    return selected

56 

57 

def delete_extras_fields():
    """
    Deletes all fields in Solr that start with 'extras_'.

    One ``delete-field`` request is posted per field. A failed request is
    logged and the remaining fields are still attempted.
    """
    extras_fields = get_extras_fields()

    if not extras_fields:
        log.info("No 'extras_*' fields found. Nothing to delete.")
        return

    solr_url, solr_user, solr_password, timeout = get_solr_config()

    # These are identical for every request, so build them once.
    schema_url = f"{solr_url}/schema"
    headers = {"Content-Type": "application/json"}
    auth = HTTPBasicAuth(solr_user, solr_password)

    for field in extras_fields:
        payload = {"delete-field": {"name": field}}

        try:
            response = requests.post(
                schema_url,
                data=json.dumps(payload),
                headers=headers,
                auth=auth,
                timeout=timeout,
            )
            response.raise_for_status()
            log.info(f"Deleted field: {field}")

        except requests.exceptions.RequestException as e:
            log.error(f"Error deleting field {field}: {e}")

87 

88 

def delete_field(field_name):
    """
    Remove a single field definition from the Solr schema.

    Posts a ``delete-field`` command to ``<solr_url>/schema``. A failed
    request is logged rather than raised.
    """
    base_url, user, password, timeout = get_solr_config()
    body = json.dumps({"delete-field": {"name": field_name}})

    try:
        result = requests.post(
            f"{base_url}/schema",
            data=body,
            headers={"Content-Type": "application/json"},
            auth=HTTPBasicAuth(user, password),
            timeout=timeout,
        )
        result.raise_for_status()
    except requests.exceptions.RequestException as e:
        log.error(f"Error deleting field {field_name}: {e}")
    else:
        log.info(f"Deleted field: {field_name}")

111 

112 

def add_field(
    field_name,
    field_type="string",
    indexed=True,
    stored=True,
    multi_valued=False,
    docValues=False,
):
    """
    Register a new (non-dynamic) field in the Solr schema.

    Posts an ``add-field`` command to ``<solr_url>/schema`` carrying the
    name, type and the indexed/stored/multiValued/docValues flags. A failed
    request is logged rather than raised.
    """
    base_url, user, password, timeout = get_solr_config()

    definition = {
        "name": field_name,
        "type": field_type,
        "indexed": indexed,
        "stored": stored,
        "multiValued": multi_valued,
        "docValues": docValues,
    }
    body = json.dumps({"add-field": definition})

    try:
        result = requests.post(
            f"{base_url}/schema",
            data=body,
            headers={"Content-Type": "application/json"},
            auth=HTTPBasicAuth(user, password),
            timeout=timeout,
        )
        result.raise_for_status()
    except requests.exceptions.RequestException as e:
        log.error(f"Error adding field '{field_name}': {e}")
    else:
        log.info(f"Field '{field_name}' added successfully.")

152 

153 

def add_dynamic_field(
    field_pattern,
    field_type="pfloat",
    indexed=True,
    stored=True,
    multi_valued=False,
    docValues=False,
):
    """
    Register a new dynamic field pattern in the Solr schema.

    Posts an ``add-dynamic-field`` command to ``<solr_url>/schema``. The
    ``docValues`` attribute is only included in the command when it is
    truthy. A failed request is logged rather than raised.
    """
    base_url, user, password, timeout = get_solr_config()

    definition = {
        "name": field_pattern,
        "type": field_type,
        "indexed": indexed,
        "stored": stored,
        "multiValued": multi_valued,
    }
    if docValues:
        definition["docValues"] = True
    body = json.dumps({"add-dynamic-field": definition})

    try:
        result = requests.post(
            f"{base_url}/schema",
            data=body,
            headers={"Content-Type": "application/json"},
            auth=HTTPBasicAuth(user, password),
            timeout=timeout,
        )
        result.raise_for_status()
    except requests.exceptions.RequestException as e:
        log.error(f"Error adding dynamic field '{field_pattern}': {e}")
    else:
        log.info(f"Dynamic field '{field_pattern}' added successfully.")

193 

194 

def get_field_types():
    """
    Look up the field types defined in the Solr core.

    Returns a dict {type_name: {...}} built from the "fieldTypes" list of
    ``<solr_url>/schema/fieldtypes``; entries without a "name" are skipped.
    An empty dict is returned (and the error logged) when the request fails.
    """
    base_url, user, password, timeout = get_solr_config()
    try:
        reply = requests.get(
            f"{base_url}/schema/fieldtypes",
            timeout=timeout,
            auth=HTTPBasicAuth(user, password),
        )
        reply.raise_for_status()

        by_name = {}
        for entry in reply.json().get("fieldTypes", []):
            if "name" in entry:
                by_name[entry["name"]] = entry
        return by_name
    except requests.exceptions.RequestException as e:
        log.error(f"Failed to fetch field types: {e}")
        return {}

212 

213 

def get_dynamic_fields():
    """
    Look up the dynamic field rules defined in the Solr core.

    Returns a dict {pattern: {...}} built from the "dynamicFields" list of
    ``<solr_url>/schema/dynamicfields``; entries without a "name" are
    skipped. An empty dict is returned (and the error logged) when the
    request fails.
    """
    base_url, user, password, timeout = get_solr_config()
    try:
        reply = requests.get(
            f"{base_url}/schema/dynamicfields",
            timeout=timeout,
            auth=HTTPBasicAuth(user, password),
        )
        reply.raise_for_status()

        by_pattern = {}
        for entry in reply.json().get("dynamicFields", []):
            if "name" in entry:
                by_pattern[entry["name"]] = entry
        return by_pattern
    except requests.exceptions.RequestException as e:
        log.error(f"Failed to fetch dynamic fields: {e}")
        return {}

231 

232 

def add_copy_field(source_field, dest_field):
    """
    Create a copyField rule from ``source_field`` to ``dest_field``.

    Posts an ``add-copy-field`` command to ``<solr_url>/schema``. A failed
    request is logged rather than raised.
    """
    base_url, user, password, timeout = get_solr_config()

    rule = {"source": source_field, "dest": dest_field}
    body = json.dumps({"add-copy-field": rule})

    try:
        result = requests.post(
            f"{base_url}/schema",
            data=body,
            headers={"Content-Type": "application/json"},
            auth=HTTPBasicAuth(user, password),
            timeout=timeout,
        )
        result.raise_for_status()
    except requests.exceptions.RequestException as e:
        log.error(f"Error adding copy field rule: {e}")
    else:
        log.info(f"Copy field rule added: {source_field} -> {dest_field}")

256 

257 

def delete_copy_field(field_name, dest_field=None):
    """
    Deletes a copyField rule in Solr.

    Args:
        field_name: Source field of the rule(s) to delete.
        dest_field: Destination field of the rule. When omitted (None),
            the rules whose source is ``field_name`` are looked up through
            ``<solr_url>/schema/copyfields`` and each of them is deleted,
            instead of posting a command with a null destination.

    Failed requests are logged rather than raised.
    """
    solr_url, solr_user, solr_password, timeout = get_solr_config()
    auth = HTTPBasicAuth(solr_user, solr_password)

    if dest_field is not None:
        dest_fields = [dest_field]
    else:
        # No destination given: discover the existing rules for this source.
        try:
            response = requests.get(
                f"{solr_url}/schema/copyfields",
                params={"source.fl": field_name},
                timeout=timeout,
                auth=auth,
            )
            response.raise_for_status()
            rules = response.json().get("copyFields", [])
        except requests.exceptions.RequestException as e:
            log.error(f"Error deleting copy field rule: {e}")
            return

        # Re-check the source client-side so only exact matches are removed.
        dest_fields = [
            rule["dest"]
            for rule in rules
            if rule.get("source") == field_name and "dest" in rule
        ]
        if not dest_fields:
            log.info(f"No copy field rules found for source: {field_name}")
            return

    headers = {"Content-Type": "application/json"}

    for dest in dest_fields:
        payload = {"delete-copy-field": {"source": field_name, "dest": dest}}

        try:
            response = requests.post(
                f"{solr_url}/schema",
                data=json.dumps(payload),
                headers=headers,
                auth=auth,
                timeout=timeout,
            )
            response.raise_for_status()
            log.info(f"Deleted copy field rule: {field_name} -> {dest}")

        except requests.exceptions.RequestException as e:
            log.error(f"Error deleting copy field rule: {e}")

281 

282 

def ensure_language_dynamic_fields(langs):
    """
    Ensure dynamic fields exist for each language:
      *_<lang>_txt -> text_<lang> field type if present, else text_general
      *_<lang>_f   -> string (facets), stored+indexed, multiValued, docValues

    Args:
        langs: Iterable of language codes. A code that appears more than
            once is processed only once.

    Patterns already present in Solr are left untouched and logged.
    """
    types = get_field_types()
    dynamic_fields = get_dynamic_fields()

    # choose analyzer per lang, fallback to text_general
    def analyzer_for(lang):
        tname = f"text_{lang}"
        if tname in types:
            return tname
        return "text_general"

    # dynamic_fields is a snapshot taken before any field is added, so a
    # repeated language would re-attempt an add for a pattern created earlier
    # in this same call. dict.fromkeys() drops duplicates and keeps order.
    for lang in dict.fromkeys(langs):
        txt_pat = f"*_{lang}_txt"
        f_pat = f"*_{lang}_f"

        if txt_pat not in dynamic_fields:
            add_dynamic_field(
                txt_pat,
                field_type=analyzer_for(lang),
                indexed=True,
                stored=False,
                multi_valued=True,
            )
        else:
            log.info(f"Dynamic field already present: {txt_pat}")

        if f_pat not in dynamic_fields:
            # facets: exact string, docValues recommended for performance
            add_dynamic_field(
                f_pat,
                field_type="string",
                indexed=True,
                stored=True,
                multi_valued=True,
                docValues=True,
            )
        else:
            log.info(f"Dynamic field already present: {f_pat}")

326