Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
2.81% |
5 / 178 |
|
10.00% |
1 / 10 |
CRAP | |
0.00% |
0 / 1 |
base | |
2.81% |
5 / 178 |
|
10.00% |
1 / 10 |
2832.18 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
obtain_ids | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
110 | |||
save_ids | |
0.00% |
0 / 48 |
|
0.00% |
0 / 1 |
182 | |||
destroy_cache | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
90 | |||
create_index | |
0.00% |
0 / 29 |
|
0.00% |
0 / 1 |
72 | |||
delete_index | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
56 | |||
forum_ids_with_indexing_enabled | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
get_posts_batch_after | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
get_max_post_id | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
get_type | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * |
4 | * This file is part of the phpBB Forum Software package. |
5 | * |
6 | * @copyright (c) phpBB Limited <https://www.phpbb.com> |
7 | * @license GNU General Public License, version 2 (GPL-2.0) |
8 | * |
9 | * For full copyright and license information, please see |
10 | * the docs/CREDITS.txt file. |
11 | * |
12 | */ |
13 | |
14 | namespace phpbb\search\backend; |
15 | |
16 | use phpbb\cache\service; |
17 | use phpbb\config\config; |
18 | use phpbb\db\driver\driver_interface; |
19 | use phpbb\user; |
20 | |
/**
* Optional base class for search backends.
*
* Provides a result cache built on the cache service plus a bookkeeping table
* ($search_results_table), and generic batch-wise implementations of
* create_index() / delete_index() that walk the posts table in ascending
* post_id order.
*
* Layout of a cached result set (cache key '_search_results_' . $search_key):
*   [-1]      => total number of results for the search
*   [-2]      => sort direction ('a' or 'd') the ids are stored in
*   [0 .. n]  => post/topic ids keyed by their absolute position in the result
*                set when sorted in the stored direction; the set may be sparse
*/
abstract class base implements search_backend_interface
{
	public const SEARCH_RESULT_NOT_IN_CACHE = 0;
	public const SEARCH_RESULT_IN_CACHE = 1;
	public const SEARCH_RESULT_INCOMPLETE = 2;

	// Batch size for create_index and delete_index
	private const BATCH_SIZE = 100;

	/**
	 * @var service
	 */
	protected $cache;

	/**
	 * @var config
	 */
	protected $config;

	/**
	 * @var driver_interface
	 */
	protected $db;

	/**
	 * @var user
	 */
	protected $user;

	/**
	 * @var string
	 */
	protected $search_results_table;

	/**
	 * Constructor.
	 *
	 * @param service			$cache
	 * @param config			$config
	 * @param driver_interface	$db
	 * @param user				$user
	 * @param string			$search_results_table	Name of the table that records cached searches
	 */
	public function __construct(service $cache, config $config, driver_interface $db, user $user, string $search_results_table)
	{
		$this->cache = $cache;
		$this->config = $config;
		$this->db = $db;
		$this->user = $user;
		$this->search_results_table = $search_results_table;
	}

	/**
	 * Retrieves cached search results
	 *
	 * @param string	$search_key		an md5 string generated from all the passed search options to identify the results
	 * @param int		&$result_count	will contain the number of all results for the search (not only for the current page)
	 * @param array		&$id_ary		is filled with the ids belonging to the requested page that are stored in the cache
	 * @param int		&$start			indicates the first index of the page, is clamped into the valid range
	 * @param int		$per_page		number of ids each page is supposed to contain
	 * @param string	$sort_dir		is either a or d representing ASC and DESC
	 *
	 * @return int self::SEARCH_RESULT_NOT_IN_CACHE or self::SEARCH_RESULT_IN_CACHE or self::SEARCH_RESULT_INCOMPLETE
	 */
	protected function obtain_ids(string $search_key, int &$result_count, array &$id_ary, int &$start, int $per_page, string $sort_dir): int
	{
		$stored_ids = $this->cache->get('_search_results_' . $search_key);

		if (!$stored_ids)
		{
			// no search results cached for this search_key
			return self::SEARCH_RESULT_NOT_IN_CACHE;
		}

		$result_count = (int) $stored_ids[-1];
		$reverse_ids = $stored_ids[-2] != $sort_dir;
		$complete = true;

		// Change start parameter in case out of bounds
		if ($result_count)
		{
			if ($start < 0)
			{
				$start = 0;
			}
			else if ($start >= $result_count)
			{
				// Integer division keeps $start an int; floor() would turn the
				// caller's by-reference variable into a float.
				$start = intdiv($result_count - 1, $per_page) * $per_page;
			}
		}

		for ($i = $start, $n = $start + $per_page; ($i < $n) && ($i < $result_count); $i++)
		{
			// The cache holds a single (possibly sparse) set keyed by absolute
			// position in the stored direction. For the opposite direction,
			// position $i maps to $result_count - 1 - $i, which is the inverse
			// of the key calculation done in save_ids(). array_reverse() must
			// not be used here: it renumbers integer keys and follows insertion
			// order, which yields wrong ids for partially cached sets.
			$key = $reverse_ids ? $result_count - 1 - $i : $i;

			if (!isset($stored_ids[$key]))
			{
				$complete = false;
			}
			else
			{
				$id_ary[] = $stored_ids[$key];
			}
		}
		unset($stored_ids);

		return $complete ? self::SEARCH_RESULT_IN_CACHE : self::SEARCH_RESULT_INCOMPLETE;
	}

	/**
	 * Caches post/topic ids
	 *
	 * At most config['search_block_size'] ids are stored per call. When the
	 * cached set grows beyond 20 blocks, entries further than two blocks away
	 * from the range just stored are dropped.
	 *
	 * @param string	$search_key		an md5 string generated from all the passed search options to identify the results
	 * @param string	$keywords		contains the keywords as entered by the user
	 * @param array		$author_ary		an array of author ids, if the author should be ignored during the search the array is empty
	 * @param int		$result_count	contains the number of all results for the search (not only for the current page)
	 * @param array		&$id_ary		contains a list of post or topic ids that shall be cached, the first element
	 *									must have the absolute index $start in the result set.
	 * @param int		$start			indicates the first index of the page
	 * @param string	$sort_dir		is either a or d representing ASC and DESC
	 *
	 * @return void
	 */
	protected function save_ids(string $search_key, string $keywords, array $author_ary, int $result_count, array &$id_ary, int $start, string $sort_dir): void
	{
		$block_size = (int) $this->config['search_block_size'];
		$length = min(count($id_ary), $block_size);

		// nothing to cache so exit
		if (!$length)
		{
			return;
		}

		$store_ids = array_slice($id_ary, 0, $length);
		$cache_key = '_search_results_' . $search_key;
		$escaped_key = $this->db->sql_escape($search_key);

		// create a new resultset if there is none for this search_key yet
		// or add the ids to the existing resultset
		$store = $this->cache->get($cache_key);

		if (!$store)
		{
			// add the current keywords to the recent searches in the cache which are listed on the search page
			if (!empty($keywords) || count($author_ary))
			{
				$sql = 'SELECT search_time
					FROM ' . $this->search_results_table . '
					WHERE search_key = \'' . $escaped_key . '\'';
				$result = $this->db->sql_query($sql);

				if (!$this->db->sql_fetchrow($result))
				{
					$sql_ary = [
						'search_key'		=> $search_key,
						'search_time'		=> time(),
						'search_keywords'	=> $keywords,
						// surrounding spaces let destroy_cache() match whole ids with LIKE
						'search_authors'	=> ' ' . implode(' ', $author_ary) . ' ',
					];

					$sql = 'INSERT INTO ' . $this->search_results_table . ' ' . $this->db->sql_build_array('INSERT', $sql_ary);
					$this->db->sql_query($sql);
				}
				$this->db->sql_freeresult($result);
			}

			// Use the injected user object rather than the global one
			$sql = 'UPDATE ' . USERS_TABLE . '
				SET user_last_search = ' . time() . '
				WHERE user_id = ' . (int) $this->user->data['user_id'];
			$this->db->sql_query($sql);

			$store = [-1 => $result_count, -2 => $sort_dir];
			$id_range = range($start, $start + $length - 1);
		}
		else if ($store[-2] != $sort_dir)
		{
			// we use one set of results for both sort directions so we have to calculate the indices
			// for the reversed array and we also have to reverse the ids themselves
			$store_ids = array_reverse($store_ids);
			$id_range = range($store[-1] - $start - $length, $store[-1] - $start - 1);
		}
		else
		{
			$id_range = range($start, $start + $length - 1);
		}

		$store_ids = array_combine($id_range, $store_ids);

		// append the ids
		if (is_array($store_ids))
		{
			// array union: positions that are already cached keep their value
			$store += $store_ids;

			// if the cache is too big (the two bookkeeping entries do not count)
			if (count($store) - 2 > 20 * $block_size)
			{
				$first_index = $id_range[0];
				$last_index = $id_range[count($id_range) - 1];

				// remove everything in front of two blocks in front of the current start index
				for ($i = 0, $n = $first_index - 2 * $block_size; $i < $n; $i++)
				{
					unset($store[$i]);
				}

				// remove everything after two blocks after the current stop index
				for ($i = $store[-1] - 1, $n = $last_index + 2 * $block_size; $i > $n; $i--)
				{
					unset($store[$i]);
				}
			}
			$this->cache->put($cache_key, $store, $this->config['search_store_results']);

			$sql = 'UPDATE ' . $this->search_results_table . '
				SET search_time = ' . time() . '
				WHERE search_key = \'' . $escaped_key . '\'';
			$this->db->sql_query($sql);
		}

		unset($store, $store_ids, $id_range);
	}

	/**
	 * Removes old entries from the search results table and removes searches with keywords that contain a word in $words.
	 *
	 * Searches whose keywords contain a '*' wildcard are always invalidated
	 * when $words is not empty. Rows older than config['search_store_results']
	 * seconds are deleted from the search results table on every call.
	 *
	 * @param array			$words		Words whose cached searches shall be invalidated
	 * @param array|bool	$authors	Author ids whose cached searches shall be invalidated, false for none
	 *
	 * @return void
	 */
	protected function destroy_cache(array $words, $authors = false): void
	{
		// clear all searches that searched for the specified words
		if (count($words))
		{
			$sql_where = '';
			foreach ($words as $word)
			{
				$sql_where .= " OR search_keywords " . $this->db->sql_like_expression($this->db->get_any_char() . $word . $this->db->get_any_char());
			}

			$this->destroy_cached_searches("search_keywords LIKE '%*%' $sql_where");
		}

		// clear all searches that searched for the specified authors
		if (is_array($authors) && count($authors))
		{
			$sql_where = '';
			foreach ($authors as $author)
			{
				$sql_where .= (($sql_where) ? ' OR ' : '') . 'search_authors ' . $this->db->sql_like_expression($this->db->get_any_char() . ' ' . (int) $author . ' ' . $this->db->get_any_char());
			}

			$this->destroy_cached_searches($sql_where);
		}

		$sql = 'DELETE
			FROM ' . $this->search_results_table . '
			WHERE search_time < ' . (time() - (int) $this->config['search_store_results']);
		$this->db->sql_query($sql);
	}

	/**
	 * Destroys the cached result set of every recorded search matching the condition.
	 *
	 * @param string $sql_where	SQL condition on the search results table, already escaped
	 *
	 * @return void
	 */
	private function destroy_cached_searches(string $sql_where): void
	{
		$sql = 'SELECT search_key
			FROM ' . $this->search_results_table . "
			WHERE $sql_where";
		$result = $this->db->sql_query($sql);

		while ($row = $this->db->sql_fetchrow($result))
		{
			$this->cache->destroy('_search_results_' . $row['search_key']);
		}
		$this->db->sql_freeresult($result);
	}

	/**
	 * {@inheritdoc}
	 *
	 * Indexes posts in batches of self::BATCH_SIZE, starting after
	 * $post_counter, for as long as still_on_time() allows. Under the cli
	 * SAPI only a single batch is processed per call.
	 *
	 * @param int &$post_counter	Id of the last processed post, updated while indexing
	 *
	 * @return array|null Progress information if there are posts left to index, null when done
	 */
	public function create_index(int &$post_counter = 0): ?array
	{
		$max_post_id = $this->get_max_post_id();
		$forums_indexing_enabled = $this->forum_ids_with_indexing_enabled();

		$starttime = microtime(true);
		$row_count = 0;

		while (still_on_time() && $post_counter < $max_post_id)
		{
			$rows = $this->get_posts_batch_after($post_counter);

			// Fetch the whole batch up front when the driver reports that
			// nested queries require buffering, as index() runs its own queries
			if ($this->db->sql_buffer_nested_transactions())
			{
				$rows = iterator_to_array($rows);
			}

			foreach ($rows as $row)
			{
				// Indexing enabled for this forum
				if (in_array($row['forum_id'], $forums_indexing_enabled, true))
				{
					$this->index('post', (int) $row['post_id'], $row['post_text'], $row['post_subject'], (int) $row['poster_id'], (int) $row['forum_id']);
				}
				$row_count++;
				$post_counter = (int) $row['post_id'];
			}

			// With cli process only one batch each time to be able to track progress
			if (PHP_SAPI === 'cli')
			{
				break;
			}
		}

		// pretend the number of posts was as big as the number of ids we indexed so far
		// just an estimation as it includes deleted posts
		$num_posts = $this->config['num_posts'];
		$this->config['num_posts'] = min($this->config['num_posts'], $post_counter);
		$this->tidy();
		$this->config['num_posts'] = $num_posts;

		if ($post_counter < $max_post_id) // If there are still posts to index
		{
			return $this->build_progress($row_count, $post_counter, $max_post_id, $starttime);
		}

		return null;
	}

	/**
	 * {@inheritdoc}
	 *
	 * Removes posts from the index in batches of self::BATCH_SIZE, starting
	 * after $post_counter, for as long as still_on_time() allows. Under the
	 * cli SAPI only a single batch is processed per call.
	 *
	 * @param int|null &$post_counter	Id of the last processed post (null to start from the
	 *									beginning), updated while deleting
	 *
	 * @return array|null Progress information if there are posts left to process, null when done
	 */
	public function delete_index(?int &$post_counter = null): ?array
	{
		$max_post_id = $this->get_max_post_id();

		$starttime = microtime(true);
		$row_count = 0;

		while (still_on_time() && $post_counter < $max_post_id)
		{
			// get_posts_batch_after() requires an int; a null counter means "from the start"
			$rows = $this->get_posts_batch_after($post_counter ?? 0);
			$ids = $posters = $forum_ids = [];
			foreach ($rows as $row)
			{
				$ids[] = $row['post_id'];
				$posters[] = $row['poster_id'];
				$forum_ids[] = $row['forum_id'];
			}

			$batch_size = count($ids);
			$row_count += $batch_size;

			if ($batch_size)
			{
				$this->index_remove($ids, $posters, $forum_ids);
				// cast like create_index() does so the caller's counter stays an int
				$post_counter = (int) $ids[$batch_size - 1];
			}

			// With cli process only one batch each time to be able to track progress
			if (PHP_SAPI === 'cli')
			{
				break;
			}
		}

		if ($post_counter < $max_post_id) // If there are still posts to delete from the index
		{
			return $this->build_progress($row_count, $post_counter, $max_post_id, $starttime);
		}

		return null;
	}

	/**
	 * Builds the progress array returned by create_index() and delete_index().
	 *
	 * @param int		$row_count		Number of rows processed during this call
	 * @param int|null	$post_counter	Id of the last processed post
	 * @param int		$max_post_id	Highest post id in the posts table
	 * @param float		$starttime		microtime(true) value taken when processing started
	 *
	 * @return array Array with the keys row_count, post_counter, max_post_id and rows_per_second
	 */
	private function build_progress(int $row_count, ?int $post_counter, int $max_post_id, float $starttime): array
	{
		// Clamp to one microsecond: the timer may not have advanced at all,
		// and dividing by zero raises a DivisionByZeroError.
		$totaltime = max(microtime(true) - $starttime, 0.000001);

		return [
			'row_count'			=> $row_count,
			'post_counter'		=> $post_counter,
			'max_post_id'		=> $max_post_id,
			'rows_per_second'	=> $row_count / $totaltime,
		];
	}

	/**
	 * Return the ids of the forums that have indexing enabled
	 *
	 * The query result is cached for 3600 seconds.
	 *
	 * @return array
	 */
	protected function forum_ids_with_indexing_enabled(): array
	{
		$forums = [];

		$sql = 'SELECT forum_id, enable_indexing
			FROM ' . FORUMS_TABLE;
		$result = $this->db->sql_query($sql, 3600);

		while ($row = $this->db->sql_fetchrow($result))
		{
			if ((bool) $row['enable_indexing'])
			{
				$forums[] = $row['forum_id'];
			}
		}
		$this->db->sql_freeresult($result);

		return $forums;
	}

	/**
	 * Get batch of posts after id
	 *
	 * Lazily yields up to self::BATCH_SIZE post rows with a post_id greater
	 * than $post_id, in ascending post_id order.
	 *
	 * @param int $post_id
	 * @return \Generator
	 */
	protected function get_posts_batch_after(int $post_id): \Generator
	{
		$sql = 'SELECT post_id, post_subject, post_text, poster_id, forum_id
			FROM ' . POSTS_TABLE . '
			WHERE post_id > ' . (int) $post_id . '
			ORDER BY post_id ASC';
		$result = $this->db->sql_query_limit($sql, self::BATCH_SIZE);

		try
		{
			while ($row = $this->db->sql_fetchrow($result))
			{
				yield $row;
			}
		}
		finally
		{
			// Also runs when the consumer stops iterating early or throws,
			// so the result handle is always released
			$this->db->sql_freeresult($result);
		}
	}

	/**
	 * Get the highest post id in the posts table
	 *
	 * @return int The highest post id; the fetched value is cast to int
	 */
	protected function get_max_post_id(): int
	{
		$sql = 'SELECT MAX(post_id) as max_post_id
			FROM '. POSTS_TABLE;
		$result = $this->db->sql_query($sql);
		$max_post_id = (int) $this->db->sql_fetchfield('max_post_id');
		$this->db->sql_freeresult($result);

		return $max_post_id;
	}

	/**
	 * {@inheritdoc}
	 *
	 * @return string Fully qualified class name of the concrete backend
	 */
	public function get_type(): string
	{
		return static::class;
	}
}