Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
2.75% |
5 / 182 |
|
10.00% |
1 / 10 |
CRAP | |
0.00% |
0 / 1 |
base | |
2.75% |
5 / 182 |
|
10.00% |
1 / 10 |
3045.51 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
obtain_ids | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
156 | |||
save_ids | |
0.00% |
0 / 48 |
|
0.00% |
0 / 1 |
182 | |||
destroy_cache | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
90 | |||
create_index | |
0.00% |
0 / 29 |
|
0.00% |
0 / 1 |
72 | |||
delete_index | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
56 | |||
forum_ids_with_indexing_enabled | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
get_posts_batch_after | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
get_max_post_id | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
get_type | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * |
4 | * This file is part of the phpBB Forum Software package. |
5 | * |
6 | * @copyright (c) phpBB Limited <https://www.phpbb.com> |
7 | * @license GNU General Public License, version 2 (GPL-2.0) |
8 | * |
9 | * For full copyright and license information, please see |
10 | * the docs/CREDITS.txt file. |
11 | * |
12 | */ |
13 | |
14 | namespace phpbb\search\backend; |
15 | |
16 | use phpbb\cache\service; |
17 | use phpbb\config\config; |
18 | use phpbb\db\driver\driver_interface; |
19 | use phpbb\user; |
20 | |
21 | /** |
22 | * optional base class for search plugins providing simple caching based on ACM |
23 | * and functions to retrieve ignore_words and synonyms |
24 | */ |
25 | abstract class base implements search_backend_interface |
26 | { |
27 | public const SEARCH_RESULT_NOT_IN_CACHE = 0; |
28 | public const SEARCH_RESULT_IN_CACHE = 1; |
29 | public const SEARCH_RESULT_INCOMPLETE = 2; |
30 | |
31 | // Batch size for create_index and delete_index |
32 | private const BATCH_SIZE = 100; |
33 | |
34 | /** |
35 | * @var service |
36 | */ |
37 | protected $cache; |
38 | |
39 | /** |
40 | * @var config |
41 | */ |
42 | protected $config; |
43 | |
44 | /** |
45 | * @var driver_interface |
46 | */ |
47 | protected $db; |
48 | |
49 | /** |
50 | * @var user |
51 | */ |
52 | protected $user; |
53 | |
54 | /** |
55 | * @var string |
56 | */ |
57 | protected $search_results_table; |
58 | |
/**
 * Wires up the collaborators shared by the search backends.
 *
 * @param service			$cache					Cache service holding the search result sets
 * @param config			$config					Board configuration
 * @param driver_interface	$db						Database driver used for all queries
 * @param user				$user					User object
 * @param string			$search_results_table	Name of the table recording performed searches
 */
public function __construct(service $cache, config $config, driver_interface $db, user $user, string $search_results_table)
{
	$this->search_results_table = $search_results_table;

	$this->user = $user;
	$this->db = $db;
	$this->config = $config;
	$this->cache = $cache;
}
76 | |
/**
 * Retrieves cached search results
 *
 * The cache entry '_search_results_<search_key>' is an array in which key -1
 * holds the total result count, key -2 holds the sort direction the ids were
 * stored in, and every non-negative key is the absolute position of an id in
 * the result set.
 *
 * @param string	$search_key		an md5 string generated from all the passed search options to identify the results
 * @param int		&$result_count	will contain the number of all results for the search (not only for the current page)
 * @param array		&$id_ary		is filled with the ids belonging to the requested page that are stored in the cache
 * @param int		&$start			indicates the first index of the page; it is clamped into the valid range and,
 *									when the cached direction differs from $sort_dir, is left holding the offset
 *									inside the cached (reversed) set
 * @param int		$per_page		number of ids each page is supposed to contain
 * @param string	$sort_dir		is either a or d representing ASC and DESC
 *
 * @return int self::SEARCH_RESULT_NOT_IN_CACHE or self::SEARCH_RESULT_IN_CACHE or self::SEARCH_RESULT_INCOMPLETE
 */
protected function obtain_ids(string $search_key, int &$result_count, array &$id_ary, int &$start, int $per_page, string $sort_dir): int
{
	$stored_ids = $this->cache->get('_search_results_' . $search_key);

	if (!$stored_ids)
	{
		// no search results cached for this search_key
		return self::SEARCH_RESULT_NOT_IN_CACHE;
	}

	$result_count = $stored_ids[-1];
	$reverse_ids = $stored_ids[-2] != $sort_dir;
	$complete = true;

	// Change start parameter in case out of bounds
	if ($result_count)
	{
		if ($start < 0)
		{
			$start = 0;
		}
		else if ($start >= $result_count)
		{
			// floor() returns a float; cast so the by-reference $start handed back
			// to the caller keeps its declared int type
			$start = ((int) floor(($result_count - 1) / $per_page)) * $per_page;
		}
	}

	// change the start to the actual end of the current request if the sort direction differs
	// from the direction in the cache and reverse the ids later
	if ($reverse_ids)
	{
		$start = $result_count - $start - $per_page;

		// the user requested a page past the last index
		if ($start < 0)
		{
			return self::SEARCH_RESULT_NOT_IN_CACHE;
		}
	}

	// collect the ids of the requested page; a missing position means the
	// cache only holds part of that page
	for ($i = $start, $n = $start + $per_page; ($i < $n) && ($i < $result_count); $i++)
	{
		if (!isset($stored_ids[$i]))
		{
			$complete = false;
		}
		else
		{
			$id_ary[] = $stored_ids[$i];
		}
	}
	unset($stored_ids);

	if ($reverse_ids)
	{
		$id_ary = array_reverse($id_ary);
	}

	return $complete ? self::SEARCH_RESULT_IN_CACHE : self::SEARCH_RESULT_INCOMPLETE;
}
153 | |
/**
 * Caches post/topic ids
 *
 * At most 'search_block_size' ids are stored per call. The cache entry keeps
 * the total result count under key -1, the sort direction of the stored set
 * under key -2 and each id under its absolute position in the result set.
 * When no entry exists yet the search is also recorded in the search results
 * table (if keywords or authors were given) and the user's last search time
 * is updated.
 *
 * @param string	$search_key		an md5 string generated from all the passed search options to identify the results
 * @param string	$keywords		contains the keywords as entered by the user
 * @param array		$author_ary		an array of author ids, if the author should be ignored during the search the array is empty
 * @param int		$result_count	contains the number of all results for the search (not only for the current page)
 * @param array		&$id_ary		contains a list of post or topic ids that shall be cached, the first element
 *									must have the absolute index $start in the result set.
 * @param int		$start			indicates the first index of the page
 * @param string	$sort_dir		is either a or d representing ASC and DESC
 *
 * @return void
 */
protected function save_ids(string $search_key, string $keywords, array $author_ary, int $result_count, array &$id_ary, int $start, string $sort_dir): void
{
	// cast once so the block size is handled as an integer everywhere below
	$block_size = (int) $this->config['search_block_size'];
	$length = min(count($id_ary), $block_size);

	// nothing to cache so exit
	if (!$length)
	{
		return;
	}

	$cache_key = '_search_results_' . $search_key;
	$store_ids = array_slice($id_ary, 0, $length);

	// create a new resultset if there is none for this search_key yet
	// or add the ids to the existing resultset
	if (!($store = $this->cache->get($cache_key)))
	{
		// add the current keywords to the recent searches in the cache which are listed on the search page
		if (!empty($keywords) || count($author_ary))
		{
			$sql = 'SELECT search_time
				FROM ' . $this->search_results_table . '
				WHERE search_key = \'' . $this->db->sql_escape($search_key) . '\'';
			$result = $this->db->sql_query($sql);
			$already_recorded = (bool) $this->db->sql_fetchrow($result);
			$this->db->sql_freeresult($result);

			if (!$already_recorded)
			{
				$sql_ary = [
					'search_key'		=> $search_key,
					'search_time'		=> time(),
					'search_keywords'	=> $keywords,
					'search_authors'	=> ' ' . implode(' ', $author_ary) . ' ',
				];

				$sql = 'INSERT INTO ' . $this->search_results_table . ' ' . $this->db->sql_build_array('INSERT', $sql_ary);
				$this->db->sql_query($sql);
			}
		}

		// use the injected user object and force the id to an integer before
		// it is concatenated into the statement
		$sql = 'UPDATE ' . USERS_TABLE . '
			SET user_last_search = ' . time() . '
			WHERE user_id = ' . (int) $this->user->data['user_id'];
		$this->db->sql_query($sql);

		$store = [-1 => $result_count, -2 => $sort_dir];
		$id_range = range($start, $start + $length - 1);
	}
	else
	{
		// we use one set of results for both sort directions so we have to calculate the indizes
		// for the reversed array and we also have to reverse the ids themselves
		if ($store[-2] != $sort_dir)
		{
			$store_ids = array_reverse($store_ids);
			$id_range = range($store[-1] - $start - $length, $store[-1] - $start - 1);
		}
		else
		{
			$id_range = range($start, $start + $length - 1);
		}
	}

	$first_index = $id_range[0];
	$last_index = $id_range[$length - 1];

	$store_ids = array_combine($id_range, $store_ids);

	// append the ids
	if (is_array($store_ids))
	{
		// array union: positions (and the -1/-2 meta keys) already present in the store win
		$store += $store_ids;

		// if the cache is too big
		if (count($store) - 2 > 20 * $block_size)
		{
			// remove everything in front of two blocks in front of the current start index
			for ($i = 0, $n = $first_index - 2 * $block_size; $i < $n; $i++)
			{
				unset($store[$i]);
			}

			// remove everything after two blocks after the current stop index
			for ($i = $store[-1] - 1, $n = $last_index + 2 * $block_size; $i > $n; $i--)
			{
				unset($store[$i]);
			}
		}
		$this->cache->put($cache_key, $store, $this->config['search_store_results']);

		// refresh the timestamp of the recorded search
		$sql = 'UPDATE ' . $this->search_results_table . '
			SET search_time = ' . time() . '
			WHERE search_key = \'' . $this->db->sql_escape($search_key) . '\'';
		$this->db->sql_query($sql);
	}
}
271 | |
/**
 * Invalidates cached search results and prunes expired search records.
 *
 * Cached result sets are destroyed for every recorded search whose keywords
 * contain a literal '*' or one of $words, and for every recorded search that
 * lists one of $authors. Afterwards all rows of the search results table older
 * than 'search_store_results' seconds are deleted.
 *
 * @param array			$words		Words whose searches must be invalidated
 * @param array|bool	$authors	Author ids whose searches must be invalidated; anything that is not an array is ignored
 *
 * @return void
 */
protected function destroy_cache(array $words, $authors = false): void
{
	// runs a SELECT returning search_key rows and drops the matching cache entries
	$purge_matching = function (string $select_sql): void
	{
		$query = $this->db->sql_query($select_sql);

		while ($search = $this->db->sql_fetchrow($query))
		{
			$this->cache->destroy('_search_results_' . $search['search_key']);
		}
		$this->db->sql_freeresult($query);
	};

	// clear all searches that searched for the specified words
	if (count($words))
	{
		$keyword_clauses = array_map(function ($word)
		{
			return 'search_keywords ' . $this->db->sql_like_expression($this->db->get_any_char() . $word . $this->db->get_any_char());
		}, $words);

		$sql_where = ' OR ' . implode(' OR ', $keyword_clauses);

		$purge_matching('SELECT search_key
			FROM ' . $this->search_results_table . "
			WHERE search_keywords LIKE '%*%' $sql_where");
	}

	// clear all searches that searched for the specified authors
	if (is_array($authors) && count($authors))
	{
		$author_clauses = array_map(function ($author)
		{
			return 'search_authors ' . $this->db->sql_like_expression($this->db->get_any_char() . ' ' . (int) $author . ' ' . $this->db->get_any_char());
		}, $authors);

		$sql_where = implode(' OR ', $author_clauses);

		$purge_matching('SELECT search_key
			FROM ' . $this->search_results_table . "
			WHERE $sql_where");
	}

	// drop recorded searches that have outlived the configured storage time
	$expired_before = time() - (int) $this->config['search_store_results'];

	$sql = 'DELETE
		FROM ' . $this->search_results_table . '
		WHERE search_time < ' . $expired_before;
	$this->db->sql_query($sql);
}
327 | |
/**
 * {@inheritdoc}
 *
 * Indexes posts in batches, starting after $post_counter, for as long as
 * still_on_time() allows (a single batch when running under the CLI).
 * Posts in forums without indexing enabled are skipped but still counted.
 *
 * @param int &$post_counter Id of the last processed post; updated while indexing
 *
 * @return array|null null when every post up to the highest post id has been
 *                    processed, otherwise a status array with the keys
 *                    row_count, post_counter, max_post_id and rows_per_second
 */
public function create_index(int &$post_counter = 0): ?array
{
	$max_post_id = $this->get_max_post_id();

	// Key the enabled forums by integer id: lookups are constant time and do not
	// depend on both queries returning the same scalar type
	$indexable_forums = array_fill_keys(array_map('intval', $this->forum_ids_with_indexing_enabled()), true);

	$starttime = microtime(true);
	$row_count = 0;

	while (still_on_time() && $post_counter < $max_post_id)
	{
		$rows = $this->get_posts_batch_after($post_counter);

		// NOTE(review): presumably the rows must be fully read before index()
		// issues its own queries on such drivers - confirm against the driver
		if ($this->db->sql_buffer_nested_transactions())
		{
			$rows = iterator_to_array($rows);
		}

		$batch_count = 0;
		foreach ($rows as $row)
		{
			$forum_id = (int) $row['forum_id'];

			// Indexing enabled for this forum
			if (isset($indexable_forums[$forum_id]))
			{
				$this->index('post', (int) $row['post_id'], $row['post_text'], $row['post_subject'], (int) $row['poster_id'], $forum_id);
			}
			$batch_count++;
			$post_counter = (int) $row['post_id'];
		}
		$row_count += $batch_count;

		// No post exists beyond $post_counter although the highest id read at the
		// start was larger (e.g. posts were removed meanwhile). Nothing is left to
		// index, so mark the run as finished instead of re-querying until the
		// time limit and reporting unfinished work forever.
		if (!$batch_count)
		{
			$post_counter = $max_post_id;
			break;
		}

		// With cli process only one batch each time to be able to track progress
		if (PHP_SAPI === 'cli')
		{
			break;
		}
	}

	// pretend the number of posts was as big as the number of ids we indexed so far
	// just an estimation as it includes deleted posts
	$num_posts = $this->config['num_posts'];
	$this->config['num_posts'] = min($this->config['num_posts'], $post_counter);
	$this->tidy();
	$this->config['num_posts'] = $num_posts;

	if ($post_counter < $max_post_id) // If there are still post to index
	{
		$totaltime = microtime(true) - $starttime;

		// guard against a zero time difference, which would raise DivisionByZeroError
		$rows_per_second = ($totaltime > 0) ? $row_count / $totaltime : 0.0;

		return [
			'row_count'			=> $row_count,
			'post_counter'		=> $post_counter,
			'max_post_id'		=> $max_post_id,
			'rows_per_second'	=> $rows_per_second,
		];
	}

	return null;
}
388 | |
/**
 * {@inheritdoc}
 *
 * Removes posts from the index in batches, starting after $post_counter, for
 * as long as still_on_time() allows (a single batch when running under the CLI).
 *
 * @param int|null &$post_counter Id of the last processed post; null is treated
 *                                as 0 and the value is updated while removing
 *
 * @return array|null null when every post up to the highest post id has been
 *                    processed, otherwise a status array with the keys
 *                    row_count, post_counter, max_post_id and rows_per_second
 */
public function delete_index(?int &$post_counter = null): ?array
{
	// get_posts_batch_after() requires an int, so start from 0 when no counter was given
	$post_counter = $post_counter ?? 0;

	$max_post_id = $this->get_max_post_id();

	$starttime = microtime(true);
	$row_count = 0;

	while (still_on_time() && $post_counter < $max_post_id)
	{
		$ids = $posters = $forum_ids = [];
		foreach ($this->get_posts_batch_after($post_counter) as $row)
		{
			$ids[] = $row['post_id'];
			$posters[] = $row['poster_id'];
			$forum_ids[] = $row['forum_id'];
		}

		$batch_count = count($ids);

		// No post exists beyond $post_counter although the highest id read at the
		// start was larger (e.g. posts were removed meanwhile). Nothing is left to
		// process, so mark the run as finished instead of re-querying until the
		// time limit and reporting unfinished work forever.
		if (!$batch_count)
		{
			$post_counter = $max_post_id;
			break;
		}

		$row_count += $batch_count;

		$this->index_remove($ids, $posters, $forum_ids);

		// keep the counter an integer, matching create_index()
		$post_counter = (int) $ids[$batch_count - 1];

		// With cli process only one batch each time to be able to track progress
		if (PHP_SAPI === 'cli')
		{
			break;
		}
	}

	if ($post_counter < $max_post_id) // If there are still post delete from index
	{
		$totaltime = microtime(true) - $starttime;

		// guard against a zero time difference, which would raise DivisionByZeroError
		$rows_per_second = ($totaltime > 0) ? $row_count / $totaltime : 0.0;

		return [
			'row_count'			=> $row_count,
			'post_counter'		=> $post_counter,
			'max_post_id'		=> $max_post_id,
			'rows_per_second'	=> $rows_per_second,
		];
	}

	return null;
}
439 | |
/**
 * Lists the forums whose enable_indexing flag is set.
 *
 * The ids are returned exactly as the database layer delivers them.
 *
 * @return array Forum ids with indexing enabled
 */
protected function forum_ids_with_indexing_enabled(): array
{
	$sql = 'SELECT forum_id, enable_indexing
		FROM ' . FORUMS_TABLE;

	// NOTE(review): the second argument is presumably a cache lifetime in seconds - confirm
	$result = $this->db->sql_query($sql, 3600);

	$enabled_ids = [];
	while ($forum = $this->db->sql_fetchrow($result))
	{
		if (!$forum['enable_indexing'])
		{
			continue;
		}

		$enabled_ids[] = $forum['forum_id'];
	}
	$this->db->sql_freeresult($result);

	return $enabled_ids;
}
464 | |
/**
 * Get batch of posts after id
 *
 * Lazily yields up to self::BATCH_SIZE post rows (post_id, post_subject,
 * post_text, poster_id, forum_id) with an id greater than $post_id, ordered
 * by ascending post id.
 *
 * @param int $post_id Only posts with a higher id are returned
 * @return \Generator
 */
protected function get_posts_batch_after(int $post_id): \Generator
{
	$sql = 'SELECT post_id, post_subject, post_text, poster_id, forum_id
		FROM ' . POSTS_TABLE . '
		WHERE post_id > ' . (int) $post_id . '
		ORDER BY post_id ASC';
	$result = $this->db->sql_query_limit($sql, self::BATCH_SIZE);

	try
	{
		while ($row = $this->db->sql_fetchrow($result))
		{
			yield $row;
		}
	}
	finally
	{
		// also runs when the consumer stops iterating early or the generator is
		// destroyed after an exception, so the result set is always released
		$this->db->sql_freeresult($result);
	}
}
486 | |
/**
 * Returns the highest post id found in the posts table.
 *
 * The fetched value is cast to int, so an empty result yields 0.
 *
 * @return int
 */
protected function get_max_post_id(): int
{
	$result = $this->db->sql_query('SELECT MAX(post_id) as max_post_id
		FROM '. POSTS_TABLE);

	$highest_id = (int) $this->db->sql_fetchfield('max_post_id');

	$this->db->sql_freeresult($result);

	return $highest_id;
}
500 | |
/**
 * {@inheritdoc}
 *
 * The type is the fully qualified class name of the concrete backend.
 */
public function get_type(): string
{
	return get_class($this);
}
508 | } |