/*
 * Search.c
 *
 * Results page handler: runs the Wikipedia, calculator and dictionary infobox
 * lookups on worker threads, scrapes the registered search engines,
 * de-duplicates the results by URL and renders "results.html".
 */
#include "Search.h"
#include "../Infobox/Wikipedia.h"
#include "../Infobox/Calculator.h"
#include "../Infobox/Dictionary.h"
#include "../Scraping/Scraping.h"
#include "../Utility/Display.h"
#include "../Utility/Unescape.h"
#include <ctype.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
  13. #include <time.h>
  14. typedef struct {
  15. const char *query;
  16. InfoBox result;
  17. int success;
  18. } InfoBoxThreadData;
  19. static void *wiki_thread_func(void *arg) {
  20. InfoBoxThreadData *data = (InfoBoxThreadData *)arg;
  21. char *dynamic_url = construct_wiki_url(data->query);
  22. if (dynamic_url) {
  23. data->result = fetch_wiki_data(dynamic_url);
  24. data->success =
  25. (data->result.title != NULL && data->result.extract != NULL &&
  26. strlen(data->result.extract) > 10);
  27. free(dynamic_url);
  28. } else {
  29. data->success = 0;
  30. }
  31. return NULL;
  32. }
  33. static int is_calculator_query(const char *query) {
  34. if (!query) return 0;
  35. int has_digit = 0;
  36. int has_operator = 0;
  37. for (const char *p = query; *p; p++) {
  38. if (isdigit(*p) || *p == '.') {
  39. has_digit = 1;
  40. }
  41. if (*p == '+' || *p == '-' || *p == '*' || *p == '/' || *p == '=' ||
  42. *p == '^') {
  43. has_operator = 1;
  44. }
  45. }
  46. return has_digit && has_operator;
  47. }
  48. static void *calc_thread_func(void *arg) {
  49. InfoBoxThreadData *data = (InfoBoxThreadData *)arg;
  50. if (is_calculator_query(data->query)) {
  51. data->result = fetch_calc_data((char *)data->query);
  52. data->success =
  53. (data->result.title != NULL && data->result.extract != NULL);
  54. } else {
  55. data->success = 0;
  56. }
  57. return NULL;
  58. }
  59. static void *dict_thread_func(void *arg) {
  60. InfoBoxThreadData *data = (InfoBoxThreadData *)arg;
  61. if (is_dictionary_query(data->query)) {
  62. data->result = fetch_dictionary_data(data->query);
  63. data->success =
  64. (data->result.title != NULL && data->result.extract != NULL);
  65. } else {
  66. data->success = 0;
  67. }
  68. return NULL;
  69. }
  70. static int add_infobox_to_collection(InfoBox *infobox, char ****collection,
  71. int **inner_counts, int current_count) {
  72. *collection =
  73. (char ***)realloc(*collection, sizeof(char **) * (current_count + 1));
  74. *inner_counts =
  75. (int *)realloc(*inner_counts, sizeof(int) * (current_count + 1));
  76. (*collection)[current_count] = (char **)malloc(sizeof(char *) * 4);
  77. (*collection)[current_count][0] = infobox->title ? strdup(infobox->title) : NULL;
  78. (*collection)[current_count][1] = infobox->thumbnail_url ? strdup(infobox->thumbnail_url) : NULL;
  79. (*collection)[current_count][2] = infobox->extract ? strdup(infobox->extract) : NULL;
  80. (*collection)[current_count][3] = infobox->url ? strdup(infobox->url) : NULL;
  81. (*inner_counts)[current_count] = 4;
  82. return current_count + 1;
  83. }
  84. int results_handler(UrlParams *params) {
  85. TemplateContext ctx = new_context();
  86. char *raw_query = "";
  87. int page = 1;
  88. if (params) {
  89. for (int i = 0; i < params->count; i++) {
  90. if (strcmp(params->params[i].key, "q") == 0) {
  91. raw_query = params->params[i].value;
  92. } else if (strcmp(params->params[i].key, "p") == 0) {
  93. int parsed = atoi(params->params[i].value);
  94. if (parsed > 1) page = parsed;
  95. }
  96. }
  97. }
  98. context_set(&ctx, "query", raw_query);
  99. char page_str[16], prev_str[16], next_str[16];
  100. snprintf(page_str, sizeof(page_str), "%d", page);
  101. snprintf(prev_str, sizeof(prev_str), "%d", page > 1 ? page - 1 : 0);
  102. snprintf(next_str, sizeof(next_str), "%d", page + 1);
  103. context_set(&ctx, "page", page_str);
  104. context_set(&ctx, "prev_page", prev_str);
  105. context_set(&ctx, "next_page", next_str);
  106. if (!raw_query || strlen(raw_query) == 0) {
  107. send_response("<h1>No query provided</h1>");
  108. free_context(&ctx);
  109. return -1;
  110. }
  111. pthread_t wiki_tid, calc_tid, dict_tid;
  112. InfoBoxThreadData wiki_data = {.query = raw_query, .success = 0};
  113. InfoBoxThreadData calc_data = {.query = raw_query, .success = 0};
  114. InfoBoxThreadData dict_data = {.query = raw_query, .success = 0};
  115. if (page == 1) {
  116. pthread_create(&wiki_tid, NULL, wiki_thread_func, &wiki_data);
  117. pthread_create(&calc_tid, NULL, calc_thread_func, &calc_data);
  118. pthread_create(&dict_tid, NULL, dict_thread_func, &dict_data);
  119. }
  120. ScrapeJob jobs[ENGINE_COUNT];
  121. SearchResult *all_results[ENGINE_COUNT];
  122. for (int i = 0; i < ENGINE_COUNT; i++) {
  123. all_results[i] = NULL;
  124. jobs[i].engine = &ENGINE_REGISTRY[i];
  125. jobs[i].query = raw_query;
  126. jobs[i].out_results = &all_results[i];
  127. jobs[i].max_results = 10;
  128. jobs[i].results_count = 0;
  129. jobs[i].page = page;
  130. jobs[i].handle = NULL;
  131. jobs[i].response.memory = NULL;
  132. jobs[i].response.size = 0;
  133. jobs[i].response.capacity = 0;
  134. }
  135. scrape_engines_parallel(jobs, ENGINE_COUNT);
  136. if (page == 1) {
  137. pthread_join(wiki_tid, NULL);
  138. pthread_join(calc_tid, NULL);
  139. pthread_join(dict_tid, NULL);
  140. }
  141. char ***infobox_matrix = NULL;
  142. int *infobox_inner_counts = NULL;
  143. int infobox_count = 0;
  144. if (page == 1) {
  145. if (dict_data.success) {
  146. infobox_count = add_infobox_to_collection(&dict_data.result, &infobox_matrix,
  147. &infobox_inner_counts, infobox_count);
  148. }
  149. if (calc_data.success) {
  150. infobox_count = add_infobox_to_collection(&calc_data.result, &infobox_matrix,
  151. &infobox_inner_counts, infobox_count);
  152. }
  153. if (wiki_data.success) {
  154. infobox_count = add_infobox_to_collection(&wiki_data.result, &infobox_matrix,
  155. &infobox_inner_counts, infobox_count);
  156. }
  157. }
  158. if (infobox_count > 0) {
  159. context_set_array_of_arrays(&ctx, "infoboxes", infobox_matrix,
  160. infobox_count, infobox_inner_counts);
  161. for (int i = 0; i < infobox_count; i++) {
  162. for (int j = 0; j < 4; j++) free(infobox_matrix[i][j]);
  163. free(infobox_matrix[i]);
  164. }
  165. free(infobox_matrix);
  166. free(infobox_inner_counts);
  167. }
  168. int total_results = 0;
  169. for (int i = 0; i < ENGINE_COUNT; i++) {
  170. total_results += jobs[i].results_count;
  171. }
  172. if (total_results > 0) {
  173. char ***results_matrix = (char ***)malloc(sizeof(char **) * total_results);
  174. int *results_inner_counts = (int *)malloc(sizeof(int) * total_results);
  175. char **seen_urls = (char **)malloc(sizeof(char *) * total_results);
  176. int unique_count = 0;
  177. for (int i = 0; i < ENGINE_COUNT; i++) {
  178. for (int j = 0; j < jobs[i].results_count; j++) {
  179. char *display_url = all_results[i][j].url;
  180. int is_duplicate = 0;
  181. for (int k = 0; k < unique_count; k++) {
  182. if (strcmp(seen_urls[k], display_url) == 0) {
  183. is_duplicate = 1;
  184. break;
  185. }
  186. }
  187. if (is_duplicate) {
  188. free(all_results[i][j].url);
  189. free(all_results[i][j].title);
  190. free(all_results[i][j].snippet);
  191. continue;
  192. }
  193. seen_urls[unique_count] = strdup(display_url);
  194. results_matrix[unique_count] = (char **)malloc(sizeof(char *) * 4);
  195. char *pretty_url = pretty_display_url(display_url);
  196. results_matrix[unique_count][0] = strdup(display_url);
  197. results_matrix[unique_count][1] = strdup(pretty_url);
  198. results_matrix[unique_count][2] = all_results[i][j].title ? strdup(all_results[i][j].title) : strdup("Untitled");
  199. results_matrix[unique_count][3] = all_results[i][j].snippet ? strdup(all_results[i][j].snippet) : strdup("");
  200. results_inner_counts[unique_count] = 4;
  201. free(pretty_url);
  202. free(all_results[i][j].url);
  203. free(all_results[i][j].title);
  204. free(all_results[i][j].snippet);
  205. unique_count++;
  206. }
  207. free(all_results[i]);
  208. }
  209. context_set_array_of_arrays(&ctx, "results", results_matrix, unique_count, results_inner_counts);
  210. char *html = render_template("results.html", &ctx);
  211. if (html) {
  212. send_response(html);
  213. free(html);
  214. }
  215. for (int i = 0; i < unique_count; i++) {
  216. for (int j = 0; j < 4; j++) free(results_matrix[i][j]);
  217. free(results_matrix[i]);
  218. free(seen_urls[i]);
  219. }
  220. free(seen_urls);
  221. free(results_matrix);
  222. free(results_inner_counts);
  223. } else {
  224. char *html = render_template("results.html", &ctx);
  225. if (html) {
  226. send_response(html);
  227. free(html);
  228. }
  229. }
  230. if (page == 1) {
  231. if (wiki_data.success) free_infobox(&wiki_data.result);
  232. if (calc_data.success) free_infobox(&calc_data.result);
  233. if (dict_data.success) free_infobox(&dict_data.result);
  234. }
  235. free_context(&ctx);
  236. return 0;
  237. }