Images.c 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. #include "Images.h"
  2. #include "../Utility/Unescape.h"
  3. #include "../Proxy/Proxy.h"
  4. #include "../Scraping/Scraping.h"
  5. #include <curl/curl.h>
  6. #include <libxml/HTMLparser.h>
  7. #include <libxml/xpath.h>
  8. #include <stdio.h>
  9. #include <stdlib.h>
  10. #include <string.h>
  11. #include <time.h>
  /*
   * Growable byte buffer that accumulates an HTTP response body.
   *
   * `response` points at `size` payload bytes followed by one terminating
   * zero byte once the write callback has run at least once.
   */
  struct MemoryBlock {
      char *response; /* heap buffer, resized with realloc() by the callback */
      size_t size;    /* payload bytes stored so far, terminator excluded */
  };

  /*
   * libcurl write callback: append `size * nmemb` bytes from `data` to the
   * MemoryBlock passed through `userp`, keeping the buffer zero-terminated.
   *
   * Returns the number of bytes consumed, or 0 when the buffer cannot be
   * grown. On that failure the block is left exactly as it was.
   */
  static size_t ImageWriteCallback(void *data, size_t size, size_t nmemb,
                                   void *userp) {
      struct MemoryBlock *block = userp;
      const size_t incoming = size * nmemb;
      const size_t used = block->size;

      /* One extra byte so the accumulated body can be read as a C string. */
      char *grown = realloc(block->response, used + incoming + 1);
      if (!grown) {
          return 0;
      }

      memcpy(grown + used, data, incoming);
      grown[used + incoming] = 0;

      block->response = grown;
      block->size = used + incoming;
      return incoming;
  }
  30. static char *fetch_images_html(const char *url) {
  31. CURL *curl_handle;
  32. struct MemoryBlock chunk = {.response = malloc(1), .size = 0};
  33. if (!chunk.response) {
  34. return NULL;
  35. }
  36. curl_handle = curl_easy_init();
  37. if (!curl_handle) {
  38. free(chunk.response);
  39. return NULL;
  40. }
  41. curl_easy_setopt(curl_handle, CURLOPT_URL, url);
  42. curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, ImageWriteCallback);
  43. curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);
  44. curl_easy_setopt(
  45. curl_handle, CURLOPT_USERAGENT,
  46. "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko");
  47. curl_easy_setopt(curl_handle, CURLOPT_FOLLOWLOCATION, 1L);
  48. curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, 10L);
  49. apply_proxy_settings(curl_handle);
  50. CURLcode res = curl_easy_perform(curl_handle);
  51. if (res != CURLE_OK) {
  52. free(chunk.response);
  53. curl_easy_cleanup(curl_handle);
  54. return NULL;
  55. }
  56. curl_easy_cleanup(curl_handle);
  57. return chunk.response;
  58. }
  59. int images_handler(UrlParams *params) {
  60. TemplateContext ctx = new_context();
  61. char *raw_query = "";
  62. int page = 1;
  63. if (params) {
  64. for (int i = 0; i < params->count; i++) {
  65. if (strcmp(params->params[i].key, "q") == 0) {
  66. raw_query = params->params[i].value;
  67. } else if (strcmp(params->params[i].key, "p") == 0) {
  68. int parsed = atoi(params->params[i].value);
  69. if (parsed > 1) page = parsed;
  70. }
  71. }
  72. }
  73. context_set(&ctx, "query", raw_query);
  74. char page_str[16], prev_str[16], next_str[16];
  75. snprintf(page_str, sizeof(page_str), "%d", page);
  76. snprintf(prev_str, sizeof(prev_str), "%d", page > 1 ? page - 1 : 0);
  77. snprintf(next_str, sizeof(next_str), "%d", page + 1);
  78. context_set(&ctx, "page", page_str);
  79. context_set(&ctx, "prev_page", prev_str);
  80. context_set(&ctx, "next_page", next_str);
  81. char *display_query = url_decode_query(raw_query);
  82. context_set(&ctx, "query", display_query);
  83. if (!raw_query || strlen(raw_query) == 0) {
  84. send_response("<h1>No query provided</h1>");
  85. if (display_query) free(display_query);
  86. free_context(&ctx);
  87. return -1;
  88. }
  89. CURL *tmp = curl_easy_init();
  90. if (!tmp) {
  91. send_response("<h1>Error initializing curl</h1>");
  92. if (display_query) free(display_query);
  93. free_context(&ctx);
  94. return -1;
  95. }
  96. char *encoded_query = curl_easy_escape(tmp, raw_query, 0);
  97. curl_easy_cleanup(tmp);
  98. if (!encoded_query) {
  99. send_response("<h1>Error encoding query</h1>");
  100. if (display_query) free(display_query);
  101. free_context(&ctx);
  102. return -1;
  103. }
  104. char url[1024];
  105. int first = (page - 1) * 32 + 1;
  106. snprintf(url, sizeof(url),
  107. "https://www.bing.com/images/search?q=%s&first=%d", encoded_query, first);
  108. char *html = fetch_images_html(url);
  109. if (!html) {
  110. send_response("<h1>Error fetching images</h1>");
  111. free(encoded_query);
  112. free(display_query);
  113. free_context(&ctx);
  114. return -1;
  115. }
  116. htmlDocPtr doc = htmlReadMemory(html, (int)strlen(html), NULL, NULL,
  117. HTML_PARSE_RECOVER | HTML_PARSE_NOERROR);
  118. if (!doc) {
  119. free(html);
  120. free(encoded_query);
  121. free(display_query);
  122. free_context(&ctx);
  123. return -1;
  124. }
  125. xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
  126. if (!xpathCtx) {
  127. xmlFreeDoc(doc);
  128. free(html);
  129. free(encoded_query);
  130. free(display_query);
  131. free_context(&ctx);
  132. return -1;
  133. }
  134. xmlXPathObjectPtr xpathObj =
  135. xmlXPathEvalExpression((const xmlChar *)"//div[@class='item']", xpathCtx);
  136. int image_count = 0;
  137. char ***image_matrix = NULL;
  138. int *inner_counts = NULL;
  139. if (xpathObj && xpathObj->nodesetval) {
  140. int nodes = xpathObj->nodesetval->nodeNr;
  141. int max_images = (nodes < 32) ? nodes : 32;
  142. image_matrix = malloc(sizeof(char **) * max_images);
  143. inner_counts = malloc(sizeof(int) * max_images);
  144. for (int i = 0; i < nodes; i++) {
  145. if (image_count >= 32) break;
  146. xmlNodePtr node = xpathObj->nodesetval->nodeTab[i];
  147. xmlNodePtr img_node = NULL;
  148. xmlNodePtr tit_node = NULL;
  149. xmlNodePtr des_node = NULL;
  150. xmlNodePtr thumb_link = NULL;
  151. for (xmlNodePtr child = node->children; child; child = child->next) {
  152. if (child->type != XML_ELEMENT_NODE) continue;
  153. if (xmlStrcmp(child->name, (const xmlChar *)"a") == 0) {
  154. xmlChar *class = xmlGetProp(child, (const xmlChar *)"class");
  155. if (class) {
  156. if (xmlStrstr(class, (const xmlChar *)"thumb") != NULL) {
  157. thumb_link = child;
  158. for (xmlNodePtr thumb_child = child->children; thumb_child; thumb_child = thumb_child->next) {
  159. if (xmlStrcmp(thumb_child->name, (const xmlChar *)"div") == 0) {
  160. xmlChar *div_class = xmlGetProp(thumb_child, (const xmlChar *)"class");
  161. if (div_class && xmlStrcmp(div_class, (const xmlChar *)"cico") == 0) {
  162. for (xmlNodePtr cico_child = thumb_child->children; cico_child; cico_child = cico_child->next) {
  163. if (xmlStrcmp(cico_child->name, (const xmlChar *)"img") == 0) {
  164. img_node = cico_child;
  165. break;
  166. }
  167. }
  168. }
  169. if (div_class) xmlFree(div_class);
  170. }
  171. }
  172. } else if (xmlStrstr(class, (const xmlChar *)"tit") != NULL) {
  173. tit_node = child;
  174. }
  175. xmlFree(class);
  176. }
  177. } else if (xmlStrcmp(child->name, (const xmlChar *)"div") == 0) {
  178. xmlChar *class = xmlGetProp(child, (const xmlChar *)"class");
  179. if (class && xmlStrcmp(class, (const xmlChar *)"meta") == 0) {
  180. for (xmlNodePtr meta_child = child->children; meta_child; meta_child = meta_child->next) {
  181. if (xmlStrcmp(meta_child->name, (const xmlChar *)"div") == 0) {
  182. xmlChar *div_class = xmlGetProp(meta_child, (const xmlChar *)"class");
  183. if (div_class) {
  184. if (xmlStrcmp(div_class, (const xmlChar *)"des") == 0) {
  185. des_node = meta_child;
  186. }
  187. xmlFree(div_class);
  188. }
  189. } else if (xmlStrcmp(meta_child->name, (const xmlChar *)"a") == 0) {
  190. xmlChar *a_class = xmlGetProp(meta_child, (const xmlChar *)"class");
  191. if (a_class && xmlStrstr(a_class, (const xmlChar *)"tit") != NULL) {
  192. tit_node = meta_child;
  193. }
  194. if (a_class) xmlFree(a_class);
  195. }
  196. }
  197. }
  198. if (class) xmlFree(class);
  199. }
  200. }
  201. xmlChar *iurl = img_node ? xmlGetProp(img_node, (const xmlChar *)"src") : NULL;
  202. xmlChar *full_url = thumb_link ? xmlGetProp(thumb_link, (const xmlChar *)"href") : NULL;
  203. xmlChar *title = des_node ? xmlNodeGetContent(des_node) : (tit_node ? xmlNodeGetContent(tit_node) : NULL);
  204. xmlChar *rurl = tit_node ? xmlGetProp(tit_node, (const xmlChar *)"href") : NULL;
  205. if (iurl && strlen((char *)iurl) > 0) {
  206. char *proxy_url = NULL;
  207. CURL *esc_curl = curl_easy_init();
  208. if (esc_curl) {
  209. char *encoded = curl_easy_escape(esc_curl, (char *)iurl, 0);
  210. if (encoded) {
  211. size_t proxy_len = strlen("/proxy?url=") + strlen(encoded) + 1;
  212. proxy_url = malloc(proxy_len);
  213. if (proxy_url) {
  214. snprintf(proxy_url, proxy_len, "/proxy?url=%s", encoded);
  215. }
  216. curl_free(encoded);
  217. }
  218. curl_easy_cleanup(esc_curl);
  219. }
  220. image_matrix[image_count] = malloc(sizeof(char *) * 4);
  221. image_matrix[image_count][0] = proxy_url ? strdup(proxy_url) : strdup((char *)iurl);
  222. image_matrix[image_count][1] = strdup(title ? (char *)title : "Image");
  223. image_matrix[image_count][2] = strdup(rurl ? (char *)rurl : "#");
  224. image_matrix[image_count][3] = strdup(full_url ? (char *)full_url : "#");
  225. inner_counts[image_count] = 4;
  226. image_count++;
  227. }
  228. if (iurl) xmlFree(iurl);
  229. if (title) xmlFree(title);
  230. if (rurl) xmlFree(rurl);
  231. if (full_url) xmlFree(full_url);
  232. }
  233. }
  234. context_set_array_of_arrays(&ctx, "images", image_matrix, image_count,
  235. inner_counts);
  236. char *rendered = render_template("images.html", &ctx);
  237. if (rendered) {
  238. send_response(rendered);
  239. free(rendered);
  240. } else {
  241. send_response("<h1>Error rendering image results</h1>");
  242. }
  243. if (image_matrix) {
  244. for (int i = 0; i < image_count; i++) {
  245. for (int j = 0; j < 4; j++) {
  246. free(image_matrix[i][j]);
  247. }
  248. free(image_matrix[i]);
  249. }
  250. free(image_matrix);
  251. }
  252. if (inner_counts) {
  253. free(inner_counts);
  254. }
  255. if (xpathObj) xmlXPathFreeObject(xpathObj);
  256. if (xpathCtx) xmlXPathFreeContext(xpathCtx);
  257. if (doc) xmlFreeDoc(doc);
  258. free(html);
  259. curl_free(encoded_query);
  260. free(display_query);
  261. free_context(&ctx);
  262. return 0;
  263. }