Browse Source

fix: general readability improvements

frosty 1 day ago
parent
commit
8c6632502f
10 changed files with 162 additions and 74 deletions
  1. 22 19
      src/Cache/Cache.c
  2. 0 1
      src/Config.c
  3. 22 0
      src/Config.h
  4. 2 1
      src/Infobox/Wikipedia.c
  5. 32 35
      src/Main.c
  6. 1 3
      src/Routes/Search.c
  7. 15 11
      src/Scraping/Scraping.c
  8. 2 1
      src/Utility/Display.c
  9. 50 3
      src/Utility/HttpClient.c
  10. 16 0
      src/Utility/HttpClient.h

+ 22 - 19
src/Cache/Cache.c

@@ -1,4 +1,5 @@
 #include "Cache.h"
 #include "Cache.h"
+#include "Config.h"
 #include <dirent.h>
 #include <dirent.h>
 #include <openssl/evp.h>
 #include <openssl/evp.h>
 #include <stdio.h>
 #include <stdio.h>
@@ -7,9 +8,9 @@
 #include <sys/stat.h>
 #include <sys/stat.h>
 #include <time.h>
 #include <time.h>
 
 
-static char cache_dir[512] = {0};
-static int cache_ttl_search_val = 3600;
-static int cache_ttl_infobox_val = 86400;
+static char cache_dir[BUFFER_SIZE_MEDIUM] = {0};
+static int cache_ttl_search_val = DEFAULT_CACHE_TTL_SEARCH;
+static int cache_ttl_infobox_val = DEFAULT_CACHE_TTL_INFOBOX;
 
 
 void set_cache_ttl_search(int ttl) { cache_ttl_search_val = ttl; }
 void set_cache_ttl_search(int ttl) { cache_ttl_search_val = ttl; }
 
 
@@ -44,7 +45,8 @@ static time_t get_file_mtime(const char *filepath) {
 
 
 int cache_init(const char *dir) {
 int cache_init(const char *dir) {
   if (!dir || strlen(dir) == 0) {
   if (!dir || strlen(dir) == 0) {
-    strcpy(cache_dir, "/tmp/omnisearch_cache");
+    strncpy(cache_dir, DEFAULT_CACHE_DIR, sizeof(cache_dir) - 1);
+    cache_dir[sizeof(cache_dir) - 1] = '\0';
   } else {
   } else {
     strncpy(cache_dir, dir, sizeof(cache_dir) - 1);
     strncpy(cache_dir, dir, sizeof(cache_dir) - 1);
     cache_dir[sizeof(cache_dir) - 1] = '\0';
     cache_dir[sizeof(cache_dir) - 1] = '\0';
@@ -53,19 +55,20 @@ int cache_init(const char *dir) {
   struct stat st;
   struct stat st;
   if (stat(cache_dir, &st) != 0) {
   if (stat(cache_dir, &st) != 0) {
     if (mkdir(cache_dir, 0755) != 0) {
     if (mkdir(cache_dir, 0755) != 0) {
-      fprintf(stderr, "Failed to create cache directory: %s\n", cache_dir);
+      fprintf(stderr, "[ERROR] Failed to create cache directory: %s\n",
+              cache_dir);
       return -1;
       return -1;
     }
     }
   } else if (!S_ISDIR(st.st_mode)) {
   } else if (!S_ISDIR(st.st_mode)) {
-    fprintf(stderr, "Cache path exists but is not a directory: %s\n",
+    fprintf(stderr, "[ERROR] Cache path exists but is not a directory: %s\n",
             cache_dir);
             cache_dir);
     return -1;
     return -1;
   }
   }
 
 
-  char subdirs[] = "0123456789abcdef";
-  for (int i = 0; subdirs[i]; i++) {
-    char subdir_path[1024];
-    snprintf(subdir_path, sizeof(subdir_path), "%s/%c", cache_dir, subdirs[i]);
+  for (int i = 0; HEX_CHARS[i]; i++) {
+    char subdir_path[BUFFER_SIZE_LARGE];
+    snprintf(subdir_path, sizeof(subdir_path), "%s/%c", cache_dir,
+             HEX_CHARS[i]);
     if (stat(subdir_path, &st) != 0) {
     if (stat(subdir_path, &st) != 0) {
       mkdir(subdir_path, 0755);
       mkdir(subdir_path, 0755);
     }
     }
@@ -77,11 +80,11 @@ int cache_init(const char *dir) {
 void cache_shutdown(void) { cache_dir[0] = '\0'; }
 void cache_shutdown(void) { cache_dir[0] = '\0'; }
 
 
 char *cache_compute_key(const char *query, int page, const char *engine_name) {
 char *cache_compute_key(const char *query, int page, const char *engine_name) {
-  char key_buffer[1024];
+  char key_buffer[BUFFER_SIZE_LARGE];
   snprintf(key_buffer, sizeof(key_buffer), "%s_%d_%s", query ? query : "", page,
   snprintf(key_buffer, sizeof(key_buffer), "%s_%d_%s", query ? query : "", page,
            engine_name ? engine_name : "");
            engine_name ? engine_name : "");
 
 
-  char *hash = malloc(33);
+  char *hash = malloc(MD5_HASH_LEN + 1);
   if (!hash) {
   if (!hash) {
     return NULL;
     return NULL;
   }
   }
@@ -95,7 +98,7 @@ int cache_get(const char *key, time_t max_age, char **out_data,
     return -1;
     return -1;
   }
   }
 
 
-  char filepath[1024];
+  char filepath[BUFFER_SIZE_LARGE];
   snprintf(filepath, sizeof(filepath), "%s/%c/%s.cache", cache_dir, key[0],
   snprintf(filepath, sizeof(filepath), "%s/%c/%s.cache", cache_dir, key[0],
            key);
            key);
 
 
@@ -149,7 +152,7 @@ int cache_set(const char *key, const char *data, size_t size) {
     return -1;
     return -1;
   }
   }
 
 
-  char filepath[1024];
+  char filepath[BUFFER_SIZE_LARGE];
   snprintf(filepath, sizeof(filepath), "%s/%c/%s.cache", cache_dir, key[0],
   snprintf(filepath, sizeof(filepath), "%s/%c/%s.cache", cache_dir, key[0],
            key);
            key);
 
 
@@ -176,11 +179,11 @@ void cache_cleanup(time_t max_age) {
 
 
   time_t now = time(NULL);
   time_t now = time(NULL);
   time_t cutoff = now - max_age;
   time_t cutoff = now - max_age;
-  char subdirs[] = "0123456789abcdef";
 
 
-  for (int d = 0; subdirs[d]; d++) {
-    char subdir_path[1024];
-    snprintf(subdir_path, sizeof(subdir_path), "%s/%c", cache_dir, subdirs[d]);
+  for (int d = 0; HEX_CHARS[d]; d++) {
+    char subdir_path[BUFFER_SIZE_LARGE];
+    snprintf(subdir_path, sizeof(subdir_path), "%s/%c", cache_dir,
+             HEX_CHARS[d]);
 
 
     DIR *dir = opendir(subdir_path);
     DIR *dir = opendir(subdir_path);
     if (!dir)
     if (!dir)
@@ -190,7 +193,7 @@ void cache_cleanup(time_t max_age) {
     while ((entry = readdir(dir)) != NULL) {
     while ((entry = readdir(dir)) != NULL) {
       size_t len = strlen(entry->d_name);
       size_t len = strlen(entry->d_name);
       if (len > 7 && strcmp(entry->d_name + len - 7, ".cache") == 0) {
       if (len > 7 && strcmp(entry->d_name + len - 7, ".cache") == 0) {
-        char filepath[2048];
+        char filepath[BUFFER_SIZE_XLARGE];
         snprintf(filepath, sizeof(filepath), "%s/%s", subdir_path,
         snprintf(filepath, sizeof(filepath), "%s/%s", subdir_path,
                  entry->d_name);
                  entry->d_name);
 
 

+ 0 - 1
src/Config.c

@@ -13,7 +13,6 @@ int load_config(const char *filename, Config *config) {
   char section[64] = "";
   char section[64] = "";
 
 
   while (fgets(line, sizeof(line), file)) {
   while (fgets(line, sizeof(line), file)) {
-
     line[strcspn(line, "\r\n")] = 0;
     line[strcspn(line, "\r\n")] = 0;
 
 
     if (line[0] == '\0' || line[0] == '#' || line[0] == ';') {
     if (line[0] == '\0' || line[0] == '#' || line[0] == ';') {

+ 22 - 0
src/Config.h

@@ -1,6 +1,28 @@
 #ifndef CONFIG_H
 #ifndef CONFIG_H
 #define CONFIG_H
 #define CONFIG_H
 
 
+#define DEFAULT_HOST "0.0.0.0" /* DEFAULT_*: values main() seeds Config with before load_config() runs */
+#define DEFAULT_PORT 5000
+#define DEFAULT_CACHE_DIR "/tmp/omnisearch_cache" /* also the fallback in cache_init() for a NULL/empty dir */
+#define DEFAULT_CACHE_TTL_SEARCH 3600 /* TTLs are compared against time(NULL), i.e. seconds */
+#define DEFAULT_CACHE_TTL_INFOBOX 86400
+#define DEFAULT_MAX_PROXY_RETRIES 3
+
+#define BUFFER_SIZE_SMALL 256 /* BUFFER_SIZE_*: byte sizes for fixed char buffers (paths, keys, headers, URLs) */
+#define BUFFER_SIZE_MEDIUM 512
+#define BUFFER_SIZE_LARGE 1024
+#define BUFFER_SIZE_XLARGE 2048
+
+#define INITIAL_BUFFER_SIZE 16384 /* starting capacity of HTTP response buffers; the write callbacks double it as needed */
+
+#define WIKI_SUMMARY_MAX_CHARS 300 /* limit passed to shorten_summary() for the Wikipedia extract */
+
+#define MD5_HASH_LEN 32 /* cache_compute_key() allocates MD5_HASH_LEN + 1 bytes for the key string */
+#define HEX_CHARS "0123456789abcdef" /* cache_init()/cache_cleanup() use one cache subdirectory per character */
+
+#define INFOBOX_FIELD_COUNT 4 /* moved here from Routes/Search.c; usage is not visible in this diff */
+#define MAX_RESULTS_PER_ENGINE 10 /* moved here from Routes/Search.c; usage is not visible in this diff */
+
 typedef struct {
 typedef struct {
   char host[256];
   char host[256];
   int port;
   int port;

+ 2 - 1
src/Infobox/Wikipedia.c

@@ -2,6 +2,7 @@
 #include "../Cache/Cache.h"
 #include "../Cache/Cache.h"
 #include "../Scraping/Scraping.h"
 #include "../Scraping/Scraping.h"
 #include "../Utility/HttpClient.h"
 #include "../Utility/HttpClient.h"
+#include "Config.h"
 #include <curl/curl.h>
 #include <curl/curl.h>
 #include <libxml/parser.h>
 #include <libxml/parser.h>
 #include <libxml/tree.h>
 #include <libxml/tree.h>
@@ -91,7 +92,7 @@ static void extract_wiki_info(xmlNode *node, InfoBox *info) {
           }
           }
           info->extract = strdup((const char *)content);
           info->extract = strdup((const char *)content);
 
 
-          shorten_summary(&(info->extract), 300);
+          shorten_summary(&(info->extract), WIKI_SUMMARY_MAX_CHARS);
           xmlFree(content);
           xmlFree(content);
         }
         }
       }
       }

+ 32 - 35
src/Main.c

@@ -33,50 +33,47 @@ int main() {
 
 
   curl_global_init(CURL_GLOBAL_DEFAULT);
   curl_global_init(CURL_GLOBAL_DEFAULT);
 
 
-  Config config = {.host = "0.0.0.0",
-                   .port = 5000,
-                   .proxy = "",
-                   .proxy_list_file = "",
-                   .max_proxy_retries = 3,
-                   .randomize_username = 0,
-                   .randomize_password = 0,
-                   .cache_dir = "/tmp/omnisearch_cache",
-                   .cache_ttl_search = 3600,
-                   .cache_ttl_infobox = 86400};
-
-  if (load_config("config.ini", &config) != 0) {
-    fprintf(stderr, "Warning: Could not load config file, using defaults\n");
+  Config cfg = {.host = DEFAULT_HOST,
+                .port = DEFAULT_PORT,
+                .proxy = "",
+                .proxy_list_file = "",
+                .max_proxy_retries = DEFAULT_MAX_PROXY_RETRIES,
+                .randomize_username = 0,
+                .randomize_password = 0,
+                .cache_dir = DEFAULT_CACHE_DIR,
+                .cache_ttl_search = DEFAULT_CACHE_TTL_SEARCH,
+                .cache_ttl_infobox = DEFAULT_CACHE_TTL_INFOBOX};
+
+  if (load_config("config.ini", &cfg) != 0) {
+    fprintf(stderr, "[WARN] Could not load config file, using defaults\n");
   }
   }
 
 
-  if (cache_init(config.cache_dir) != 0) {
-    fprintf(
-        stderr,
-        "Warning: Failed to initialize cache, continuing without caching\n");
+  if (cache_init(cfg.cache_dir) != 0) {
+    fprintf(stderr,
+            "[WARN] Failed to initialize cache, continuing without caching\n");
   } else {
   } else {
-    fprintf(stderr, "Cache initialized at %s\n", config.cache_dir);
-    cache_cleanup(config.cache_ttl_search);
+    fprintf(stderr, "[INFO] Cache initialized at %s\n", cfg.cache_dir);
+    cache_cleanup(cfg.cache_ttl_search);
   }
   }
 
 
-  set_cache_ttl_search(config.cache_ttl_search);
-  set_cache_ttl_infobox(config.cache_ttl_infobox);
+  set_cache_ttl_search(cfg.cache_ttl_search);
+  set_cache_ttl_infobox(cfg.cache_ttl_infobox);
 
 
-  if (config.proxy_list_file[0] != '\0') {
-    if (load_proxy_list(config.proxy_list_file) < 0) {
-      fprintf(
-          stderr,
-          "Warning: Failed to load proxy list, continuing without proxies\n");
+  if (cfg.proxy_list_file[0] != '\0') {
+    if (load_proxy_list(cfg.proxy_list_file) < 0) {
+      fprintf(stderr,
+              "[WARN] Failed to load proxy list, continuing without proxies\n");
     }
     }
   }
   }
 
 
-  max_proxy_retries = config.max_proxy_retries;
-  set_proxy_config(config.proxy, config.randomize_username,
-                   config.randomize_password);
+  max_proxy_retries = cfg.max_proxy_retries;
+  set_proxy_config(cfg.proxy, cfg.randomize_username, cfg.randomize_password);
 
 
   if (proxy_url[0] != '\0') {
   if (proxy_url[0] != '\0') {
-    fprintf(stderr, "Using proxy: %s\n", proxy_url);
+    fprintf(stderr, "[INFO] Using proxy: %s\n", proxy_url);
   } else if (proxy_count > 0) {
   } else if (proxy_count > 0) {
-    fprintf(stderr, "Using %d proxies from %s\n", proxy_count,
-            config.proxy_list_file);
+    fprintf(stderr, "[INFO] Using %d proxies from %s\n", proxy_count,
+            cfg.proxy_list_file);
   }
   }
 
 
   set_handler("/", home_handler);
   set_handler("/", home_handler);
@@ -85,12 +82,12 @@ int main() {
   set_handler("/images", images_handler);
   set_handler("/images", images_handler);
   set_handler("/proxy", image_proxy_handler);
   set_handler("/proxy", image_proxy_handler);
 
 
-  fprintf(stderr, "Starting Omnisearch on %s:%d\n", config.host, config.port);
+  fprintf(stderr, "[INFO] Starting Omnisearch on %s:%d\n", cfg.host, cfg.port);
 
 
-  int result = beaker_run(config.host, config.port);
+  int result = beaker_run(cfg.host, cfg.port);
 
 
   if (result != 0) {
   if (result != 0) {
-    fprintf(stderr, "Error: Beaker server failed to start.\n");
+    fprintf(stderr, "[ERROR] Beaker server failed to start.\n");
     curl_global_cleanup();
     curl_global_cleanup();
     xmlCleanupParser();
     xmlCleanupParser();
     return EXIT_FAILURE;
     return EXIT_FAILURE;

+ 1 - 3
src/Routes/Search.c

@@ -7,6 +7,7 @@
 #include "../Scraping/Scraping.h"
 #include "../Scraping/Scraping.h"
 #include "../Utility/Display.h"
 #include "../Utility/Display.h"
 #include "../Utility/Unescape.h"
 #include "../Utility/Unescape.h"
+#include "Config.h"
 #include <ctype.h>
 #include <ctype.h>
 #include <pthread.h>
 #include <pthread.h>
 #include <stdio.h>
 #include <stdio.h>
@@ -14,9 +15,6 @@
 #include <string.h>
 #include <string.h>
 #include <time.h>
 #include <time.h>
 
 
-#define INFOBOX_FIELD_COUNT 4
-#define MAX_RESULTS_PER_ENGINE 10
-
 typedef struct {
 typedef struct {
   const char *query;
   const char *query;
   InfoBox result;
   InfoBox result;

+ 15 - 11
src/Scraping/Scraping.c

@@ -3,6 +3,7 @@
 #include "../Proxy/Proxy.h"
 #include "../Proxy/Proxy.h"
 #include "../Utility/Unescape.h"
 #include "../Utility/Unescape.h"
 #include "../Utility/XmlHelper.h"
 #include "../Utility/XmlHelper.h"
+#include "Config.h"
 #include <curl/curl.h>
 #include <curl/curl.h>
 #include <libxml/HTMLparser.h>
 #include <libxml/HTMLparser.h>
 #include <libxml/xpath.h>
 #include <libxml/xpath.h>
@@ -18,8 +19,8 @@ static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb,
   MemoryBuffer *mem = (MemoryBuffer *)userp;
   MemoryBuffer *mem = (MemoryBuffer *)userp;
 
 
   if (mem->size + realsize + 1 > mem->capacity) {
   if (mem->size + realsize + 1 > mem->capacity) {
-
-    size_t new_cap = mem->capacity == 0 ? 16384 : mem->capacity * 2;
+    size_t new_cap =
+        mem->capacity == 0 ? INITIAL_BUFFER_SIZE : mem->capacity * 2;
     while (new_cap < mem->size + realsize + 1)
     while (new_cap < mem->size + realsize + 1)
       new_cap *= 2;
       new_cap *= 2;
 
 
@@ -38,7 +39,7 @@ static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb,
   return realsize;
   return realsize;
 }
 }
 
 
-static const char *get_random_user_agent() {
+static const char *get_random_user_agent(void) {
   static const char *agents[] = {
   static const char *agents[] = {
       "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, "
       "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, "
       "like Gecko) Chrome/120.0.0.0 Safari/537.36",
       "like Gecko) Chrome/120.0.0.0 Safari/537.36",
@@ -324,6 +325,9 @@ const SearchEngine ENGINE_REGISTRY[] = {
 
 
 const int ENGINE_COUNT = sizeof(ENGINE_REGISTRY) / sizeof(SearchEngine);
 const int ENGINE_COUNT = sizeof(ENGINE_REGISTRY) / sizeof(SearchEngine);
 
 
+#define CURL_TIMEOUT 15L
+#define CURL_DNS_TIMEOUT 300L
+
 static void configure_curl_handle(CURL *curl, const char *full_url,
 static void configure_curl_handle(CURL *curl, const char *full_url,
                                   MemoryBuffer *chunk,
                                   MemoryBuffer *chunk,
                                   struct curl_slist *headers) {
                                   struct curl_slist *headers) {
@@ -335,9 +339,9 @@ static void configure_curl_handle(CURL *curl, const char *full_url,
 
 
   curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
   curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
   curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "");
   curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "");
-  curl_easy_setopt(curl, CURLOPT_DNS_CACHE_TIMEOUT, 300L);
+  curl_easy_setopt(curl, CURLOPT_DNS_CACHE_TIMEOUT, CURL_DNS_TIMEOUT);
   curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
   curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
-  curl_easy_setopt(curl, CURLOPT_TIMEOUT, 15L);
+  curl_easy_setopt(curl, CURLOPT_TIMEOUT, CURL_TIMEOUT);
   curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 1L);
   curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 1L);
   curl_easy_setopt(curl, CURLOPT_COOKIEFILE, "");
   curl_easy_setopt(curl, CURLOPT_COOKIEFILE, "");
 
 
@@ -348,19 +352,19 @@ static char *build_search_url(const char *base_url, const char *page_param,
                               int page_multiplier, int page_base,
                               int page_multiplier, int page_base,
                               const char *encoded_query, int page) {
                               const char *encoded_query, int page) {
   int page_value = (page < 1 ? 1 : page - 1) * page_multiplier + page_base;
   int page_value = (page < 1 ? 1 : page - 1) * page_multiplier + page_base;
-  char *url = malloc(1024);
+  char *url = malloc(BUFFER_SIZE_LARGE);
   if (!url) {
   if (!url) {
     return NULL;
     return NULL;
   }
   }
-  snprintf(url, 1024, "%s%s&%s=%d", base_url, encoded_query, page_param,
-           page_value);
+  snprintf(url, BUFFER_SIZE_LARGE, "%s%s&%s=%d", base_url, encoded_query,
+           page_param, page_value);
   return url;
   return url;
 }
 }
 
 
 static struct curl_slist *build_request_headers(const char *host_header,
 static struct curl_slist *build_request_headers(const char *host_header,
                                                 const char *referer) {
                                                 const char *referer) {
   struct curl_slist *headers = NULL;
   struct curl_slist *headers = NULL;
-  char host_buf[256], ref_buf[256];
+  char host_buf[BUFFER_SIZE_MEDIUM], ref_buf[BUFFER_SIZE_MEDIUM];
 
 
   snprintf(host_buf, sizeof(host_buf), "Host: %s", host_header);
   snprintf(host_buf, sizeof(host_buf), "Host: %s", host_header);
   snprintf(ref_buf, sizeof(ref_buf), "Referer: %s", referer);
   snprintf(ref_buf, sizeof(ref_buf), "Referer: %s", referer);
@@ -486,9 +490,9 @@ retry:
       continue;
       continue;
     }
     }
 
 
-    job->response.memory = (char *)malloc(16384);
+    job->response.memory = (char *)malloc(INITIAL_BUFFER_SIZE);
     job->response.size = 0;
     job->response.size = 0;
-    job->response.capacity = 16384;
+    job->response.capacity = INITIAL_BUFFER_SIZE;
 
 
     struct curl_slist *headers =
     struct curl_slist *headers =
         build_request_headers(job->engine->host_header, job->engine->referer);
         build_request_headers(job->engine->host_header, job->engine->referer);

+ 2 - 1
src/Utility/Display.c

@@ -1,4 +1,5 @@
 #include "Display.h"
 #include "Display.h"
+#include "Config.h"
 #include <ctype.h>
 #include <ctype.h>
 #include <stdlib.h>
 #include <stdlib.h>
 #include <string.h>
 #include <string.h>
@@ -20,7 +21,7 @@ char *pretty_display_url(const char *input) {
   }
   }
 
 
   size_t input_len = strlen(start);
   size_t input_len = strlen(start);
-  char temp[512];
+  char temp[BUFFER_SIZE_MEDIUM];
   strncpy(temp, start, sizeof(temp) - 1);
   strncpy(temp, start, sizeof(temp) - 1);
   temp[sizeof(temp) - 1] = '\0';
   temp[sizeof(temp) - 1] = '\0';
 
 

+ 50 - 3
src/Utility/HttpClient.c

@@ -1,5 +1,7 @@
 #include "HttpClient.h"
 #include "HttpClient.h"
+#include "../Cache/Cache.h"
 #include "../Proxy/Proxy.h"
 #include "../Proxy/Proxy.h"
+#include "Config.h"
 #include <stdlib.h>
 #include <stdlib.h>
 #include <string.h>
 #include <string.h>
 
 
@@ -9,7 +11,8 @@ static size_t write_callback(void *contents, size_t size, size_t nmemb,
   HttpResponse *mem = (HttpResponse *)userp;
   HttpResponse *mem = (HttpResponse *)userp;
 
 
   if (mem->size + realsize + 1 > mem->capacity) {
   if (mem->size + realsize + 1 > mem->capacity) {
-    size_t new_cap = mem->capacity == 0 ? 16384 : mem->capacity * 2;
+    size_t new_cap =
+        mem->capacity == 0 ? INITIAL_BUFFER_SIZE : mem->capacity * 2;
     while (new_cap < mem->size + realsize + 1)
     while (new_cap < mem->size + realsize + 1)
       new_cap *= 2;
       new_cap *= 2;
 
 
@@ -35,11 +38,11 @@ HttpResponse http_get(const char *url, const char *user_agent) {
     return resp;
     return resp;
   }
   }
 
 
-  resp.memory = malloc(16384);
+  resp.memory = malloc(INITIAL_BUFFER_SIZE);
   if (!resp.memory) {
   if (!resp.memory) {
     return resp;
     return resp;
   }
   }
-  resp.capacity = 16384;
+  resp.capacity = INITIAL_BUFFER_SIZE;
 
 
   CURL *curl = curl_easy_init();
   CURL *curl = curl_easy_init();
   if (!curl) {
   if (!curl) {
@@ -79,3 +82,47 @@ void http_response_free(HttpResponse *resp) {
   resp->size = 0;
   resp->size = 0;
   resp->capacity = 0;
   resp->capacity = 0;
 }
 }
+/**
+ * Fetch a URL and parse it, consulting the on-disk cache first.
+ *
+ * Caching is used only when cache_key is non-NULL and cache_ttl > 0.
+ * On a usable cache hit the cached bytes are handed to parser and the
+ * function returns without touching the network; in that case
+ * result.memory stays NULL and result.size stays 0. Otherwise the URL
+ * is fetched with http_get(), a non-empty body is stored with
+ * cache_set() (when caching is enabled) and then parsed.
+ *
+ * @param url        URL to fetch; also forwarded to parser. Required.
+ * @param user_agent Forwarded unchanged to http_get().
+ * @param cache_key  Cache key, or NULL to bypass the cache.
+ * @param cache_ttl  Maximum age passed to cache_get(); <= 0 bypasses the cache.
+ * @param parser     Callback that turns raw bytes into an xmlDocPtr. Required.
+ * @return A CachedHttpResponse with success == 1 and parsed_result set only
+ *         when parser returned a non-NULL document. A NULL url or parser
+ *         yields the all-zero result.
+ *
+ * NOTE(review): nothing here frees result.memory or the parsed document, so
+ * the caller presumably owns both -- confirm which release functions apply.
+ */
+CachedHttpResponse cached_http_get(const char *url, const char *user_agent,
+                                   const char *cache_key, time_t cache_ttl,
+                                   XmlParserFn parser) {
+  CachedHttpResponse result = {
+      .memory = NULL, .size = 0, .parsed_result = NULL, .success = 0};
+
+  if (!url || !parser) { /* both are mandatory; return the zeroed result */
+    return result;
+  }
+
+  if (cache_key && cache_ttl > 0) { /* caching is opt-in per call */
+    char *cached_data = NULL;
+    size_t cached_size = 0;
+    if (cache_get(cache_key, cache_ttl, &cached_data, &cached_size) == 0 &&
+        cached_data && cached_size > 0) {
+      xmlDocPtr doc = parser(cached_data, cached_size, url);
+      if (doc) {
+        result.parsed_result = doc;
+        result.success = 1;
+      }
+      free(cached_data);
+      return result; /* a hit always returns here, even if parsing failed: no network fallback */
+    }
+    free(cached_data); /* release whatever cache_get() left behind; free(NULL) is a no-op */
+  }
+
+  HttpResponse resp = http_get(url, user_agent);
+  if (resp.memory && resp.size > 0) {
+    if (cache_key && cache_ttl > 0) {
+      cache_set(cache_key, resp.memory, resp.size); /* stored before parsing; return value is ignored */
+    }
+
+    xmlDocPtr doc = parser(resp.memory, resp.size, url);
+    if (doc) {
+      result.parsed_result = doc;
+      result.success = 1;
+    }
+  }
+
+  result.memory = resp.memory; /* raw buffer is passed on regardless of success */
+  result.size = resp.size;
+  return result;
+}

+ 16 - 0
src/Utility/HttpClient.h

@@ -2,7 +2,9 @@
 #define HTTPCLIENT_H
 #define HTTPCLIENT_H
 
 
 #include <curl/curl.h>
 #include <curl/curl.h>
+#include <libxml/parser.h>
 #include <stddef.h>
 #include <stddef.h>
+#include <time.h>
 
 
 typedef struct {
 typedef struct {
   char *memory;
   char *memory;
@@ -13,4 +15,18 @@ typedef struct {
 HttpResponse http_get(const char *url, const char *user_agent);
 HttpResponse http_get(const char *url, const char *user_agent);
 void http_response_free(HttpResponse *resp);
 void http_response_free(HttpResponse *resp);
 
 
+typedef xmlDocPtr (*XmlParserFn)(const char *data, size_t size,
+                                 const char *url); /* parser callback for cached_http_get(); a NULL return is treated as a parse failure */
+/**
+ * Result of cached_http_get(): the raw response buffer (network path only)
+ * together with the document produced by the parser callback.
+ */
+typedef struct {
+  char *memory;        /* raw body from http_get(); NULL when served from cache or on early return */
+  size_t size;         /* byte count reported by http_get(); 0 when memory is NULL */
+  void *parsed_result; /* the xmlDocPtr returned by the parser callback, or NULL */
+  int success;         /* 1 when the parser returned a non-NULL document, else 0 */
+} CachedHttpResponse;
+/**
+ * Fetch url (or reuse a cached copy when cache_key is non-NULL and
+ * cache_ttl > 0) and run parser over the bytes. url and parser are required;
+ * user_agent is forwarded to http_get().
+ */
+CachedHttpResponse cached_http_get(const char *url, const char *user_agent,
+                                   const char *cache_key, time_t cache_ttl,
+                                   XmlParserFn parser);
+
 #endif
 #endif