geocoding

Unnamed repository; edit this file 'description' to name the repository.
git clone git://git.nihaljere.xyz/geocoding
Log | Files | Refs

commit 744ee1aa0ad00eae9a6828f1e796972510560dce
Author: Nihal Jere <nihal@nihaljere.xyz>
Date:   Thu, 25 May 2023 21:51:37 -0500

init

Diffstat:
Ageo.c | 164+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aimport-fts.py | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 215 insertions(+), 0 deletions(-)

diff --git a/geo.c b/geo.c @@ -0,0 +1,164 @@ +#include <sys/types.h> /* size_t, ssize_t */ +#include <stdarg.h> /* va_list */ +#include <stddef.h> /* NULL */ +#include <stdint.h> /* int64_t */ +#include <kcgi.h> +#include <kcgijson.h> + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> + +#include <sqlite3.h> + +#define DB_PATH "/data-fts.db" + +enum keys { + KEY_QUERY, + KEY__MAX +}; + +const struct kvalid keys[KEY__MAX] = { + [KEY_QUERY] = { NULL, "q" }, +}; + +sqlite3_stmt *qget, *qcount; + +int64_t +perform_count(char *string) +{ + sqlite3_clear_bindings(qcount); + sqlite3_reset(qcount); + + if (sqlite3_bind_text(qcount, 1, string, -1, SQLITE_STATIC) != SQLITE_OK) + goto fail; + + int ret = sqlite3_step(qcount); + if (ret != SQLITE_ROW) { + goto fail; + } + + int64_t count = sqlite3_column_int64(qcount, 0); + + ret = sqlite3_step(qcount); + if (ret != SQLITE_DONE) { + goto fail; + } + + return count; + +fail: + return -1; +} + +int +perform_query(int fuzzy, struct kreq *r, char *string) +{ + size_t len = strlen(string); + + char *buf = malloc(len + 2); + if (buf == NULL) + return 0; + + strcpy(buf, string); + if (fuzzy) { + buf[len] = '*'; + buf[len + 1] = '\00'; + } + + struct kjsonreq req; + kjson_open(&req, r); + kjson_array_open(&req); + + sqlite3_clear_bindings(qget); + sqlite3_reset(qget); + + if (sqlite3_bind_text(qget, 1, buf, -1, SQLITE_STATIC) != SQLITE_OK) + goto fail; + + int ret = sqlite3_step(qget); + while (ret == SQLITE_ROW) { + kjson_obj_open(&req); + kjson_putstringp(&req, "name", sqlite3_column_text(qget, 0)); + kjson_putstringp(&req, "lat", sqlite3_column_text(qget, 1)); + kjson_putstringp(&req, "lon", sqlite3_column_text(qget, 2)); + + // use state name instead of country if in US + if (strcmp(sqlite3_column_text(qget, 3), "US") == 0) + kjson_putstringp(&req, "name2", sqlite3_column_text(qget, 4)); + else + kjson_putstringp(&req, "name2", sqlite3_column_text(qget, 3)); + kjson_obj_close(&req); + + ret = sqlite3_step(qget); + } + + kjson_array_close(&req); + kjson_close(&req); + free(buf); + return 1; + +fail: + free(buf); + return 0; +} + +int +main(void) +{ + sqlite3 *db; + + struct kreq req; + struct kfcgi *fcgi; + + int err = sqlite3_open(DB_PATH, &db); + if (err != SQLITE_OK) { + fprintf(stderr, "%s\n", sqlite3_errmsg(db)); + return 1; + } + +#define QUERY "SELECT places.name, latitude, longitude, countrycode, admin1code FROM places JOIN placenames ON places.geonameid = placenames.geonameid WHERE placenames.name MATCH ?1 and places.featureclass = 'P' order by places.population desc limit 10;" + if (sqlite3_prepare_v2(db, QUERY, sizeof(QUERY), &qget, NULL) != SQLITE_OK) { + fprintf(stderr, "%s: sqlite3_prepare failed: %s\n", __func__, sqlite3_errmsg(db)); + return 1; + } +#undef QUERY + +#define QUERY "SELECT count(*) FROM placenames WHERE name MATCH ?1;" + if (sqlite3_prepare_v2(db, QUERY, sizeof(QUERY), &qcount, NULL) != SQLITE_OK) { + fprintf(stderr, "%s: sqlite3_prepare failed: %s\n", __func__, sqlite3_errmsg(db)); + return 1; + } +#undef QUERY + + if (khttp_fcgi_init(&fcgi, keys, KEY__MAX, NULL, 0, 0) != KCGI_OK) + return 1; + + int ret; + while ((ret = khttp_fcgi_parse(fcgi, &req)) == KCGI_OK) { + if (req.fieldmap[KEY_QUERY] == NULL) { + khttp_head(&req, kresps[KRESP_STATUS], + "%s", khttps[KHTTP_400]); + khttp_body(&req); + khttp_free(&req); + continue; + } + + khttp_head(&req, kresps[KRESP_STATUS], + "%s", khttps[KHTTP_200]); + khttp_head(&req, kresps[KRESP_CONTENT_TYPE], + "%s", kmimetypes[KMIME_APP_JSON]); + khttp_body(&req); + int64_t count = perform_count(req.fieldmap[KEY_QUERY]->val); + if (count < 10000) { + perform_query(1, &req, req.fieldmap[KEY_QUERY]->val); + } else { + perform_query(0, &req, req.fieldmap[KEY_QUERY]->val); + } + khttp_free(&req); + } + + fprintf(stderr, "%d\n", ret); + khttp_fcgi_free(fcgi); + return 0; +} diff --git a/import-fts.py b/import-fts.py @@ -0,0 +1,51 @@ +import sqlite3 + +con = sqlite3.connect('data-fts.db') +cur = con.cursor() + +cur.execute('create table places(geonameid integer primary key, name text, asciiname text, alternativenames text, latitude text, longitude text, featureclass text, featurecode text, countrycode text, cc2 text, admin1code text, admin2code text, admin3code text, admin4code text, population integer, elevation integer, dem text, timezone text, modified text)') + +cur.execute('create virtual table placenames using fts3(geonameid integer primary key, name text, population integer)') + +values = [] +searchvalues = [] + +with open('allCountries.txt') as f: + i = 0 + for line in f: + if i % 50000 == 0: + cur.executemany('INSERT INTO places VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', values) + values = [] + cur.executemany('INSERT INTO placenames VALUES (?, ?, ?)', searchvalues) + searchvalues = [] + print(i) + + i += 1 + fields = line.split('\t') + geonameid = int(fields[0]) + name = fields[1] + asciiname = fields[2] + alternatenames = fields[3] + latitude = fields[4] + longitude = fields[5] + feature_class = fields[6] + feature_code = fields[7] + country_code = fields[8] + cc2 = fields[9] + admin1_code = fields[10] + admin2_code = fields[11] + admin3_code = fields[12] + admin4_code = fields[13] + population = fields[14] + elevation = fields[15] + dem = fields[16] + timezone = fields[17] + modified = fields[18][:-1] + + value = (geonameid, name, asciiname, alternatenames, latitude, longitude, feature_class, feature_code, country_code, cc2, admin1_code, admin2_code, admin3_code, admin4_code, population, elevation, dem, timezone, modified) + searchvalue = (geonameid, name, population) + values.append(value) + searchvalues.append(searchvalue) + +con.commit() +con.close()