Skip to content

Commit

Permalink
Use Opensearch as the tool for searching posts
Browse files Browse the repository at this point in the history
Full details and diagrams will be in the PR (#296), but the core
details are:
* the previous searching tool was extremely naive, entirely using `String.contains`
* there was no spell correction, nor easy way to add it in
* results came back in an arbitrary order and weren't easily scored

A search engine ('SE' for short, used in the codebase) is a much more
appropriate tool. This change migrates the searching, scoring, and ranking
logic to OpenSearch to return a list of ordered SearchItem instances,
which we then use to return a list of ordered PostItem instances to the user.
  • Loading branch information
Willdotwhite committed Apr 19, 2024
1 parent 5d1a653 commit 9cceeb5
Show file tree
Hide file tree
Showing 18 changed files with 504 additions and 155 deletions.
5 changes: 5 additions & 0 deletions api/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ dependencies {
// DB
implementation("org.litote.kmongo:kmongo:4.11.0")

// SE
implementation("org.apache.httpcomponents.core5:httpcore5:5.2.4")
implementation("org.apache.httpcomponents.core5:httpcore5-h2:5.2.4")
implementation("org.opensearch.client:opensearch-java:2.10.0")

// Discord bot
implementation("org.javacord:javacord:3.8.0")

Expand Down
14 changes: 7 additions & 7 deletions api/src/main/kotlin/com/gmtkgamejam/Application.kt
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ import io.ktor.http.*
import io.ktor.serialization.kotlinx.json.*
import io.ktor.server.application.*
import io.ktor.server.plugins.contentnegotiation.*
import io.ktor.server.plugins.cors.*
import io.ktor.server.plugins.cors.routing.CORS
import io.ktor.server.plugins.cors.routing.*
import kotlinx.serialization.json.Json
import org.koin.core.context.startKoin
import org.koin.environmentProperties
Expand All @@ -22,13 +21,14 @@ fun Application.module() {
modules(DatabaseModule, DiscordBotModule)
}

configureRequestHandling()
configureUserInfoRouting()
configureAuthRouting()
configureAdminRouting()
configurePostRouting()
configureFavouritesRouting()
configureAuthRouting()
configureDiscordBotRouting()
configureFavouritesRouting()
configureInfraRouting()
configurePostRouting()
configureRequestHandling()
configureUserInfoRouting()
}

fun Application.configureRequestHandling() {
Expand Down
24 changes: 24 additions & 0 deletions api/src/main/kotlin/com/gmtkgamejam/ApplicationCallExtensions.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package com.gmtkgamejam

import com.auth0.jwt.JWT
import com.gmtkgamejam.models.auth.AuthTokenSet
import com.gmtkgamejam.services.AuthService
import io.ktor.http.*
import io.ktor.server.application.*
import io.ktor.server.request.*
import io.ktor.server.response.*

/**
 * Looks up the [AuthTokenSet] of the caller identified by the request's `Authorization` header.
 *
 * The first seven characters of the header are discarded (presumably the `Bearer ` scheme prefix;
 * the scheme itself is not validated), the remainder is decoded as a JWT, and its `id` claim is
 * passed to [AuthService.getTokenSet].
 *
 * A header that is missing, too short to carry a token, or not a decodable JWT is treated as
 * "not logged in" and yields `null` instead of an exception, so routes that only decorate their
 * response for authenticated users keep working for anonymous callers.
 *
 * NOTE(review): the token is only decoded here; confirm that its signature is verified elsewhere
 * before the `id` claim is trusted.
 *
 * @param authService service used to resolve the `id` claim to a stored token set
 * @return the caller's token set, or `null` when the caller cannot be identified
 */
fun ApplicationCall.getAuthTokenSet(authService: AuthService): AuthTokenSet? {
    val schemePrefixLength = 7

    val rawToken = request.header("Authorization")
        // Guard the substring call: a short header would otherwise throw StringIndexOutOfBoundsException
        ?.takeIf { it.length > schemePrefixLength }
        ?.substring(schemePrefixLength)
        ?: return null

    val userId = try {
        JWT.decode(rawToken).getClaim("id")?.asString()
    } catch (ignored: com.auth0.jwt.exceptions.JWTDecodeException) {
        // Malformed token: treat the caller as anonymous rather than failing the request
        null
    }

    return userId?.let { authService.getTokenSet(it) }
}

/**
 * Responds with a single-entry map of `"message"` to [text].
 *
 * How the map is serialised depends on the content negotiation installed on the application.
 *
 * @param text message to place under the `message` key
 * @param status status code to set on the response before responding; when `null` the response
 * status is left untouched
 */
suspend fun ApplicationCall.respondJSON(text: String, status: HttpStatusCode? = null) {
    status?.let { response.status(it) }

    val payload = mapOf("message" to text)
    respond(payload)
}

This file was deleted.

5 changes: 5 additions & 0 deletions api/src/main/kotlin/com/gmtkgamejam/Config.kt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ object Config {
*/
fun getString(key: String): String = config.property(key).getString()

/**
 * Get property value parsed as an Int
 *
 * @throws NumberFormatException if the configured value is not a valid integer
 */
fun getInt(key: String): Int {
    val rawValue = getString(key)
    return rawValue.toInt()
}

/**
* Get property list value
*/
Expand Down
9 changes: 2 additions & 7 deletions api/src/main/kotlin/com/gmtkgamejam/EnumExtensions.kt
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
package com.gmtkgamejam

/**
* Floating function to cast a String to an Enum without throwing an exception
*
* Suggest using with mapNotNull{} where possible
*/
inline fun <reified A : Enum<A>> enumFromStringSafe(value: String) : A? {
return enumValues<A>().find { s -> s.name == value.uppercase() }
/**
 * Parses a comma-separated list of enum constant names into a set of [A].
 *
 * Each entry is trimmed and blank entries are skipped, so input such as `"ART, CODE,"` is accepted.
 * An entry is matched against the constant names exactly first and, failing that, ignoring case,
 * so `"art"` resolves to `ART`.
 *
 * The returned set keeps the order in which the names first appear in the input.
 *
 * @param commaSeparatedString raw input, e.g. a query-parameter value
 * @return the distinct constants named in the input; empty when the input has no non-blank entries
 * @throws IllegalArgumentException if an entry does not name any constant of [A]
 */
inline fun <reified A : Enum<A>> enumSetFromInput(commaSeparatedString: String): Set<A> {
    val constants = enumValues<A>()

    return commaSeparatedString.split(',')
        .map(String::trim)
        .filter(String::isNotEmpty)
        .map { name ->
            constants.find { it.name == name }
                ?: constants.find { it.name.equals(name, ignoreCase = true) }
                ?: throw IllegalArgumentException("No enum constant ${A::class.java.name}.$name")
        }
        .toSet()
}
34 changes: 34 additions & 0 deletions api/src/main/kotlin/com/gmtkgamejam/models/posts/SearchItem.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package com.gmtkgamejam.models.posts

/**
 * Search-engine document for a single post.
 *
 * Carries only the [PostItem] fields that are used for searching.
 *
 * Index mappings (e.g. the `description_shingle` mapping defined in OpensearchClusterConfigurer) are
 * applied and computed by the SE at index-time, not by this class.
 */
data class SearchItem(
    val id: String,
    var description_shingle: String,
    var size: Int,
    var skillsPossessed: Set<Skills>?,
    var skillsSought: Set<Skills>?,
    var preferredTools: Set<Tools>?,
    var availability: Availability,
    var timezoneOffsets: Set<Int>,
    var languages: Set<String>,
) {
    /**
     * Builds the search document for [postItem].
     *
     * The description is lower-cased and its newlines are replaced with spaces; every other field
     * is copied across unchanged.
     */
    constructor(postItem: PostItem) : this(
        id = postItem.id,
        // TODO: what work do we want to do here that the SE won't already do?
        description_shingle = postItem.description.lowercase().replace('\n', ' '),
        size = postItem.size,
        skillsPossessed = postItem.skillsPossessed,
        skillsSought = postItem.skillsSought,
        preferredTools = postItem.preferredTools,
        availability = postItem.availability,
        timezoneOffsets = postItem.timezoneOffsets,
        languages = postItem.languages,
    )
}
28 changes: 28 additions & 0 deletions api/src/main/kotlin/com/gmtkgamejam/routing/InfraRoutes.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package com.gmtkgamejam.routing

import com.gmtkgamejam.models.posts.PostItem
import com.gmtkgamejam.respondJSON
import com.gmtkgamejam.search.OpensearchClusterConfigurer
import com.gmtkgamejam.services.PostService
import io.ktor.http.*
import io.ktor.server.application.*
import io.ktor.server.routing.*
import org.litote.kmongo.eq

// TODO: Auth control
/**
 * Registers the infrastructure routes.
 *
 * `GET /infra/se/reset` loads every post whose `deletedAt` is null, passes them to
 * [OpensearchClusterConfigurer.initCluster], and then replies with a confirmation message.
 *
 * NOTE(review): this is a state-changing operation exposed over GET with no auth check yet (see
 * the TODO above).
 */
fun Application.configureInfraRouting() {
    val posts = PostService()

    routing {
        route("/infra/se") {
            get("/reset") {
                val livePosts = posts.getPosts(PostItem::deletedAt eq null)
                OpensearchClusterConfigurer.initCluster(livePosts)
                call.respondJSON("Search engine reset complete", HttpStatusCode.OK)
            }
        }
    }
}
147 changes: 23 additions & 124 deletions api/src/main/kotlin/com/gmtkgamejam/routing/PostRoutes.kt
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
package com.gmtkgamejam.routing

import com.auth0.jwt.JWT
import com.gmtkgamejam.enumFromStringSafe
import com.gmtkgamejam.models.posts.Availability
import com.gmtkgamejam.getAuthTokenSet
import com.gmtkgamejam.models.posts.PostItem
import com.gmtkgamejam.models.posts.Skills
import com.gmtkgamejam.models.posts.Tools
import com.gmtkgamejam.models.posts.SearchItem
import com.gmtkgamejam.models.posts.dtos.PostItemCreateDto
import com.gmtkgamejam.models.posts.dtos.PostItemReportDto
import com.gmtkgamejam.models.posts.dtos.PostItemUnableToContactReportDto
import com.gmtkgamejam.models.posts.dtos.PostItemUpdateDto
import com.gmtkgamejam.respondJSON
import com.gmtkgamejam.search.OpenSearch
import com.gmtkgamejam.search.SearchParams
import com.gmtkgamejam.services.AuthService
import com.gmtkgamejam.services.FavouritesService
import com.gmtkgamejam.services.PostService
Expand All @@ -20,14 +19,9 @@ import io.ktor.server.auth.*
import io.ktor.server.request.*
import io.ktor.server.response.*
import io.ktor.server.routing.*
import java.time.format.DateTimeFormatter
import java.time.LocalDateTime
import org.bson.conversions.Bson
import org.litote.kmongo.*
import java.time.format.DateTimeFormatter
import kotlin.math.min
import kotlin.reflect.KClass
import kotlin.reflect.full.memberProperties
import kotlin.text.Regex.Companion.escape

fun Application.configurePostRouting() {

Expand All @@ -38,18 +32,14 @@ fun Application.configurePostRouting() {
routing {
route("/posts") {
get {
val params = call.parameters

val posts = service.getPosts(and(getFilterFromParameters(params)), getSortFromParameters(params))
val searchParams = SearchParams(call.parameters)
val postIds = OpenSearch.search(searchParams.query(), searchParams.sort())
val posts = service.getPostsByOrderedIds(postIds)

// Set isFavourite on posts for this user if they're logged in
call.request.header("Authorization")?.substring(7)
?.let { JWT.decode(it) }?.getClaim("id")?.asString()
?.let { authService.getTokenSet(it) }
call.getAuthTokenSet(authService)
?.let { favouritesService.getFavouritesByUserId(it.discordId) }
?.let { favouritesList ->
posts.map { it.isFavourite = favouritesList.postIds.contains(it.id) }
}
?.let { favouritesList -> posts.map { it.isFavourite = favouritesList.postIds.contains(it.id) } }

call.respond(posts)
}
Expand All @@ -61,13 +51,9 @@ fun Application.configurePostRouting() {
}

// Set isFavourite on posts for this user if they're logged in
call.request.header("Authorization")?.substring(7)
?.let { JWT.decode(it) }?.getClaim("id")?.asString()
?.let { authService.getTokenSet(it) }
call.getAuthTokenSet(authService)
?.let { favouritesService.getFavouritesByUserId(it.discordId) }
?.let { favouritesList ->
post?.isFavourite = favouritesList.postIds.contains(post?.id)
}
?.let { favouritesList -> post?.isFavourite = favouritesList.postIds.contains(post?.id) }

post?.let { return@get call.respond(it) }
call.respondJSON("Post not found", status = HttpStatusCode.NotFound)
Expand All @@ -93,15 +79,14 @@ fun Application.configurePostRouting() {
data.timezoneOffsets = data.timezoneOffsets.filter { tz -> tz >= -12 && tz <= 12 }.toSet()
}
?.let { PostItem.fromCreateDto(data) }
?.let { service.createPost(it) }
?.also { service.createPost(it) }
?.also { OpenSearch.index(SearchItem(it)) }
?.let { return@post call.respond(it) }

call.respondJSON("Post could not be created", status = HttpStatusCode.NotFound)
}

get("favourites") {
val params = call.parameters

val favourites = authService.getTokenSet(call)
?.let { favouritesService.getFavouritesByUserId(it.discordId) }

Expand All @@ -110,18 +95,12 @@ fun Application.configurePostRouting() {
return@get call.respond(emptyList<PostItem>())
}

val favouritesFilters = mutableListOf<Bson>()
favourites.postIds.forEach {
favouritesFilters.add(and(PostItem::id eq it, PostItem::deletedAt eq null))
}
val searchParams = SearchParams(call.parameters)
val postIds = OpenSearch.search(searchParams.query(), searchParams.sort())
.filter { favourites.postIds.contains(it) }
.toList()

val posts = service.getPosts(
and(
or(favouritesFilters),
and(getFilterFromParameters(params))
),
getSortFromParameters(params)
)
val posts = service.getPostsByOrderedIds(postIds)
posts.map { post -> post.isFavourite = true }

call.respond(posts)
Expand Down Expand Up @@ -157,6 +136,7 @@ fun Application.configurePostRouting() {
post.updatedAt = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"))

service.updatePost(post)
OpenSearch.update(SearchItem(post))
return@put call.respond(post)
}

Expand All @@ -167,10 +147,9 @@ fun Application.configurePostRouting() {
delete {
authService.getTokenSet(call)
?.let { service.getPostByAuthorId(it.discordId) }
?.let {
service.deletePost(it)
return@delete call.respondJSON("Post deleted", status = HttpStatusCode.OK)
}
?.also { service.deletePost(it) }
?.also { OpenSearch.delete(it.id) }
?.let { return@delete call.respondJSON("Post deleted", status = HttpStatusCode.OK) }

// TODO: Replace BadRequest with contextual response
call.respondJSON("Could not delete Post", status = HttpStatusCode.BadRequest)
Expand Down Expand Up @@ -210,83 +189,3 @@ fun Application.configurePostRouting() {
}
}
}

/**
 * Builds the Mongo filters for a post search from the request's query parameters.
 *
 * The result always contains a `deletedAt == null` filter. Each supported parameter then adds
 * further filters when it is present and holds at least one usable value:
 *
 * - `description`: every comma-separated term must appear in the description (case-insensitive)
 * - `skillsPossessed` / `skillsSought`: combined with AND unless the matching `...SearchMode`
 *   parameter is set to something other than `"and"`, in which case they are combined with OR
 * - `tools`: every listed tool must be present
 * - `languages`, `availability`: any of the listed values may match
 * - `timezoneStart` + `timezoneEnd`: any offset in the (possibly wrapping) range may match
 *
 * Values are trimmed, and blank or unrecognised values are ignored. A parameter left with no usable
 * values adds no filter at all, so an empty `$or`/`$and` array (which MongoDB is expected to
 * reject) is never built.
 *
 * @param params query parameters of the incoming request
 * @return filters to be combined by the caller; never empty
 */
fun getFilterFromParameters(params: Parameters): List<Bson> {
    val filters = mutableListOf(PostItem::deletedAt eq null)

    // The regex is the easiest way to check if a description contains a given substring
    params.commaSeparatedValues("description")
        .forEach { filters.add(PostItem::description regex escape(it).toRegex(RegexOption.IGNORE_CASE)) }

    val skillsPossessedSearchMode = params["skillsPossessedSearchMode"] ?: "and"
    params.commaSeparatedValues("skillsPossessed")
        .mapNotNull { enumFromStringSafe<Skills>(it) }
        .map { PostItem::skillsPossessed contains it }
        .takeIf { it.isNotEmpty() }
        ?.let { if (skillsPossessedSearchMode == "and") and(it) else or(it) }
        ?.let(filters::add)

    val skillsSoughtSearchMode = params["skillsSoughtSearchMode"] ?: "and"
    params.commaSeparatedValues("skillsSought")
        .mapNotNull { enumFromStringSafe<Skills>(it) }
        .map { PostItem::skillsSought contains it }
        .takeIf { it.isNotEmpty() }
        ?.let { if (skillsSoughtSearchMode == "and") and(it) else or(it) }
        ?.let(filters::add)

    // Each tool becomes its own top-level filter, i.e. all listed tools are required
    params.commaSeparatedValues("tools")
        .mapNotNull { enumFromStringSafe<Tools>(it) }
        .map { PostItem::preferredTools contains it }
        .let(filters::addAll)

    params.commaSeparatedValues("languages")
        .map { PostItem::languages contains it }
        .takeIf { it.isNotEmpty() }
        ?.let { filters.add(or(it)) }

    // Availabilities are mutually exclusive, so treat it as inclusion search
    params.commaSeparatedValues("availability")
        .mapNotNull { enumFromStringSafe<Availability>(it) }
        .map { PostItem::availability eq it }
        .takeIf { it.isNotEmpty() }
        ?.let { filters.add(or(it)) }

    // If no timezones sent (or they are not valid integers), lack of filters will search all timezones
    val timezoneStart = params["timezoneStart"]?.trim()?.toIntOrNull()
    val timezoneEnd = params["timezoneEnd"]?.trim()?.toIntOrNull()
    if (timezoneStart != null && timezoneEnd != null) {
        // The wrap-around below already treats -12..12 as the full set of offsets; bounding both
        // ends to it keeps a request with huge bounds from allocating a huge list.
        // NOTE(review): assumes stored offsets always lie within -12..12 - confirm against the write paths.
        val minOffset = -12
        val maxOffset = 12
        val boundedStart = timezoneStart.coerceAtLeast(minOffset)
        val boundedEnd = timezoneEnd.coerceAtMost(maxOffset)

        val timezones: List<Int> = if (timezoneStart <= timezoneEnd) {
            // UTC-2 -> UTC+2 should be: [-2, -1, 0, 1, 2]
            (boundedStart..boundedEnd).toList()
        } else {
            // UTC+9 -> UTC-9 should be: [9, 10, 11, 12, -12, -11, -10, -9]
            (boundedStart..maxOffset) + (minOffset..boundedEnd)
        }

        // Add all timezone searches as eq checks
        // It's brute force, but easier to confirm
        timezones
            .map { PostItem::timezoneOffsets contains it }
            .takeIf { it.isNotEmpty() }
            ?.let { filters.add(or(it)) }
    }

    return filters
}

/** Splits the comma-separated query parameter [name] into trimmed, non-empty values; empty when absent. */
private fun Parameters.commaSeparatedValues(name: String): List<String> =
    this[name]
        ?.split(',')
        ?.map(String::trim)
        ?.filter(String::isNotEmpty)
        .orEmpty()

/**
 * Builds the Mongo sort for a post search from the request's query parameters.
 *
 * `sortBy` names the [PostItem] property to sort on. When it is absent or does not name a
 * property of [PostItem], the sort falls back to `createdAt` instead of failing the request.
 * `sortDir` selects the direction: `"asc"` sorts ascending, anything else (including no value)
 * sorts descending.
 *
 * @param params query parameters of the incoming request
 * @return the sort specification to pass to the post query
 */
fun getSortFromParameters(params: Parameters): Bson {
    val defaultFieldName = "createdAt"
    val sortableFields = PostItem::class.memberProperties

    // `sortBy` is caller-supplied, so an unknown name must not throw
    val sortByField = sortableFields.firstOrNull { it.name == params["sortBy"] }
        ?: sortableFields.first { it.name == defaultFieldName }

    return if (params["sortDir"] == "asc") ascending(sortByField) else descending(sortByField)
}
Loading

0 comments on commit 9cceeb5

Please sign in to comment.