Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 34 additions & 9 deletions src/mavedb/lib/score_sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import io
import logging
import re
from collections import Counter
from collections import Counter, defaultdict
from operator import attrgetter
from typing import TYPE_CHECKING, Any, BinaryIO, Iterable, List, Literal, Optional, Sequence

Expand Down Expand Up @@ -54,7 +54,7 @@
from mavedb.models.uniprot_offset import UniprotOffset
from mavedb.models.user import User
from mavedb.models.variant import Variant
from mavedb.view_models.search import ScoreSetsSearch
from mavedb.view_models.search import ScoreSetsSearch, ControlledKeywordFilterOption

if TYPE_CHECKING:
from mavedb.lib.permissions import Action
Expand Down Expand Up @@ -216,16 +216,21 @@ def build_search_score_sets_query_filter(
)
)

if search.keywords:
query = query.filter(
ScoreSet.experiment.has(
Experiment.keyword_objs.any(
ExperimentControlledKeywordAssociation.controlled_keyword.has(
ControlledKeyword.label.in_(search.keywords)
if search.controlled_keywords:
for item in search.controlled_keywords:
query = query.filter(
ScoreSet.experiment.has(
Experiment.keyword_objs.any(
ExperimentControlledKeywordAssociation.controlled_keyword.has(
and_(
ControlledKeyword.key == item.key,
ControlledKeyword.label == item.label,
)
)
)
)
)
)

Comment on lines +219 to +233
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, actually, one thing that I noticed based on the front end: if a user searches for 'Other' (or any value which is ambiguous between keyword categories), all results will show up regardless of the intended class being searched. We may need to rework the way that the controlled keyword search works to accept both a value and a key, similar to how you return both a value and a key. This would let us more precisely match the user's search request.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for finding this! It's a serious problem.

return query


Expand Down Expand Up @@ -334,6 +339,8 @@ def fetch_score_set_search_filter_options(
publication_author_name_counter: Counter[str] = Counter()
publication_db_name_counter: Counter[str] = Counter()
publication_journal_counter: Counter[str] = Counter()
# Controlled keywords related counters
controlled_keywords_counter: dict[str, Counter[str]] = defaultdict(Counter)

# --- PERFORMANCE NOTE ---
# The following counter construction loop is a bottleneck for large score set queries.
Expand Down Expand Up @@ -388,6 +395,23 @@ def fetch_score_set_search_filter_options(
if journal:
publication_journal_counter[journal] += 1

# Controlled keywords related options
for controlled_keyword in getattr(score_set.experiment, "keyword_objs", []):
keyword = getattr(controlled_keyword, "controlled_keyword", [])
if not keyword:
continue
key = getattr(keyword, "key", None)
label = getattr(keyword, "label", None)
if key and label:
controlled_keywords_counter[key][label] += 1

controlled_keywords_counter_list = []
for key, label_counter in controlled_keywords_counter.items():
for label, count in label_counter.items():
controlled_keywords_counter_list.append(
ControlledKeywordFilterOption(key=key, value=label, count=count)
)

logger.debug(msg="Score set search filter options were fetched.", extra=logging_context())

return {
Expand All @@ -398,6 +422,7 @@ def fetch_score_set_search_filter_options(
"publication_author_names": score_set_search_filter_options_from_counter(publication_author_name_counter),
"publication_db_names": score_set_search_filter_options_from_counter(publication_db_name_counter),
"publication_journals": score_set_search_filter_options_from_counter(publication_journal_counter),
"controlled_keywords": controlled_keywords_counter_list,
}


Expand Down
17 changes: 16 additions & 1 deletion src/mavedb/view_models/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@
from mavedb.view_models.score_set import ShortScoreSet


class ControlledKeywordSearch(BaseModel):
key: str
label: str


class ExperimentsSearch(BaseModel):
published: Optional[bool] = None
authors: Optional[list[str]] = None
Expand All @@ -25,7 +30,7 @@ class ScoreSetsSearch(BaseModel):
databases: Optional[list[str]] = None
journals: Optional[list[str]] = None
publication_identifiers: Optional[list[str]] = None
keywords: Optional[list[str]] = None
controlled_keywords: Optional[list[ControlledKeywordSearch]] = None
text: Optional[str] = None
include_experiment_score_set_urns_and_count: Optional[bool] = True
offset: Optional[int] = None
Expand All @@ -40,6 +45,15 @@ class Config:
from_attributes = True


class ControlledKeywordFilterOption(BaseModel):
key: str
value: str
count: int

class Config:
from_attributes = True


class ScoreSetsSearchFilterOption(BaseModel):
value: str
count: int
Expand All @@ -56,6 +70,7 @@ class ScoreSetsSearchFilterOptionsResponse(BaseModel):
publication_author_names: list[ScoreSetsSearchFilterOption]
publication_db_names: list[ScoreSetsSearchFilterOption]
publication_journals: list[ScoreSetsSearchFilterOption]
controlled_keywords: list[ControlledKeywordFilterOption]

class Config:
from_attributes = True
Expand Down
4 changes: 3 additions & 1 deletion tests/helpers/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2184,7 +2184,9 @@
"databases": ["uniprot"],
"journals": ["biomed"],
"publication_identifiers": ["12345678"],
"keywords": ["keyword"],
"controlled_keywords": [
{"key": "keyword_key", "label": "keyword_label"}
],
"text": "testtesttest",
}

Expand Down
5 changes: 5 additions & 0 deletions tests/lib/test_score_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,7 @@ def test_fetch_score_set_search_filter_options_no_score_sets(setup_lib_db, sessi
filter_options = fetch_score_set_search_filter_options(session, None, None, score_set_search)

assert filter_options == {
"controlled_keywords": [],
"target_gene_categories": [],
"target_gene_names": [],
"target_organism_names": [],
Expand Down Expand Up @@ -437,6 +438,7 @@ def test_fetch_score_set_search_filter_options_with_score_set(setup_lib_db, sess
filter_options = fetch_score_set_search_filter_options(session, user_data, None, score_set_search)

assert filter_options == {
"controlled_keywords": [],
"target_gene_categories": [{"value": TargetCategory.protein_coding, "count": 1}],
"target_gene_names": [{"value": "TEST2", "count": 1}],
"target_organism_names": [],
Expand Down Expand Up @@ -511,6 +513,7 @@ def test_fetch_score_set_search_filter_options_with_partial_filtered_score_sets(
user_data = UserData(user=requesting_user, active_roles=[])
filter_options = fetch_score_set_search_filter_options(session, user_data, None, score_set_search)
assert filter_options == {
"controlled_keywords": [],
"target_gene_categories": [{"value": TargetCategory.protein_coding, "count": 1}],
"target_gene_names": [{"value": "TEST1", "count": 1}],
"target_organism_names": [{"count": 1, "value": "Organism name"}],
Expand All @@ -528,6 +531,7 @@ def test_fetch_score_set_search_filter_options_with_no_matching_score_sets(setup
filter_options = fetch_score_set_search_filter_options(session, user_data, None, score_set_search)

assert filter_options == {
"controlled_keywords": [],
"target_gene_categories": [],
"target_gene_names": [],
"target_organism_names": [],
Expand All @@ -543,6 +547,7 @@ def test_fetch_score_set_search_filter_options_with_no_permitted_score_sets(setu
filter_options = fetch_score_set_search_filter_options(session, None, None, score_set_search)

assert filter_options == {
"controlled_keywords": [],
"target_gene_categories": [],
"target_gene_names": [],
"target_organism_names": [],
Expand Down
6 changes: 5 additions & 1 deletion tests/view_models/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@ def test_populated_experiment_search():

def test_populated_score_set_search():
score_set_search = ScoreSetsSearch(**TEST_POPULATED_SCORE_SET_SEARCH)
assert all(score_set_search.__getattribute__(k) == v for k, v in TEST_POPULATED_SCORE_SET_SEARCH.items())
for k, v in TEST_POPULATED_SCORE_SET_SEARCH.items():
if k == "controlled_keywords":
assert [item.model_dump() for item in score_set_search.controlled_keywords] == v
else:
assert getattr(score_set_search, k) == v


def test_populated_text_search():
Expand Down
Loading