Source code for compshs.utils.rank
"""
Created in 2025
@author: Simon Delarue <simon.delarue@telecom-paris.fr>
"""
import numpy as np
def top_k(values: np.ndarray, k: int = 1) -> np.ndarray:
    """Return the indices of the k highest values, highest first.

    Parameters
    ----------
    values: np.ndarray
        One-dimensional array of values (any array-like is converted
        with ``np.asarray``).
    k: int
        Number of elements to return (default = 1).

    Returns
    -------
    np.ndarray
        Array of ``min(k, len(values))`` indices, ordered by decreasing
        value. Empty if ``k <= 0``. The relative order of indices with
        equal values is unspecified.
    """
    values = np.asarray(values)
    if k <= 0:
        # A negative k would otherwise be read as "counted from the end"
        # by argpartition and the slice, yielding len(values) + k indices.
        return np.empty(0, dtype=np.intp)

    # Negate once so that an ascending sort ranks the highest values first.
    negated = -values
    if k >= len(values):
        return np.argsort(negated)

    # argpartition only guarantees that the first k entries are the k
    # smallest of ``negated``; their internal order is unspecified, so sort
    # this small subset to match the ordering of the branch above.
    candidates = np.argpartition(negated, k)[:k]
    return candidates[np.argsort(negated[candidates])]
def extract_top_words(viz_data, n_topics: int, lambdas: np.array, k: int) -> dict:
    """Collect the top words of every topic in viz_data.

    For each topic, the terms are ranked once per lambda value (relevance
    metric) and the first k terms of every ranking are merged together.

    Parameters
    ----------
    viz_data
        Output from ``pyLDAvis`` library. Its ``sorted_terms`` method is
        queried with ``topic=topic_number + 1``.
    n_topics: int
        Number of topics.
    lambdas: np.array
        Array of lambda values for relevance formula.
    k: int
        Top-k words are selected for each lambda value.

    Returns
    -------
    dict
        Dictionary with topic number (starting at 0) as key and the set of
        top words, merged over all lambda values, as value.
    """
    def _leading_terms(topic_id, weight):
        # First k entries of the 'Term' column for this topic and lambda.
        ranked = viz_data.sorted_terms(topic=topic_id, _lambda=weight)
        return ranked['Term'].values[:k]

    return {
        topic_idx: {
            term
            for weight in lambdas
            for term in _leading_terms(topic_idx + 1, weight)
        }
        for topic_idx in range(n_topics)
    }