
Commit 96611d0

Adding general response certainty based on median log probability to output

Signed-off-by: Jiri Podivin <[email protected]>
jpodivin committed Sep 5, 2024
1 parent 52e2a36 commit 96611d0
Showing 5 changed files with 91 additions and 71 deletions.
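
The certainty value added here is the median of the model's per-token top log probabilities mapped back into percentages, i.e. certainty = median(exp(logprob) * 100) over the generated tokens, using the logprobs that llama-cpp-python returns alongside the completion.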
1 change: 1 addition & 0 deletions .github/workflows/python-diff-lint.yml
@@ -16,6 +16,7 @@ jobs:
       - name: VCS Diff Lint
         uses: fedora-copr/vcs-diff-lint-action@v1
         id: VCS_Diff_Lint
+        install_rpm_packages: ["python3-numpy"]

       - name: Upload artifact with detected defects in SARIF format
         uses: actions/upload-artifact@v4
11 changes: 9 additions & 2 deletions logdetective/logdetective.py
@@ -3,7 +3,8 @@
 import sys

 from logdetective.constants import DEFAULT_ADVISOR
-from logdetective.utils import process_log, initialize_model, retrieve_log_content, format_snippets
+from logdetective.utils import (
+    process_log, initialize_model, retrieve_log_content, format_snippets, compute_certainty)
 from logdetective.extractors import LLMExtractor, DrainExtractor

 LOG = logging.getLogger("logdetective")
@@ -92,15 +93,21 @@ def main():
     if args.no_stream:
         stream = False
     response = process_log(log_summary, model, stream)
+    probs = []
     print("Explanation:")
     if args.no_stream:
         print(response["choices"][0]["text"])
+        probs = response["choices"][0]["logprobs"]["top_logprobs"]
     else:
         # Stream the output
         for chunk in response:
+            if isinstance(chunk["choices"][0]["logprobs"], dict):
+                probs.extend(chunk["choices"][0]["logprobs"]["top_logprobs"])
             delta = chunk['choices'][0]['text']
             print(delta, end='', flush=True)
     print()
+    certainty = compute_certainty(probs)
+
+    print(f"\nResponse certainty: {certainty:.2f}%\n")


 if __name__ == "__main__":
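As a usage sketch (not part of the commit), the payloads below are mocked by hand but follow the OpenAI-style logprobs layout that llama-cpp-python uses, where "top_logprobs" is a list with one dict per generated token mapping token text to its log probability; entries can also be None, which is why main() guards with isinstance():

# Minimal sketch with mocked llama-cpp-python payloads (no model needed).
from logdetective.utils import compute_certainty

non_streamed = {
    "choices": [{
        "text": "Build failed because of a missing dependency.",
        "logprobs": {"top_logprobs": [{"Build": -0.11}, {" failed": -0.45}, {" because": -0.02}]},
    }]
}
probs = non_streamed["choices"][0]["logprobs"]["top_logprobs"]

# Streamed chunks carry the same structure piecewise; chunks whose logprobs
# field is None are skipped, mirroring the isinstance() check in main().
stream_chunks = [
    {"choices": [{"text": " Install", "logprobs": {"top_logprobs": [{" Install": -0.3}]}}]},
    {"choices": [{"text": " gcc.", "logprobs": None}]},
]
for chunk in stream_chunks:
    if isinstance(chunk["choices"][0]["logprobs"], dict):
        probs.extend(chunk["choices"][0]["logprobs"]["top_logprobs"])

# Median of exp(logprob) * 100 over the collected tokens.
print(f"Response certainty: {compute_certainty(probs):.2f}%")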
36 changes: 28 additions & 8 deletions logdetective/utils.py
@@ -1,10 +1,11 @@
 import logging
 import os
 from typing import Iterator, List, Dict
 from urllib.parse import urlparse

+import numpy as np
 import requests

-from llama_cpp import Llama
+from llama_cpp import Llama, CreateCompletionResponse, CreateCompletionStreamResponse
 from logdetective.constants import PROMPT_TEMPLATE


@@ -60,20 +61,36 @@ def initialize_model(model_pth: str, filename_suffix: str = ".gguf", verbose: bo
         model = Llama(
             model_path=model_pth,
             n_ctx=0,  # Maximum context for the model
-            verbose=verbose)
+            verbose=verbose,
+            logits_all=True)
     else:
         model = Llama.from_pretrained(
             model_pth,
             f"*{filename_suffix}",
             n_ctx=0,  # Maximum context for the model
-            verbose=verbose)
+            verbose=verbose,
+            logits_all=True)

     return model


-def process_log(log: str, model: Llama, stream: bool) -> str:
-    """
-    Processes a given log using the provided language model and returns its summary.
+def compute_certainty(probs: List[Dict[str, float] | None]) -> float:
+    """Compute certainty of response based on median log probability.
+    A log probability, log(p), isn't really readable for most people, especially in aggregate.
+    In this case it's just a matter of applying the inverse operation, exp.
+    That leaves a value in the range [0, 1], so it is multiplied by 100.
+    Simply put, this is the most straightforward way to get the numbers out.
+    """
+
+    top_logprobs = [
+        np.exp(x) * 100 for e in probs if isinstance(e, dict) for x in e.values()]
+
+    return np.median(top_logprobs, axis=0)
+
+
+def process_log(log: str, model: Llama, stream: bool) -> (
+        CreateCompletionResponse | Iterator[CreateCompletionStreamResponse]):
+    """Processes a given log using the provided language model and returns its summary.

     Args:
         log (str): The input log to be processed.
@@ -82,10 +99,13 @@ def process_log(log: str, model: Llama, stream: bool) -> str:
     Returns:
         str: The summary of the given log generated by the language model.
     """
-    return model(
+    response = model(
         prompt=PROMPT_TEMPLATE.format(log),
         stream=stream,
-        max_tokens=0)
+        max_tokens=0,
+        logprobs=1)
+
+    return response


def retrieve_log_content(log_path: str) -> str:
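Putting the utils.py pieces together: the model must be loaded with logits_all=True for llama-cpp-python to expose per-token log probabilities, process_log() requests them with logprobs=1, and compute_certainty() converts each log probability back through exp() and reports the median as a percentage. A minimal end-to-end sketch under those assumptions, with "model.gguf" as a placeholder model path and an ad-hoc prompt in place of PROMPT_TEMPLATE:

# Minimal end-to-end sketch of the new code path; not part of the commit.
from llama_cpp import Llama

from logdetective.utils import compute_certainty

# logits_all=True is what allows llama-cpp-python to return logprobs later.
model = Llama(model_path="model.gguf", n_ctx=0, logits_all=True)

# logprobs=1 requests the single most likely log probability per generated token.
response = model(prompt="Explain this build log: ...", max_tokens=32, logprobs=1)
top_logprobs = response["choices"][0]["logprobs"]["top_logprobs"]

# compute_certainty() maps each log(p) back to p with exp(), scales to percent,
# and returns the median across tokens.
print(f"Response certainty: {compute_certainty(top_logprobs):.2f}%")

For example, top log probabilities of -0.05, -0.7 and -0.2 map to roughly 95.1%, 49.7% and 81.9%, so the reported certainty would be 81.9%.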
