From bf0d34b665836f8b99b7d9e0cbf49c9dc9752486 Mon Sep 17 00:00:00 2001 From: Anupam Kumar Date: Thu, 22 Feb 2024 23:27:35 +0530 Subject: [PATCH 1/2] fix: metadata search for provider Signed-off-by: Anupam Kumar --- lib/BackgroundJobs/IndexerJob.php | 2 +- lib/BackgroundJobs/SubmitContentJob.php | 4 +++- lib/Listener/AppDisableListener.php | 2 +- lib/Public/ContentManager.php | 2 +- lib/Service/LangRopeService.php | 11 ++++++----- lib/Service/ScanService.php | 1 + lib/Type/Source.php | 1 + 7 files changed, 14 insertions(+), 9 deletions(-) diff --git a/lib/BackgroundJobs/IndexerJob.php b/lib/BackgroundJobs/IndexerJob.php index b7bcb3f..c3f0d0b 100644 --- a/lib/BackgroundJobs/IndexerJob.php +++ b/lib/BackgroundJobs/IndexerJob.php @@ -125,7 +125,7 @@ protected function index(array $files): void { $userIds = $this->storageService->getUsersForFileId($queueFile->getFileId()); foreach ($userIds as $userId) { try { - $source = new Source($userId, 'file: ' . $file->getId(), $file->getPath(), $fileHandle, $file->getMtime(), $file->getMimeType()); + $source = new Source($userId, 'file: ' . $file->getId(), $file->getPath(), $fileHandle, $file->getMtime(), $file->getMimeType(), 'file'); } catch (InvalidPathException|NotFoundException $e) { $this->logger->error('Could not find file ' . $file->getPath(), ['exception' => $e]); continue 2; diff --git a/lib/BackgroundJobs/SubmitContentJob.php b/lib/BackgroundJobs/SubmitContentJob.php index bb3497f..8916199 100644 --- a/lib/BackgroundJobs/SubmitContentJob.php +++ b/lib/BackgroundJobs/SubmitContentJob.php @@ -58,7 +58,8 @@ protected function run($argument): void { foreach ($bucketed as $userId => $entities) { $sources = array_map(function (QueueContentItem $item) use ($userId) { - $sourceId = ProviderConfigService::getConfigKey($item->getAppId(), $item->getProviderId()) . ': ' . $item->getItemId(); + $providerKey = ProviderConfigService::getConfigKey($item->getAppId(), $item->getProviderId()); + $sourceId = $providerKey . ': ' . $item->getItemId(); return new Source( $userId, $sourceId, @@ -66,6 +67,7 @@ protected function run($argument): void { $item->getContent(), $item->getLastModified()->getTimeStamp(), $item->getDocumentType(), + $providerKey, ); }, $entities); diff --git a/lib/Listener/AppDisableListener.php b/lib/Listener/AppDisableListener.php index b111648..b71788c 100644 --- a/lib/Listener/AppDisableListener.php +++ b/lib/Listener/AppDisableListener.php @@ -58,7 +58,7 @@ public function handle(Event $event): void { } $this->configService->removeProvider($appId, $providerId); - $this->service->deleteMatchingSources($this->userId, $key); + $this->service->deleteSourcesByProvider($this->userId, $key); } } } diff --git a/lib/Public/ContentManager.php b/lib/Public/ContentManager.php index 8e66f56..ca02c78 100644 --- a/lib/Public/ContentManager.php +++ b/lib/Public/ContentManager.php @@ -112,7 +112,7 @@ public function removeContentForUsers(string $appId, string $providerId, string */ public function removeAllContentForUsers(string $appId, string $providerId, array $users): void { foreach ($users as $userId) { - $this->service->deleteMatchingSources($userId, $this->configService->getConfigKey($appId, $providerId)); + $this->service->deleteSourcesByProvider($userId, $this->configService->getConfigKey($appId, $providerId)); } } } diff --git a/lib/Service/LangRopeService.php b/lib/Service/LangRopeService.php index c766825..50c81af 100644 --- a/lib/Service/LangRopeService.php +++ b/lib/Service/LangRopeService.php @@ -119,16 +119,16 @@ private function requestToExApp( /** * @param string $userId - * @param string $keyword Keyword to search for in the source names + * @param string $providerKey * @return void */ - public function deleteMatchingSources(string $userId, string $keyword): void { + public function deleteSourcesByProvider(string $userId, string $providerKey): void { $params = [ 'userId' => $userId, - 'keyword' => $keyword, + 'providerKey' => $providerKey, ]; - $this->requestToExApp('/deleteMatchingSources', 'POST', $params); + $this->requestToExApp('/deleteSourcesByProvider', 'POST', $params); } /** @@ -161,13 +161,14 @@ public function indexSources(array $sources): void { $params = array_map(function (Source $source) { return [ 'name' => 'sources', - 'filename' => $source->reference, // 'file: 555' + 'filename' => $source->reference, // eg. 'file: 555' 'contents' => $source->content, 'headers' => [ 'userId' => $source->userId, 'title' => $source->title, 'type' => $source->type, 'modified' => $source->modified, + 'provider' => $source->provider, // eg. 'file' ], ]; }, $sources); diff --git a/lib/Service/ScanService.php b/lib/Service/ScanService.php index fcbe69a..76178a2 100644 --- a/lib/Service/ScanService.php +++ b/lib/Service/ScanService.php @@ -74,6 +74,7 @@ public function scanDirectory(string $userId, array $mimeTypeFilter, Folder $dir $fileHandle, $node->getMTime(), $node->getMimeType(), + 'file' ); $sources[] = $source; $size += $node_size; diff --git a/lib/Type/Source.php b/lib/Type/Source.php index 32b69eb..f0d669c 100644 --- a/lib/Type/Source.php +++ b/lib/Type/Source.php @@ -19,6 +19,7 @@ public function __construct( public mixed $content, public int | string $modified, public string $type, + public string $provider, ) { } } From f1a49ce0adb1f7b75651aebaa4a56e5631e5951e Mon Sep 17 00:00:00 2001 From: Anupam Kumar Date: Fri, 23 Feb 2024 16:24:24 +0530 Subject: [PATCH 2/2] update integration-test.yml - use pip cache - reduce scanned files to just admin_manual/ - capture and show logs from context_chat_backend Signed-off-by: Anupam Kumar --- .github/workflows/integration-test.yml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index 24a0c76..59e63c1 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -142,23 +142,23 @@ jobs: - name: Prepare docs run: | cd data/admin/files/documentation - find ./ -depth -name "*.rst" -exec sh -c 'mv "$1" "${1%.rst}.txt"' _ {} \; + find ./admin_manual/ -depth -name "*.rst" -exec sh -c 'mv "$1" "${1%.rst}.txt"' _ {} \; git status - name: Setup python 3.11 uses: actions/setup-python@v5 with: python-version: '3.11' + cache: 'pip' + cache-dependency-path: context_chat_backend/reqs.txt - name: Install and init backend run: | cd context_chat_backend pip install --no-deps -r reqs.txt cp example.env .env - echo "DISABLE_CUSTOM_DOWNLOAD_URI=1" >> .env echo "NEXTCLOUD_URL=http://localhost:8080" >> .env - curl -L https://huggingface.co/TheBloke/dolphin-2.2.1-mistral-7B-GGUF/resolve/main/dolphin-2.2.1-mistral-7b.Q5_K_M.gguf -o model_files/dolphin-2.2.1-mistral-7b.Q5_K_M.gguf - ./main.py & + ./main.py &> backend_logs & - name: Register backend run: | @@ -167,14 +167,16 @@ jobs: - name: Scan files run: | - ./occ files:scan --all + ./occ files:scan admin ./occ context_chat:scan -m text/plain admin - name: Run prompt run: | ./occ context_chat:prompt admin "Which factors are taken into account for the Ethical AI Rating?" - - name: Show log on failure + - name: Show logs if: always() run: | tail data/nextcloud.log + echo '--------------------------------------------------' + [ -f context_chat_backend/backend_logs ] && cat context_chat_backend/backend_logs || echo "No backend logs"