172 lines
6.5 KiB
PHP
172 lines
6.5 KiB
PHP
<?php
|
|
|
|
namespace App\Jobs;
|
|
|
|
use App\Models\DocumentChunk;
|
|
use App\Models\ProcessingLog;
|
|
use App\Services\Ollama\OllamaService;
|
|
use App\Services\Qdrant\QdrantService;
|
|
use Illuminate\Contracts\Queue\ShouldQueue;
|
|
use Illuminate\Foundation\Queue\Queueable;
|
|
use Illuminate\Support\Facades\Log;
|
|
use Illuminate\Support\Str;
|
|
use RuntimeException;
|
|
|
|
/**
 * ReindexChunkJob
 *
 * Re-indexes a single chunk only:
 *   1. Re-embeds getEmbeddableText() (documented priority: final_text > cleaned_text > content)
 *   2. Upserts the point into Qdrant (reusing the existing qdrant_point_id when there is one)
 *   3. Updates the chunk through markAsEmbedded()
 *      (NOTE(review): expected to also reset needs_reindex — confirm in DocumentChunk)
 *
 * This differs from ReindexDocumentJob, which re-indexes the WHOLE document.
 * This job is used for:
 *   - final_text edited by an admin
 *   - Re-including a chunk that had been excluded
 *   - A manually triggered reindex
 *   - Child chunks produced by a split (they need to be embedded for the first time)
 */
class ReindexChunkJob implements ShouldQueue
{
    use Queueable;

    /** Maximum number of attempts before the job is treated as failed. */
    public int $tries = 3;

    /** Seconds to wait before a retry. */
    public int $backoff = 30;

    /** Maximum run time in seconds (2 minutes per chunk). */
    public int $timeout = 120;

    /**
     * @param int $chunkId Primary key of the DocumentChunk to re-index.
     */
    public function __construct(
        public readonly int $chunkId,
    ) {
        $this->onQueue(config('knowledgebase.queue.ingestion', 'default'));
    }

    /**
     * Embed the chunk's text and upsert the resulting vector into Qdrant.
     *
     * The job returns without doing anything when the chunk no longer exists,
     * is superseded, or is excluded from the index. A chunk whose embeddable
     * text is blank is marked as failed and the job returns without retrying.
     *
     * @param OllamaService $ollama Service used to produce the embedding vector.
     * @param QdrantService $qdrant Service used to upsert the point.
     *
     * @throws \Throwable Any failure raised while embedding/upserting is recorded
     *                    and rethrown so the queue can retry the job.
     */
    public function handle(OllamaService $ollama, QdrantService $qdrant): void
    {
        // Load the chunk together with the relations needed by toQdrantPayload().
        $chunk = DocumentChunk::with(['document.category', 'documentVersion'])
            ->find($this->chunkId);

        if (! $chunk) {
            Log::warning("ReindexChunkJob: Chunk #{$this->chunkId} tidak dijumpai. Job dilangkau.");

            return;
        }

        // -- Guards: do not reindex chunks that should not be indexed ---------

        if ($chunk->isSuperseded()) {
            Log::info("ReindexChunkJob: Chunk #{$this->chunkId} adalah superseded. Job dilangkau.", [
                'chunk_index' => $chunk->chunk_index,
            ]);

            return;
        }

        if ($chunk->exclude_from_index) {
            Log::info("ReindexChunkJob: Chunk #{$this->chunkId} dikecualikan dari index. Job dilangkau.", [
                'chunk_index' => $chunk->chunk_index,
                'chunk_status' => $chunk->chunk_status,
            ]);

            return;
        }

        // -- Resolve the text to embed ---------------------------------------

        // Cast so that a null return cannot reach trim()/mb_strlen().
        $textToEmbed = (string) $chunk->getEmbeddableText();

        // Strict comparison on purpose: empty() would reject the valid text "0".
        if (trim($textToEmbed) === '') {
            $chunk->update(['chunk_status' => DocumentChunk::STATUS_FAILED_EMBEDDING]);

            Log::error("ReindexChunkJob: Chunk #{$this->chunkId} mempunyai teks kosong.", [
                'has_final_text' => ! is_null($chunk->final_text),
                'has_cleaned_text' => ! is_null($chunk->cleaned_text),
                'content_length' => mb_strlen((string) $chunk->content),
            ]);

            return;
        }

        // -- Log: processing started -----------------------------------------

        ProcessingLog::record(
            DocumentChunk::class,
            $chunk->id,
            ProcessingLog::STAGE_EMBED,
            ProcessingLog::STATUS_STARTED,
            null,
            [
                'chunk_index' => $chunk->chunk_index,
                'text_source' => $this->describeTextSource($chunk),
                'text_length' => mb_strlen($textToEmbed),
                'is_reindex' => true,
            ]
        );

        try {
            // -- Embed the text ----------------------------------------------
            $vector = $ollama->embed($textToEmbed);

            // -- Decide the point ID -----------------------------------------
            // Reuse the previous qdrant_point_id when present so the upsert
            // overwrites it. This avoids "ghost points" that no chunk references.
            $pointId = $chunk->qdrant_point_id ?? (string) Str::uuid();

            // -- Upsert into Qdrant ------------------------------------------
            $qdrant->ensureCollectionExists();
            $qdrant->upsertPoint($pointId, $vector, $chunk->toQdrantPayload());

            // -- Update the chunk --------------------------------------------
            $chunk->markAsEmbedded($pointId);

            ProcessingLog::record(
                DocumentChunk::class,
                $chunk->id,
                ProcessingLog::STAGE_QDRANT,
                ProcessingLog::STATUS_COMPLETED,
                null,
                [
                    'point_id' => $pointId,
                    'is_reindex' => true,
                ]
            );

            Log::info("ReindexChunkJob: Chunk #{$this->chunkId} berjaya direindex.", [
                'chunk_index' => $chunk->chunk_index,
                'point_id' => $pointId,
            ]);
        } catch (\Throwable $e) {
            // Catch every failure type (not only RuntimeException) so each failed
            // attempt gets a FAILED processing log; the exception is rethrown
            // below so the queue still retries according to $tries.
            $chunk->update(['chunk_status' => DocumentChunk::STATUS_FAILED_EMBEDDING]);

            ProcessingLog::record(
                DocumentChunk::class,
                $chunk->id,
                ProcessingLog::STAGE_EMBED,
                ProcessingLog::STATUS_FAILED,
                $e->getMessage(),
                ['attempt' => $this->attempts()]
            );

            Log::error("ReindexChunkJob: Gagal reindex chunk #{$this->chunkId}.", [
                'error' => $e->getMessage(),
                'attempt' => $this->attempts(),
            ]);

            throw $e; // Allow retry
        }
    }

    /**
     * Called after all retries have been exhausted.
     *
     * @param \Throwable|null $e The exception that failed the job; may be null
     *                           when the job was failed without an exception.
     */
    public function failed(?\Throwable $e = null): void
    {
        DocumentChunk::where('id', $this->chunkId)
            ->update(['chunk_status' => DocumentChunk::STATUS_FAILED_EMBEDDING]);

        Log::error("ReindexChunkJob: Chunk #{$this->chunkId} gagal selepas semua cubaan semula.", [
            'error' => $e?->getMessage(),
        ]);
    }

    /**
     * Name the chunk field reported as the text source in the processing log.
     *
     * Uses truthiness of final_text, then cleaned_text, falling back to content.
     * NOTE(review): presumably mirrors getEmbeddableText()'s own priority —
     * confirm against DocumentChunk.
     */
    private function describeTextSource(DocumentChunk $chunk): string
    {
        return match (true) {
            (bool) $chunk->final_text => 'final_text',
            (bool) $chunk->cleaned_text => 'cleaned_text',
            default => 'content',
        };
    }
}
|