First commit

This commit is contained in:
Saufi
2026-05-18 08:56:23 +08:00
commit fd3d3a4d2b
147 changed files with 22099 additions and 0 deletions

View File

@@ -0,0 +1,171 @@
<?php
namespace App\Jobs;
use App\Models\DocumentChunk;
use App\Models\ProcessingLog;
use App\Services\Ollama\OllamaService;
use App\Services\Qdrant\QdrantService;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Queue\Queueable;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Str;
use RuntimeException;
/**
 * ReindexChunkJob
 *
 * Re-indexes a single chunk only:
 *   1. Re-embeds the text returned by getEmbeddableText()
 *      (documented priority: final_text > cleaned_text > content).
 *   2. Upserts the point into Qdrant, reusing the existing qdrant_point_id
 *      when the chunk already has one.
 *   3. Updates the chunk through markAsEmbedded() (expected to also clear
 *      needs_reindex — that happens inside the model, not in this class).
 *
 * Unlike ReindexDocumentJob, which re-indexes a WHOLE document, this job is
 * intended for:
 *   - an admin editing final_text,
 *   - re-including a previously excluded chunk,
 *   - a manually triggered re-index,
 *   - child chunks produced by a split (embedded for the first time).
 */
class ReindexChunkJob implements ShouldQueue
{
    use Queueable;

    /** Maximum number of attempts before the job is considered failed. */
    public int $tries = 3;

    /** Seconds to wait before a retry. */
    public int $backoff = 30;

    /** Maximum run time in seconds for one chunk (2 minutes). */
    public int $timeout = 120;

    /**
     * @param int $chunkId Primary key of the DocumentChunk to re-index.
     */
    public function __construct(
        public readonly int $chunkId,
    ) {
        // Route the job to the configured ingestion queue ('default' as fallback).
        $this->onQueue(config('knowledgebase.queue.ingestion', 'default'));
    }

    /**
     * Embed the chunk text and upsert the resulting vector into Qdrant.
     *
     * The job returns silently (no retry) when the chunk is missing,
     * superseded, excluded from the index, or has no text to embed.
     *
     * @throws \Throwable Any failure during embedding/upsert is logged and
     *                    re-thrown so the queue worker can retry the job.
     */
    public function handle(OllamaService $ollama, QdrantService $qdrant): void
    {
        // Eager-load the relations named here; the original author notes they
        // are needed by toQdrantPayload().
        $chunk = DocumentChunk::with(['document.category', 'documentVersion'])
            ->find($this->chunkId);

        if (! $chunk) {
            Log::warning("ReindexChunkJob: Chunk #{$this->chunkId} tidak dijumpai. Job dilangkau.");

            return;
        }

        // -- Guards: never re-index a chunk that should not be indexed --------
        if ($chunk->isSuperseded()) {
            Log::info("ReindexChunkJob: Chunk #{$this->chunkId} adalah superseded. Job dilangkau.", [
                'chunk_index' => $chunk->chunk_index,
            ]);

            return;
        }

        if ($chunk->exclude_from_index) {
            Log::info("ReindexChunkJob: Chunk #{$this->chunkId} dikecualikan dari index. Job dilangkau.", [
                'chunk_index' => $chunk->chunk_index,
                'chunk_status' => $chunk->chunk_status,
            ]);

            return;
        }

        // -- Resolve the text to embed ----------------------------------------
        $textToEmbed = (string) $chunk->getEmbeddableText();

        // Strict comparison instead of empty(): empty('0') is true, which
        // would wrongly reject a chunk whose whole text is the string "0".
        if (trim($textToEmbed) === '') {
            $chunk->update(['chunk_status' => DocumentChunk::STATUS_FAILED_EMBEDDING]);

            Log::error("ReindexChunkJob: Chunk #{$this->chunkId} mempunyai teks kosong.", [
                'has_final_text' => ! is_null($chunk->final_text),
                'has_cleaned_text' => ! is_null($chunk->cleaned_text),
                // Cast: passing null to mb_strlen() is deprecated since PHP 8.1.
                'content_length' => mb_strlen((string) $chunk->content),
            ]);

            return;
        }

        // -- Log: processing started ------------------------------------------
        ProcessingLog::record(
            DocumentChunk::class,
            $chunk->id,
            ProcessingLog::STAGE_EMBED,
            ProcessingLog::STATUS_STARTED,
            null,
            [
                'chunk_index' => $chunk->chunk_index,
                'text_source' => $chunk->final_text
                    ? 'final_text'
                    : ($chunk->cleaned_text ? 'cleaned_text' : 'content'),
                'text_length' => mb_strlen($textToEmbed),
                'is_reindex' => true,
            ]
        );

        try {
            // -- Embed the text -----------------------------------------------
            $vector = $ollama->embed($textToEmbed);

            // -- Decide the point ID ------------------------------------------
            // Reuse the existing qdrant_point_id when present so the upsert
            // overwrites the old point instead of leaving a "ghost point"
            // that no chunk references. A fresh UUID is used otherwise.
            $pointId = $chunk->qdrant_point_id ?? (string) Str::uuid();

            // -- Upsert into Qdrant -------------------------------------------
            $qdrant->ensureCollectionExists();
            $qdrant->upsertPoint($pointId, $vector, $chunk->toQdrantPayload());

            // -- Update the chunk ---------------------------------------------
            $chunk->markAsEmbedded($pointId);

            ProcessingLog::record(
                DocumentChunk::class,
                $chunk->id,
                ProcessingLog::STAGE_QDRANT,
                ProcessingLog::STATUS_COMPLETED,
                null,
                [
                    'point_id' => $pointId,
                    'is_reindex' => true,
                ]
            );

            Log::info("ReindexChunkJob: Chunk #{$this->chunkId} berjaya direindex.", [
                'chunk_index' => $chunk->chunk_index,
                'point_id' => $pointId,
            ]);
        } catch (\Throwable $e) {
            // Catch every Throwable (not only RuntimeException) so failures
            // outside that hierarchy also get a status update and a
            // ProcessingLog entry. The exception is always re-thrown, so the
            // queue worker still retries according to $tries.
            $chunk->update(['chunk_status' => DocumentChunk::STATUS_FAILED_EMBEDDING]);

            ProcessingLog::record(
                DocumentChunk::class,
                $chunk->id,
                ProcessingLog::STAGE_EMBED,
                ProcessingLog::STATUS_FAILED,
                $e->getMessage(),
                ['attempt' => $this->attempts()]
            );

            Log::error("ReindexChunkJob: Gagal reindex chunk #{$this->chunkId}.", [
                'error' => $e->getMessage(),
                'attempt' => $this->attempts(),
            ]);

            throw $e; // Allow retry
        }
    }

    /**
     * Called once the job has permanently failed (all retries exhausted).
     *
     * The parameter is nullable to match Laravel's documented hook signature;
     * the worker may invoke this hook without an exception instance.
     *
     * @param \Throwable|null $e The exception that caused the failure, if any.
     */
    public function failed(?\Throwable $e): void
    {
        // Query-builder update: does not require the model to be loaded.
        DocumentChunk::where('id', $this->chunkId)
            ->update(['chunk_status' => DocumentChunk::STATUS_FAILED_EMBEDDING]);

        Log::error("ReindexChunkJob: Chunk #{$this->chunkId} gagal selepas semua cubaan semula.", [
            'error' => $e?->getMessage(),
        ]);
    }
}