maxContextChunks = config('knowledgebase.rag.max_context_chunks', 5);
        $this->maxContextWords = config('knowledgebase.rag.max_context_words', 2000);
    }

    /**
     * Answer a question using retrieval-augmented generation (RAG).
     *
     * Pipeline: embed the question, search Qdrant for similar chunks, build a
     * bounded context string, send question + context to the chat model, and
     * return the answer together with source references and timing.
     *
     * @param string $question   The user's question
     * @param ?int   $categoryId Category filter (null = all categories)
     * @return array{
     *   answer: string,
     *   has_answer: bool,
     *   sources: array[],
     *   context_chunks: array[],
     *   model_used: string,
     *   tokens_used: ?int,
     *   response_time: float
     * }
     * @throws RuntimeException Documented contract: raised when Ollama or Qdrant is
     *                          unavailable (thrown by the clients, not directly here)
     */
    public function ask(string $question, ?int $categoryId = null): array
    {
        $startTime = microtime(true);

        // Step 1: generate an embedding for the question.
        $queryVector = $this->ollama->embed($question);

        // Step 2: look up relevant context in Qdrant (active items only).
        $filter = $this->qdrant->buildFilter(
            categoryId: $categoryId,
            isActive: true,
        );

        $scoreThreshold = config('qdrant.search.score_threshold', 0.3);

        $searchResults = $this->qdrant->searchSimilar(
            vector: $queryVector,
            limit: $this->maxContextChunks,
            filter: $filter,
            scoreThreshold: $scoreThreshold,
        );

        // Log the raw search output and, separately, just the scores.
        \Log::info('Qdrant search raw results', [
            'question' => $question,
            'results' => $searchResults,
        ]);

        \Log::info('Qdrant raw results', [
            'scores' => array_map(static fn ($r) => $r['score'] ?? null, $searchResults),
        ]);

        // Nothing retrieved: return the configured fallback without calling the chat model.
        if (empty($searchResults)) {
            $responseTime = round(microtime(true) - $startTime, 3);

            return [
                'answer' => config('ollama.rag_system_prompt_no_result',
                    'Maaf, saya tidak menemui maklumat berkaitan dalam pangkalan pengetahuan kami. ' .
                    'Sila hubungi pejabat kami untuk maklumat lanjut.'),
                'has_answer' => false,
                'sources' => [],
                'context_chunks' => [],
                'model_used' => config('ollama.chat_model'),
                'tokens_used' => null,
                'response_time' => $responseTime,
            ];
        }

        // Step 3: build the context string (and the per-chunk data for logging).
        [$context, $contextChunksData] = $this->buildContext($searchResults);

        // Step 4: send the question plus context to Ollama.
        $chatResult = $this->ollama->chat($question, $context);

        // Step 5: build the source references shown to the user.
        $sources = $this->buildSourceReferences($searchResults);

        $responseTime = round(microtime(true) - $startTime, 3);

        // Guard against an incomplete chat response so a missing key degrades
        // gracefully instead of raising an undefined-key warning / TypeError.
        $answer = (string) ($chatResult['answer'] ?? '');

        // Decide whether the model actually produced an answer.
        $hasAnswer = $this->detectHasAnswer($answer);

        return [
            'answer' => $answer,
            'has_answer' => $hasAnswer,
            'sources' => $sources,
            'context_chunks' => $contextChunksData,
            'model_used' => $chatResult['model'] ?? config('ollama.chat_model'),
            'tokens_used' => $chatResult['tokens'] ?? null,
            'response_time' => $responseTime,
        ];
    }

    /**
     * Build the context string from the search results.
     *
     * Chunks are appended in the order given until adding the next one would
     * exceed $this->maxContextWords; processing stops at that point. If the
     * very first usable chunk is already over the limit it is still included
     * (whole) so the model always receives at least one chunk.
     *
     * Words are counted as whitespace-separated tokens in a UTF-8 aware way,
     * because str_word_count() only recognises single-byte alphabetic runs and
     * would under-count (or count zero for) non-Latin text.
     *
     * @return array{0: string, 1: array[]} [context string, chunk metadata list]
     */
    private function buildContext(array $searchResults): array
    {
        $contextParts = [];
        $chunksData = [];
        $totalWords = 0;

        foreach ($searchResults as $result) {
            $payload = $result['payload'] ?? [];
            $text = $payload['text'] ?? '';

            // Skip chunks without usable text (missing, non-string, or blank).
            if (!is_string($text) || trim($text) === '') {
                continue;
            }

            $words = preg_match_all('/\S+/u', $text);
            if ($words === false) {
                // The /u pattern fails on invalid UTF-8; fall back to the byte-wise count.
                $words = str_word_count($text);
            }

            $source = $this->formatSourceLabel($payload);
            $labelledChunk = "[Sumber: {$source}]\n{$text}";

            if ($totalWords + $words > $this->maxContextWords) {
                // Context would become too long: stop here, but make sure at
                // least one chunk (labelled like every other chunk) is present.
                if ($contextParts === []) {
                    $contextParts[] = $labelledChunk;
                    $chunksData[] = $this->extractChunkData($result);
                }
                break;
            }

            $contextParts[] = $labelledChunk;
            $chunksData[] = $this->extractChunkData($result);
            $totalWords += $words;
        }

        return [implode("\n\n---\n\n", $contextParts), $chunksData];
    }

    /**
     * Build the list of source references for display to the user.
     *
     * Results that share the same document_id / knowledge_item_id /
     * page_number combination are reported once (first occurrence wins).
     */
    private function buildSourceReferences(array $searchResults): array
    {
        $sources = [];
        $seen = []; // keys of sources already emitted, to avoid duplicates

        foreach ($searchResults as $result) {
            $payload = $result['payload'] ?? [];
            $sourceKey = ($payload['document_id'] ?? '') . '_'
                . ($payload['knowledge_item_id'] ?? '') . '_'
                . ($payload['page_number'] ?? '');

            if (isset($seen[$sourceKey])) {
                continue;
            }
            $seen[$sourceKey] = true;

            $sources[] = [
                'type' => $payload['source_type'] ?? 'unknown',
                'knowledge_type' => $payload['knowledge_type'] ?? '',
                'title' => $payload['title'] ?? 'Tiada tajuk',
                'category' => $payload['category_name'] ?? '',
                'category_id' => $payload['category_id'] ?? null,
                'page_number' => $payload['page_number'] ?? null,
                'section_heading' => $payload['section_heading'] ?? null,
                'document_id' => $payload['document_id'] ?? null,
                'knowledge_item_id' => $payload['knowledge_item_id'] ?? null,
                'score' => round((float) ($result['score'] ?? 0), 4),
            ];
        }

        return $sources;
    }

    /**
     * Extract the per-chunk data intended for storage in chat_logs.
     */
    private function extractChunkData(array $result): array
    {
        $payload = $result['payload'] ?? [];

        return [
            'point_id' => $result['id'] ?? null,
            'score' => round((float) ($result['score'] ?? 0), 4),
            'title' => $payload['title'] ?? '',
            'category' => $payload['category_name'] ?? '',
            'source_type' => $payload['source_type'] ?? '',
            'page_number' => $payload['page_number'] ?? null,
        ];
    }

    /**
     * Format the human-readable source label placed in front of a context chunk.
     *
     * Produces "<title>[, ms. <page>][ (<category>)]"; the page and category
     * parts are left out when the payload does not provide them, so a chunk
     * without a category no longer ends in an empty "()".
     */
    private function formatSourceLabel(array $payload): string
    {
        $title = $payload['title'] ?? 'Tanpa tajuk';
        $page = isset($payload['page_number']) ? ", ms. {$payload['page_number']}" : '';
        $category = (string) ($payload['category_name'] ?? '');
        $categorySuffix = $category !== '' ? " ({$category})" : '';

        return "{$title}{$page}{$categorySuffix}";
    }

    /**
     * Detect whether the model actually answered the question.
     *
     * Returns false for a blank answer or when the answer contains one of the
     * known "don't know" / fallback phrases (case-insensitive substring match).
     * NOTE(review): the match is a plain substring test, so a real answer that
     * merely includes e.g. "sila hubungi" is also classified as "no answer".
     */
    private function detectHasAnswer(string $answer): bool
    {
        // Explicit comparison: empty() would also reject the literal answer "0".
        if (trim($answer) === '') {
            return false;
        }

        $noAnswerPatterns = [
            'tidak menemui',
            'tiada maklumat',
            'tidak terdapat dalam',
            'sila hubungi',
            'tidak dapat menjawab',
            'maklumat tidak tersedia',
        ];

        $answerLower = mb_strtolower($answer);

        foreach ($noAnswerPatterns as $pattern) {
            if (str_contains($answerLower, $pattern)) {
                return false;
            }
        }

        return true;
    }
}