Wiki source code of FactHarbor POC1 Architecture Analysis
Version 2.1 by Robert Schaub on 2026/01/02 10:01
Show last authors
| author | version | line-number | content |
|---|---|---|---|
| 1 | = FactHarbor POC1 Architecture Analysis = | ||
| 2 | |||
| 3 | |||
| 4 | **Version:** 2.6.17 | ||
| 5 | **Analysis Date:** January 2026 | ||
| 6 | **Document Purpose:** Technical diagrams, gap analysis, and optimization recommendations | ||
| 7 | |||
| 8 | ----- | ||
| 9 | |||
| 10 | ---- | ||
| 11 | |||
| 12 | == 1. AKEL Flow Diagram (with LLM and WebSearch Interactions) == | ||
| 13 | |||
| 14 | |||
| 15 | {{mermaid}} | ||
| 16 | flowchart TB | ||
| 17 | subgraph Input["📥 Input Layer"] | ||
| 18 | URL[URL Input] | ||
| 19 | TEXT[Text Input] | ||
| 20 | end | ||
| 21 | |||
| 22 | subgraph Retrieval["🔍 Content Retrieval"] | ||
| 23 | FETCH[extractTextFromUrl] | ||
| 24 | PDF[PDF Parser<br/>pdf-parse v1] | ||
| 25 | HTML[HTML Parser<br/>cheerio] | ||
| 26 | end | ||
| 27 | |||
| 28 | subgraph AKEL["🧠 AKEL Pipeline"] | ||
| 29 | direction TB | ||
| 30 | |||
| 31 | subgraph Step1["Step 1: Understand"] | ||
| 32 | UNDERSTAND[understandClaim<br/>━━━━━━━━━━━━━<br/>• Detect input type<br/>• Extract claims<br/>• Identify dependencies<br/>• Assign risk tiers] | ||
| 33 | LLM1[("🤖 LLM Call #1<br/>Claude/GPT/Gemini")] | ||
| 34 | end | ||
| 35 | |||
| 36 | subgraph Step2["Step 2: Research (Iterative)"] | ||
| 37 | DECIDE[decideNextResearch<br/>━━━━━━━━━━━━━<br/>• Generate queries<br/>• Focus areas] | ||
| 38 | |||
| 39 | SEARCH[("🌐 Web Search<br/>Google CSE / SerpAPI")] | ||
| 40 | |||
| 41 | FETCHSRC[fetchSourceContent<br/>━━━━━━━━━━━━━<br/>• Parallel fetching<br/>• Timeout handling] | ||
| 42 | |||
| 43 | EXTRACT[extractFacts<br/>━━━━━━━━━━━━━<br/>• Parse sources<br/>• Extract facts] | ||
| 44 | LLM2[("🤖 LLM Call #2-N<br/>Per source")] | ||
| 45 | end | ||
| 46 | |||
| 47 | subgraph Step3["Step 3: Verdict Generation"] | ||
| 48 | VERDICT[generateVerdicts<br/>━━━━━━━━━━━━━<br/>• Claim verdicts<br/>• Article verdict<br/>• Dependency propagation] | ||
| 49 | LLM3[("🤖 LLM Call #N+1<br/>Final synthesis")] | ||
| 50 | end | ||
| 51 | |||
| 52 | subgraph Step4["Step 4: Report"] | ||
| 53 | REPORT[buildTwoPanelSummary<br/>━━━━━━━━━━━━━<br/>• Format results<br/>• Generate markdown] | ||
| 54 | end | ||
| 55 | end | ||
| 56 | |||
| 57 | subgraph Output["📤 Output"] | ||
| 58 | RESULT[AnalysisResult JSON] | ||
| 59 | MARKDOWN[Report Markdown] | ||
| 60 | end | ||
| 61 | |||
| 62 | %% Flow connections | ||
| 63 | URL --> FETCH | ||
| 64 | TEXT --> UNDERSTAND | ||
| 65 | FETCH --> PDF | ||
| 66 | FETCH --> HTML | ||
| 67 | PDF --> UNDERSTAND | ||
| 68 | HTML --> UNDERSTAND | ||
| 69 | |||
| 70 | UNDERSTAND --> LLM1 | ||
| 71 | LLM1 --> DECIDE | ||
| 72 | |||
| 73 | DECIDE --> SEARCH | ||
| 74 | SEARCH --> FETCHSRC | ||
| 75 | FETCHSRC --> EXTRACT | ||
| 76 | EXTRACT --> LLM2 | ||
| 77 | LLM2 --> DECIDE | ||
| 78 | |||
| 79 | DECIDE -->|"Research Complete"| VERDICT | ||
| 80 | VERDICT --> LLM3 | ||
| 81 | LLM3 --> REPORT | ||
| 82 | |||
| 83 | REPORT --> RESULT | ||
| 84 | REPORT --> MARKDOWN | ||
| 85 | |||
| 86 | %% Styling | ||
| 87 | classDef llm fill:#e1f5fe,stroke:#01579b,stroke-width:2px | ||
| 88 | classDef search fill:#fff3e0,stroke:#e65100,stroke-width:2px | ||
| 89 | classDef step fill:#f3e5f5,stroke:#4a148c,stroke-width:2px | ||
| 90 | |||
| 91 | class LLM1,LLM2,LLM3 llm | ||
| 92 | class SEARCH search | ||
| 93 | class UNDERSTAND,DECIDE,FETCHSRC,EXTRACT,VERDICT,REPORT step | ||
| 94 | {{/mermaid}} | ||
| 95 | |||
| 96 | ----- | ||
| 97 | |||
| 98 | |||
| 99 | == 2. ERD Data Model (Current POC1 Implementation) == | ||
| 100 | |||
| 101 | |||
| 102 | {{mermaid}} | ||
| 103 | erDiagram | ||
| 104 | JOB ||--o{ JOB_EVENT : "has" | ||
| 105 | JOB ||--o| ANALYSIS_RESULT : "produces" | ||
| 106 | ANALYSIS_RESULT ||--o{ CLAIM_VERDICT : "contains" | ||
| 107 | ANALYSIS_RESULT ||--o{ FETCHED_SOURCE : "references" | ||
| 108 | ANALYSIS_RESULT ||--o{ EXTRACTED_FACT : "contains" | ||
| 109 | CLAIM_VERDICT }o--o{ EXTRACTED_FACT : "supported by" | ||
| 110 | FETCHED_SOURCE ||--o{ EXTRACTED_FACT : "provides" | ||
| 111 | CLAIM_VERDICT ||--o{ CLAIM_VERDICT : "depends on" | ||
| 112 | |||
| 113 | JOB { | ||
| 114 | string JobId PK "GUID" | ||
| 115 | string Status "QUEUED|RUNNING|COMPLETE|FAILED" | ||
| 116 | int Progress "0-100" | ||
| 117 | datetime CreatedUtc | ||
| 118 | datetime UpdatedUtc | ||
| 119 | string InputType "text|url" | ||
| 120 | string InputValue "URL or text content" | ||
| 121 | string InputPreview "First 100 chars" | ||
| 122 | json ResultJson "Full analysis result" | ||
| 123 | string ReportMarkdown "Formatted report" | ||
| 124 | } | ||
| 125 | |||
| 126 | JOB_EVENT { | ||
| 127 | long Id PK | ||
| 128 | string JobId FK | ||
| 129 | datetime TsUtc | ||
| 130 | string Level "info|warn|error" | ||
| 131 | string Message | ||
| 132 | } | ||
| 133 | |||
| 134 | ANALYSIS_RESULT { | ||
| 135 | string schemaVersion "2.6.17" | ||
| 136 | string inputType "question|claim|article" | ||
| 137 | boolean isQuestion | ||
| 138 | string articleThesis | ||
| 139 | int articleTruthPercentage "0-100" | ||
| 140 | string articleVerdict "7-point scale" | ||
| 141 | json claimPattern "total/supported/uncertain/refuted" | ||
| 142 | boolean isPseudoscience | ||
| 143 | int llmCalls "Total LLM invocations" | ||
| 144 | json searchQueries "All search queries" | ||
| 145 | } | ||
| 146 | |||
| 147 | CLAIM_VERDICT { | ||
| 148 | string claimId PK "SC1, SC2, etc." | ||
| 149 | string claimText | ||
| 150 | boolean isCentral | ||
| 151 | string claimRole "attribution|source|timing|core" | ||
| 152 | string_array dependsOn "Prerequisite claim IDs" | ||
| 153 | boolean dependencyFailed | ||
| 154 | string llmVerdict "WELL-SUPPORTED|PARTIALLY-SUPPORTED|UNCERTAIN|REFUTED" | ||
| 155 | string verdict "7-point: True to False" | ||
| 156 | int confidence "0-100" | ||
| 157 | int truthPercentage "0-100" | ||
| 158 | string riskTier "A|B|C" | ||
| 159 | string reasoning | ||
| 160 | string_array supportingFactIds | ||
| 161 | string highlightColor "green to dark-red" | ||
| 162 | } | ||
| 163 | |||
| 164 | FETCHED_SOURCE { | ||
| 165 | string id PK "S1, S2, etc." | ||
| 166 | string url | ||
| 167 | string title | ||
| 168 | int trackRecordScore "0-100 or null" | ||
| 169 | string fullText "Extracted content" | ||
| 170 | datetime fetchedAt | ||
| 171 | string category "legal|news|academic" | ||
| 172 | boolean fetchSuccess | ||
| 173 | string searchQuery "Which query found this" | ||
| 174 | } | ||
| 175 | |||
| 176 | EXTRACTED_FACT { | ||
| 177 | string id PK "S1-F1, S1-F2, etc." | ||
| 178 | string fact "The factual statement" | ||
| 179 | string category "legal_provision|evidence|expert_quote|statistic|event|criticism" | ||
| 180 | string specificity "high|medium" | ||
| 181 | string sourceId FK | ||
| 182 | string sourceUrl | ||
| 183 | string sourceTitle | ||
| 184 | string sourceExcerpt | ||
| 185 | string relatedProceedingId | ||
| 186 | boolean isContestedClaim | ||
| 187 | string claimSource | ||
| 188 | } | ||
| 189 | {{/mermaid}} | ||
| 190 | |||
| 191 | ----- | ||
| 192 | |||
| 193 | |||
| 194 | == 3. Overall Architecture with Interactions == | ||
| 195 | |||
| 196 | |||
| 197 | {{mermaid}} | ||
| 198 | flowchart TB | ||
| 199 | subgraph Client["🖥️ Client Layer"] | ||
| 200 | BROWSER[Web Browser] | ||
| 201 | ANALYZE_PAGE["/analyze page<br/>React + TailwindCSS"] | ||
| 202 | JOBS_PAGE["/jobs page<br/>Job history & status"] | ||
| 203 | end | ||
| 204 | |||
| 205 | subgraph NextJS["⚡ Next.js Web App (apps/web)"] | ||
| 206 | direction TB | ||
| 207 | |||
| 208 | subgraph API_Routes["API Routes"] | ||
| 209 | ANALYZE_API["/api/fh/analyze<br/>━━━━━━━━━━━━━<br/>POST: Create job"] | ||
| 210 | JOBS_API["/api/fh/jobs<br/>━━━━━━━━━━━━━<br/>GET: List jobs<br/>POST: Create job"] | ||
| 211 | JOB_API["/api/fh/jobs/[id]<br/>━━━━━━━━━━━━━<br/>GET: Job status"] | ||
| 212 | EVENTS_API["/api/fh/jobs/[id]/events<br/>━━━━━━━━━━━━━<br/>GET: Job events (SSE)"] | ||
| 213 | RUN_JOB["/api/internal/run-job<br/>━━━━━━━━━━━━━<br/>POST: Execute analysis"] | ||
| 214 | end | ||
| 215 | |||
| 216 | subgraph Lib["Core Libraries"] | ||
| 217 | ANALYZER["analyzer.ts<br/>━━━━━━━━━━━━━<br/>AKEL Pipeline<br/>2918 lines"] | ||
| 218 | RETRIEVAL["retrieval.ts<br/>━━━━━━━━━━━━━<br/>URL content extraction"] | ||
| 219 | WEBSEARCH["web-search.ts<br/>━━━━━━━━━━━━━<br/>Search abstraction"] | ||
| 220 | MBFC["mbfc-loader.ts<br/>━━━━━━━━━━━━━<br/>Source reliability"] | ||
| 221 | end | ||
| 222 | end | ||
| 223 | |||
| 224 | subgraph DotNet["🔧 .NET API (apps/api)"] | ||
| 225 | DOTNET_API["FactHarbor.Api<br/>ASP.NET Core"] | ||
| 226 | |||
| 227 | subgraph Controllers["Controllers"] | ||
| 228 | ANALYZE_CTRL["AnalyzeController"] | ||
| 229 | JOBS_CTRL["JobsController"] | ||
| 230 | INTERNAL_CTRL["InternalJobsController"] | ||
| 231 | end | ||
| 232 | |||
| 233 | subgraph Services["Services"] | ||
| 234 | JOB_SVC["JobService<br/>━━━━━━━━━━━━━<br/>Job CRUD operations"] | ||
| 235 | RUNNER_CLIENT["RunnerClient<br/>━━━━━━━━━━━━━<br/>Calls Next.js runner"] | ||
| 236 | end | ||
| 237 | |||
| 238 | DB[(SQLite Database<br/>━━━━━━━━━━━━━<br/>JobEntity<br/>JobEventEntity)] | ||
| 239 | end | ||
| 240 | |||
| 241 | subgraph External["🌐 External Services"] | ||
| 242 | LLM_PROVIDERS["LLM Providers<br/>━━━━━━━━━━━━━<br/>• Anthropic Claude<br/>• OpenAI GPT<br/>• Google Gemini<br/>• Mistral"] | ||
| 243 | SEARCH_PROVIDERS["Search Providers<br/>━━━━━━━━━━━━━<br/>• Google CSE<br/>• SerpAPI<br/>• Brave<br/>• Tavily"] | ||
| 244 | WEB["Web Content<br/>━━━━━━━━━━━━━<br/>• News sites<br/>• PDFs<br/>• Academic sources"] | ||
| 245 | end | ||
| 246 | |||
| 247 | %% Client interactions | ||
| 248 | BROWSER --> ANALYZE_PAGE | ||
| 249 | BROWSER --> JOBS_PAGE | ||
| 250 | ANALYZE_PAGE --> ANALYZE_API | ||
| 251 | JOBS_PAGE --> JOBS_API | ||
| 252 | |||
| 253 | %% Next.js internal | ||
| 254 | ANALYZE_API --> JOBS_API | ||
| 255 | JOBS_API -->|"Proxy"| DOTNET_API | ||
| 256 | JOB_API -->|"Proxy"| DOTNET_API | ||
| 257 | EVENTS_API -->|"Proxy"| DOTNET_API | ||
| 258 | |||
| 259 | %% .NET flow | ||
| 260 | DOTNET_API --> ANALYZE_CTRL | ||
| 261 | DOTNET_API --> JOBS_CTRL | ||
| 262 | DOTNET_API --> INTERNAL_CTRL | ||
| 263 | ANALYZE_CTRL --> JOB_SVC | ||
| 264 | JOBS_CTRL --> JOB_SVC | ||
| 265 | JOB_SVC --> DB | ||
| 266 | JOB_SVC --> RUNNER_CLIENT | ||
| 267 | RUNNER_CLIENT -->|"HTTP POST"| RUN_JOB | ||
| 268 | |||
| 269 | %% Analysis execution | ||
| 270 | RUN_JOB --> ANALYZER | ||
| 271 | ANALYZER --> RETRIEVAL | ||
| 272 | ANALYZER --> WEBSEARCH | ||
| 273 | ANALYZER --> MBFC | ||
| 274 | |||
| 275 | %% External calls | ||
| 276 | ANALYZER -->|"AI SDK"| LLM_PROVIDERS | ||
| 277 | WEBSEARCH --> SEARCH_PROVIDERS | ||
| 278 | RETRIEVAL --> WEB | ||
| 279 | |||
| 280 | %% Styling | ||
| 281 | classDef external fill:#fff3e0,stroke:#e65100,stroke-width:2px | ||
| 282 | classDef core fill:#e8f5e9,stroke:#2e7d32,stroke-width:2px | ||
| 283 | classDef api fill:#e3f2fd,stroke:#1565c0,stroke-width:2px | ||
| 284 | |||
| 285 | class LLM_PROVIDERS,SEARCH_PROVIDERS,WEB external | ||
| 286 | class ANALYZER,RETRIEVAL,WEBSEARCH,MBFC core | ||
| 287 | class ANALYZE_API,JOBS_API,JOB_API,EVENTS_API,RUN_JOB api | ||
| 288 | {{/mermaid}} | ||
| 289 | |||
| 290 | ----- | ||
| 291 | |||
| 292 | |||
| 293 | == 4. Specification vs Implementation Gap Analysis == | ||
| 294 | |||
| 295 | |||
| 296 | |||
| 297 | === 4.1 Data Model Gaps === | ||
| 298 | |||
| 299 | |||
| 300 | | Specification Entity | POC1 Status | Gap Description | | ||
| 301 | |-|-|-| | ||
| 302 | | **Claim** | ⚠️ Partial | No persistent storage; claims exist only in JSON result. Missing: `status`, `confidence_score`, `risk_score`, `completeness_score`, `version`, `views`, `edit_count` | | ||
| 303 | | **Evidence** | ⚠️ Partial | Implemented as `ExtractedFact` but lacks: `supports` enum, proper `relevance_score` | | ||
| 304 | | **Source** | ⚠️ Partial | `FetchedSource` exists but is missing: `type` enum, `accuracy_history`, `correction_frequency`, weekly update scheduler | | ||
| 305 | | **Scenario** | ❌ Missing | Not implemented. Claims are evaluated directly without scenario contexts | | ||
| 306 | | **Verdict** | ⚠️ Partial | `ClaimVerdict` exists but is missing: `likelihood_range`, `uncertainty_factors` array, proper `explanation_summary` | | ||
| 307 | | **User** | ❌ Missing | No user authentication or role system | | ||
| 308 | | **Edit** | ❌ Missing | No audit trail for changes | | ||
| 309 | |||
| 310 | === 4.2 AKEL Component Gaps === | ||
| 311 | |||
| 312 | | Spec Component | POC1 Status | Gap Description | | ||
| 313 | | |-|-| | ||
| 314 | | **AKEL Orchestrator** | ✅ Implemented | `runAnalysis()` function serves this role | | ||
| 315 | | **Claim Extractor** | ✅ Implemented | `understandClaim()` with claim role/dependency tracking | | ||
| 316 | | **Claim Classifier** | ⚠️ Partial | Risk tier (A/B/C) assigned, but no domain classification | | ||
| 317 | | **Scenario Generator** | ❌ Missing | Claims evaluated without scenario extraction | | ||
| 318 | | **Evidence Summarizer** | ✅ Implemented | `extractFacts()` function | | ||
| 319 | | **Contradiction Detector** | ⚠️ Partial | `isContestedClaim` flag exists but no active contradiction search | | ||
| 320 | | **Quality Gate Validator** | ❌ Missing | No source quality gates, no mandatory checks | | ||
| 321 | | **Audit Sampling Scheduler** | ❌ Missing | No audit system | | ||
| 322 | | **Embedding Handler** | ❌ Missing | Not needed for POC | | ||
| 323 | | **Federation Sync** | ❌ Missing | Not needed for POC | | ||
| 324 | |||
| 325 | === 4.3 Architecture Gaps === | ||
| 326 | |||
| 327 | |||
| 328 | | Spec Requirement | POC1 Status | Gap Description | | ||
| 329 | |-|-|-| | ||
| 330 | | **Three-Layer Architecture** | ✅ Implemented | Interface (Next.js) → Processing (AKEL) → Data (SQLite) | | ||
| 331 | | **LLM Abstraction Layer** | ✅ Implemented | AI SDK supports multiple providers with failover | | ||
| 332 | | **PostgreSQL Primary DB** | ⚠️ Different | Using SQLite for simplicity (acceptable for POC) | | ||
| 333 | | **Redis Caching** | ❌ Missing | No caching layer | | ||
| 334 | | **S3 Archival** | ❌ Missing | No long-term storage | | ||
| 335 | | **Background Jobs** | ❌ Missing | No scheduler for source updates, cache warming | | ||
| 336 | | **Quality Monitoring** | ⚠️ Partial | LLM call counting exists, but no anomaly detection | | ||
| 337 | |||
| 338 | === 4.4 Publication & Review Gaps === | ||
| 339 | |||
| 340 | |||
| 341 | | Spec Feature | POC1 Status | Gap Description | | ||
| 342 | |-|-|-| | ||
| 343 | | **Risk Tier Publication Rules** | ❌ Missing | All results published immediately regardless of tier | | ||
| 344 | | **Human Review Queue** | ❌ Missing | No review workflow | | ||
| 345 | | **AI-Generated Labeling** | ⚠️ Partial | Results show "AI analysis" but no formal labeling system | | ||
| 346 | | **Audit Rate Sampling** | ❌ Missing | No sampling audits | | ||
| 347 | |||
| 348 | ----- | ||
| 349 | |||
| 350 | |||
| 351 | == 5. Optimization Recommendations == | ||
| 352 | |||
| 353 | |||
| 354 | |||
| 355 | === 5.1 Cost Optimizations === | ||
| 356 | |||
| 357 | |||
| 358 | {{mermaid}} | ||
| 359 | pie title Current LLM Cost Distribution (Estimated per Analysis) | ||
| 360 | "Step 1: Understand" : 15 | ||
| 361 | "Step 2: Research (per source)" : 60 | ||
| 362 | "Step 3: Verdicts" : 25 | ||
| 363 | {{/mermaid}} | ||
| 364 | |||
| 365 | | Optimization | Estimated Savings | Implementation Effort | | ||
| 366 | |-|-|-| | ||
| 367 | | **Cache claim understanding** | 30-50% on repeated claims | Medium | | ||
| 368 | | **Use Haiku for fact extraction** | 40% on Step 2 costs | Low (config change) | | ||
| 369 | | **Batch fact extraction** | 20% fewer API calls | Medium | | ||
| 370 | | **Skip search for known claims** | 50%+ for cached claims | High (needs claim DB) | | ||
| 371 | | **Reduce max iterations** | Linear reduction | Low (config change) | | ||
| 372 | |||
| 373 | === 5.2 Timing Optimizations === | ||
| 374 | |||
| 375 | |||
| 376 | {{mermaid}} | ||
| 377 | gantt | ||
| 378 | title Current Analysis Timeline (Typical) | ||
| 379 | dateFormat ss | ||
| 380 | axisFormat %S sec | ||
| 381 | |||
| 382 | section Current Flow | ||
| 383 | URL Fetch :a1, 00, 2s | ||
| 384 | Step 1 Understand :a2, after a1, 15s | ||
| 385 | Search Iteration 1 :a3, after a2, 8s | ||
| 386 | Fetch Sources 1 :a4, after a3, 10s | ||
| 387 | Extract Facts 1 :a5, after a4, 12s | ||
| 388 | Search Iteration 2 :a6, after a5, 8s | ||
| 389 | Fetch Sources 2 :a7, after a6, 10s | ||
| 390 | Extract Facts 2 :a8, after a7, 12s | ||
| 391 | Generate Verdicts :a9, after a8, 15s | ||
| 392 | |||
| 393 | section Optimized Flow | ||
| 394 | URL Fetch :b1, 00, 2s | ||
| 395 | Step 1 Understand :b2, after b1, 10s | ||
| 396 | Search + Fetch (parallel) :b3, after b2, 12s | ||
| 397 | Extract Facts (batched) :b4, after b3, 8s | ||
| 398 | Generate Verdicts :b5, after b4, 10s | ||
| 399 | {{/mermaid}} | ||
| 400 | |||
| 401 | | Optimization | Time Savings | Notes | | ||
| 402 | |-|-|-| | ||
| 403 | | **Parallel source fetching** | Already implemented | Currently fetches 3 sources in parallel | | ||
| 404 | | **Streaming LLM responses** | 20-30% perceived | User sees progress faster | | ||
| 405 | | **Search query batching** | 10-15% | Send multiple queries to search API | | ||
| 406 | | **Reduce prompt size** | 5-10% per call | Optimize system prompts | | ||
| 407 | | **Use faster models for extraction** | 30-40% on Step 2 | Claude Haiku vs Sonnet | | ||
| 408 | |||
| 409 | === 5.3 Priority Recommendations === | ||
| 410 | |||
| 411 | |||
| 412 | 1. **HIGH PRIORITY - Implement Claim Caching** | ||
| 413 | - Cache claim verdicts by content hash | ||
| 414 | - Reduces costs for repeated/similar claims | ||
| 415 | - Enables the separated verdict architecture (see Section 6) | ||
| 416 | |||
| 417 | 2. **MEDIUM PRIORITY - Use Tiered Models** | ||
| 418 | - Step 1 (Understand): Sonnet (needs reasoning) | ||
| 419 | - Step 2 (Extract): Haiku (simple extraction) | ||
| 420 | - Step 3 (Verdicts): Sonnet (needs synthesis) | ||
| 421 | |||
| 422 | 3. **LOW PRIORITY - Add Redis Cache** | ||
| 423 | - Cache source content (24h TTL) | ||
| 424 | - Cache search results (1h TTL) | ||
| 425 | - Reduces external API calls | ||
| 426 | |||
| 427 | ----- | ||
| 428 | |||
| 429 | |||
| 430 | == 6. Separated Verdict Architecture Proposal == | ||
| 431 | |||
| 432 | |||
| 433 | |||
| 434 | === 6.1 Current Architecture === | ||
| 435 | |||
| 436 | |||
| 437 | {{mermaid}} | ||
| 438 | flowchart LR | ||
| 439 | subgraph Current["Current: Monolithic Analysis"] | ||
| 440 | INPUT[Article Input] --> ANALYZE[Full Analysis Pipeline] | ||
| 441 | ANALYZE --> CLAIMS[Claim Verdicts] | ||
| 442 | ANALYZE --> ARTICLE[Article Verdict] | ||
| 443 | CLAIMS -.->|"Aggregated"| ARTICLE | ||
| 444 | end | ||
| 445 | {{/mermaid}} | ||
| 446 | |||
| 447 | **Issues:** | ||
| 448 | - Every analysis re-processes all claims | ||
| 449 | - No caching of individual claim verdicts | ||
| 450 | - Article verdict tightly coupled to claim extraction | ||
| 451 | |||
| 452 | |||
| 453 | === 6.2 Proposed Separated Architecture === | ||
| 454 | |||
| 455 | |||
| 456 | {{mermaid}} | ||
| 457 | flowchart TB | ||
| 458 | subgraph Input["Input Processing"] | ||
| 459 | ARTICLE[Article/Text Input] | ||
| 460 | EXTRACT[Claim Extraction] | ||
| 461 | end | ||
| 462 | |||
| 463 | subgraph ClaimLayer["Claim Verdict Layer (Cacheable)"] | ||
| 464 | CACHE[(Claim Cache<br/>━━━━━━━━━━━━━<br/>Key: claim_hash<br/>TTL: 7 days)] | ||
| 465 | |||
| 466 | CLAIM1["Claim 1 Analysis"] | ||
| 467 | CLAIM2["Claim 2 Analysis"] | ||
| 468 | CLAIM3["Claim N Analysis"] | ||
| 469 | |||
| 470 | VERDICT1[Claim 1 Verdict] | ||
| 471 | VERDICT2[Claim 2 Verdict] | ||
| 472 | VERDICT3[Claim N Verdict] | ||
| 473 | end | ||
| 474 | |||
| 475 | subgraph ArticleLayer["Article Verdict Layer (Dynamic)"] | ||
| 476 | AGGREGATE[Aggregate Claim Verdicts] | ||
| 477 | CONTEXT[Apply Article Context<br/>━━━━━━━━━━━━━<br/>• Claim relationships<br/>• Logical structure<br/>• Author intent] | ||
| 478 | ARTICLE_VERDICT[Article Verdict] | ||
| 479 | end | ||
| 480 | |||
| 481 | %% Flow | ||
| 482 | ARTICLE --> EXTRACT | ||
| 483 | EXTRACT --> CLAIM1 | ||
| 484 | EXTRACT --> CLAIM2 | ||
| 485 | EXTRACT --> CLAIM3 | ||
| 486 | |||
| 487 | CLAIM1 -->|"Cache Miss"| VERDICT1 | ||
| 488 | CLAIM2 -->|"Cache Hit"| VERDICT2 | ||
| 489 | CLAIM3 -->|"Cache Miss"| VERDICT3 | ||
| 490 | |||
| 491 | CLAIM1 <-.-> CACHE | ||
| 492 | CLAIM2 <-.-> CACHE | ||
| 493 | CLAIM3 <-.-> CACHE | ||
| 494 | |||
| 495 | VERDICT1 --> AGGREGATE | ||
| 496 | VERDICT2 --> AGGREGATE | ||
| 497 | VERDICT3 --> AGGREGATE | ||
| 498 | |||
| 499 | AGGREGATE --> CONTEXT | ||
| 500 | CONTEXT --> ARTICLE_VERDICT | ||
| 501 | |||
| 502 | classDef cache fill:#fff9c4,stroke:#f57f17,stroke-width:2px | ||
| 503 | classDef dynamic fill:#e8f5e9,stroke:#2e7d32,stroke-width:2px | ||
| 504 | class CACHE cache | ||
| 505 | class CONTEXT,ARTICLE_VERDICT dynamic | ||
| 506 | {{/mermaid}} | ||
| 507 | |||
| 508 | |||
| 509 | === 6.3 Benefits Analysis === | ||
| 510 | |||
| 511 | |||
| 512 | | Benefit | Impact | Rationale | | ||
| 513 | |-|-|-| | ||
| 514 | | **Cost Reduction** | 40-70% for repeated claims | Many articles share common claims (e.g., "COVID vaccines are safe") | | ||
| 515 | | **Faster Analysis** | 50%+ for cached claims | Skip research + LLM calls for known claims | | ||
| 516 | | **Consistency** | High | Same claim always gets same verdict (until cache expires) | | ||
| 517 | | **Freshness Control** | Configurable TTL | Balance consistency vs. new evidence | | ||
| 518 | | **Scalability** | Linear improvement | More users = higher cache hit rate | | ||
| 519 | |||
| 520 | === 6.4 Implementation Considerations === | ||
| 521 | |||
| 522 | **Claim Hashing Strategy:** | ||
| 523 | {{code language="typescript"}}function getClaimHash(claim: string): string { | ||
| 524 | // Normalize: lowercase, remove punctuation, stem words | ||
| 525 | const normalized = normalize(claim); | ||
| 526 | // Hash for cache key | ||
| 527 | return crypto.createHash('sha256').update(normalized).digest('hex').slice(0, 16); | ||
| 528 | }{{/code}} | ||
| 529 | |||
| 530 | **Cache Invalidation Triggers:** | ||
| 531 | - TTL expiration (default 7 days) | ||
| 532 | - Major news event related to claim topic | ||
| 533 | - Significant change in a source's track record | ||
| 534 | - Manual invalidation by moderator | ||
| 535 | |||
| 536 | **Article Verdict Considerations:** | ||
| 537 | - Article verdict should ALWAYS be dynamic (never cached) | ||
| 538 | - Same claims in different article contexts may yield different article verdicts | ||
| 539 | - Example: "Vaccines are safe" + "Vaccines cause autism" → article may be misleading even if first claim is true | ||
| 540 | |||
| 541 | === 6.5 Recommendation === | ||
| 542 | |||
| 543 | **YES, separating is beneficial** with the following caveats: | ||
| 544 | |||
| 545 | 1. **Claim verdicts should be cached** with semantic similarity matching (not just exact match) | ||
| 546 | 2. **Article verdicts should always be dynamic** to account for: | ||
| 547 | - Claim relationships and logical structure | ||
| 548 | - Author's argumentative strategy | ||
| 549 | - Context and framing | ||
| 550 | - Selective use of true claims to support false conclusions | ||
| 551 | |||
| 552 | 3. **Implementation phases:** | ||
| 553 | - Phase 1: Exact-match claim caching (simple hash) | ||
| 554 | - Phase 2: Semantic similarity caching (embedding-based) | ||
| 555 | - Phase 3: Federated claim sharing across instances | ||
| 556 | |||
| 557 | ----- | ||
| 558 | |||
| 559 | |||
| 560 | == 7. Summary == | ||
| 561 | |||
| 562 | |||
| 563 | |||
| 564 | === Current State === | ||
| 565 | |||
| 566 | - POC1 implements core AKEL pipeline successfully | ||
| 567 | - Claim dependency tracking is implemented | ||
| 568 | - Multiple LLM providers supported | ||
| 569 | - No persistent claim storage or caching | ||
| 570 | |||
| 571 | |||
| 572 | === Key Gaps from Specification === | ||
| 573 | |||
| 574 | - No scenario extraction | ||
| 575 | - No user/role system | ||
| 576 | - No audit trail | ||
| 577 | - No source track record updates | ||
| 578 | - No review queue | ||
| 579 | |||
| 580 | |||
| 581 | === Recommended Next Steps === | ||
| 582 | |||
| 583 | 1. Implement claim caching layer | ||
| 584 | 2. Separate claim vs article verdict generation | ||
| 585 | 3. Add Redis for source/search caching | ||
| 586 | 4. Implement tiered model selection | ||
| 587 | 5. Add basic audit logging |