Changes for page FactHarbor POC1 Architecture Analysis 1.Jan.26
Last modified by Robert Schaub on 2026/02/08 08:12
From version 8.1
edited by Robert Schaub
on 2026/01/02 10:13
Change comment:
There is no comment for this version
To version 4.1
edited by Robert Schaub
on 2026/01/02 10:03
Change comment:
There is no comment for this version
Summary
-
Page properties (2 modified, 0 added, 0 removed)
Details
- Page properties
-
- Title
-
... ... @@ -1,1 +1,1 @@ 1 - FactHarborPOC1 Architecture Analysis1 +POC1 Architecture Analysis - 1.Jan.26 - Content
-
... ... @@ -1,5 +1,6 @@ 1 -= FactHarbor POC1 Architecture Analysis 1.Jan.26=1 += FactHarbor POC1 Architecture Analysis = 2 2 3 + 3 3 **Version:** 2.6.17 4 4 **Analysis Date:** January 2026 5 5 **Document Purpose:** Technical diagrams, gap analysis, and optimization recommendations ... ... @@ -92,123 +92,12 @@ 92 92 93 93 ---- 94 94 96 + 95 95 == 2. ERD Data Model (Current POC1 Implementation) == 96 96 97 -**Data Objects ERD** 98 98 99 99 {{mermaid}} 100 100 erDiagram 101 - ARTICLE ||--o{ CLAIM : "contains" 102 - ARTICLE ||--|| ARTICLE_VERDICT : "has" 103 - CLAIM ||--|| CLAIM_VERDICT : "has" 104 - CLAIM ||--o{ CLAIM : "depends on" 105 - CLAIM_VERDICT }o--o{ EVIDENCE : "supported by" 106 - SOURCE ||--o{ EVIDENCE : "provides" 107 - ARTICLE ||--o{ SOURCE : "references" 108 - 109 - ARTICLE { 110 - string id PK "Unique identifier (job ID)" 111 - string inputType "text | url" 112 - string inputValue "Original URL or text" 113 - string articleThesis "Main argument/thesis" 114 - string detectedInputType "question | claim | article" 115 - boolean isQuestion "True if input is a question" 116 - datetime createdAt "Analysis timestamp" 117 - datetime updatedAt "Last update" 118 - json distinctProceedings "Legal proceedings if any" 119 - boolean hasMultipleProceedings "Multi-proceeding flag" 120 - string proceedingContext "Context for proceedings" 121 - json logicalFallacies "Detected fallacies array" 122 - boolean isPseudoscience "Pseudoscience detection" 123 - string_array pseudoscienceCategories "Categories if detected" 124 - int llmCalls "Total LLM API calls" 125 - json searchQueries "All search queries performed" 126 - string schemaVersion "e.g. 2.6.17" 127 - } 128 - 129 - CLAIM { 130 - string id PK "SC1, SC2, C1, etc." 
131 - string articleId FK "Parent article" 132 - string text "The claim statement" 133 - string type "legal | procedural | factual | evaluative" 134 - string claimRole "attribution | source | timing | core" 135 - string_array dependsOn "IDs of prerequisite claims" 136 - string_array keyEntities "Named entities in claim" 137 - boolean isCentral "Is this a central claim?" 138 - string relatedProceedingId "Linked proceeding if any" 139 - int startOffset "Position in original text" 140 - int endOffset "End position in original text" 141 - string approximatePosition "Descriptive position" 142 - } 143 - 144 - CLAIM_VERDICT { 145 - string id PK "Same as claim ID" 146 - string claimId FK "Reference to claim" 147 - string llmVerdict "WELL-SUPPORTED | PARTIALLY-SUPPORTED | UNCERTAIN | REFUTED" 148 - string verdict "True | Mostly True | Leaning True | Unverified | Leaning False | Mostly False | False" 149 - int confidence "0-100 LLM confidence" 150 - int truthPercentage "0-100 calibrated truth score" 151 - string riskTier "A (high) | B (medium) | C (low)" 152 - string reasoning "Explanation of verdict" 153 - string_array supportingFactIds "Evidence IDs supporting this" 154 - boolean dependencyFailed "True if prerequisite failed" 155 - string_array failedDependencies "Which deps failed" 156 - string highlightColor "green | light-green | yellow | orange | dark-orange | red | dark-red" 157 - boolean isPseudoscience "Pseudoscience flag" 158 - string escalationReason "Why verdict was escalated" 159 - } 160 - 161 - ARTICLE_VERDICT { 162 - string id PK "Same as article ID" 163 - string articleId FK "Reference to article" 164 - string llmArticleVerdict "Original LLM verdict" 165 - int llmArticleConfidence "Original LLM confidence" 166 - string articleVerdict "True | Mostly True | Leaning True | Unverified | Leaning False | Mostly False | False" 167 - int articleTruthPercentage "0-100 calibrated score" 168 - string articleVerdictReason "Why verdict differs from claims avg" 169 - int 
claimsAverageTruthPercentage "Average of claim verdicts" 170 - string claimsAverageVerdict "7-point average verdict" 171 - int claimsTotal "Total claims analyzed" 172 - int claimsSupported "Claims with truth >= 72%" 173 - int claimsUncertain "Claims with truth 43-71%" 174 - int claimsRefuted "Claims with truth < 43%" 175 - int centralClaimsTotal "Number of central claims" 176 - int centralClaimsSupported "Central claims supported" 177 - } 178 - 179 - EVIDENCE { 180 - string id PK "S1-F1, S1-F2 format" 181 - string sourceId FK "Reference to source" 182 - string claimId FK "Optional: specific claim this supports" 183 - string fact "The factual statement extracted" 184 - string category "legal_provision | evidence | expert_quote | statistic | event | criticism" 185 - string specificity "high | medium" 186 - string sourceExcerpt "Original text excerpt" 187 - string relatedProceedingId "Linked proceeding if any" 188 - boolean isContestedClaim "Is this a contested assertion" 189 - string claimSource "Who made contested claim" 190 - } 191 - 192 - SOURCE { 193 - string id PK "S1, S2, etc." 194 - string articleId FK "Parent article" 195 - string url "Full URL" 196 - string title "Page/document title" 197 - string domain "Extracted domain" 198 - int trackRecordScore "0-100 reliability score or null" 199 - string fullText "Extracted content" 200 - datetime fetchedAt "When content was fetched" 201 - string category "news | academic | government | legal" 202 - boolean fetchSuccess "True if fetch succeeded" 203 - string searchQuery "Which query found this" 204 - string mimeType "text/html | application/pdf" 205 - } 206 -{{/mermaid}} 207 - 208 -**Data Usage ERD** 209 - 210 -{{mermaid}} 211 -erDiagram 212 212 JOB ||--o{ JOB_EVENT : "has" 213 213 JOB ||--|| ANALYSIS_RESULT : "produces" 214 214 ANALYSIS_RESULT ||--o{ CLAIM_VERDICT : "contains" ... ... @@ -298,8 +298,10 @@ 298 298 299 299 ---- 300 300 191 + 301 301 == 3. 
Overall Architecture with Interactions == 302 302 194 + 303 303 {{mermaid}} 304 304 flowchart TB 305 305 subgraph Client["🖥️ Client Layer"] ... ... @@ -395,10 +395,14 @@ 395 395 396 396 ---- 397 397 290 + 398 398 == 4. Specification vs Implementation Gap Analysis == 399 399 293 + 294 + 400 400 === 4.1 Data Model Gaps === 401 401 297 + 402 402 | Specification Entity | POC1 Status | Gap Description | 403 403 |-|-|-| 404 404 | **Claim** | ⚠️ Partial | No persistent storage; claims exist only in JSON result. Missing: `status`, `confidence_score`, `risk_score`, `completeness_score`, `version`, `views`, `edit_count` | ... ... @@ -426,6 +426,7 @@ 426 426 427 427 === 4.3 Architecture Gaps === 428 428 325 + 429 429 | Spec Requirement | POC1 Status | Gap Description | 430 430 | |-|-| 431 431 | **Three-Layer Architecture** | ✅ Implemented | Interface (Next.js) → Processing (AKEL) → Data (SQLite) | ... ... @@ -438,6 +438,7 @@ 438 438 439 439 === 4.4 Publication & Review Gaps === 440 440 338 + 441 441 | Spec Feature | POC1 Status | Gap Description | 442 442 | |-|-| 443 443 | **Risk Tier Publication Rules** | ❌ Missing | All results published immediately regardless of tier | ... ... @@ -447,10 +447,14 @@ 447 447 448 448 ---- 449 449 348 + 450 450 == 5. Optimization Recommendations == 451 451 351 + 352 + 452 452 === 5.1 Cost Optimizations === 453 453 355 + 454 454 {{mermaid}} 455 455 pie title Current LLM Cost Distribution (Estimated per Analysis) 456 456 "Step 1: Understand" : 15 ... ... @@ -468,6 +468,7 @@ 468 468 469 469 === 5.2 Timing Optimizations === 470 470 373 + 471 471 {{mermaid}} 472 472 gantt 473 473 title Current Analysis Timeline (Typical) ... ... @@ -503,6 +503,7 @@ 503 503 504 504 === 5.3 Priority Recommendations === 505 505 409 + 506 506 1. **HIGH PRIORITY - Implement Claim Caching** 507 507 - Cache claim verdicts by content hash 508 508 - Reduces costs for repeated/similar claims ... ... @@ -520,10 +520,14 @@ 520 520 521 521 ---- 522 522 427 + 523 523 == 6. 
Separated Verdict Architecture Proposal == 524 524 430 + 431 + 525 525 === 6.1 Current Architecture === 526 526 434 + 527 527 {{mermaid}} 528 528 flowchart LR 529 529 subgraph Current["Current: Monolithic Analysis"] ... ... @@ -539,8 +539,10 @@ 539 539 - No caching of individual claim verdicts 540 540 - Article verdict tightly coupled to claim extraction 541 541 450 + 542 542 === 6.2 Proposed Separated Architecture === 543 543 453 + 544 544 {{mermaid}} 545 545 flowchart TB 546 546 subgraph Input["Input Processing"] ... ... @@ -593,8 +593,10 @@ 593 593 class CONTEXT,ARTICLE_VERDICT dynamic 594 594 {{/mermaid}} 595 595 506 + 596 596 === 6.3 Benefits Analysis === 597 597 509 + 598 598 | Benefit | Impact | Rationale | 599 599 |-| |-| 600 600 | **Cost Reduction** | 40-70% for repeated claims | Many articles share common claims (e.g., "COVID vaccines are safe") | ... ... @@ -642,8 +642,11 @@ 642 642 643 643 ---- 644 644 557 + 645 645 == 7. Summary == 646 646 560 + 561 + 647 647 === Current State === 648 648 649 649 - POC1 implements core AKEL pipeline successfully ... ... @@ -651,6 +651,7 @@ 651 651 - Multiple LLM providers supported 652 652 - No persistent claim storage or caching 653 653 569 + 654 654 === Key Gaps from Specification === 655 655 656 656 - No scenario extraction ... ... @@ -659,6 +659,7 @@ 659 659 - No source track record updates 660 660 - No review queue 661 661 578 + 662 662 === Recommended Next Steps === 663 663 664 664 1. Implement claim caching layer