#!/usr/bin/env python3
# update-repo.py v3.1 — NUX repo index generator
# Fixes:
#   - Validate package names
#   - Strip invalid dependency/provides entries
#   - Skip packages with broken metadata

import json
import tarfile
import hashlib
import subprocess
import re
from pathlib import Path
from datetime import datetime
from typing import Optional, Dict, List, Any

# Root directory of the repository; the paths below are all derived from it.
REPO_ROOT = Path("/var/www/html/nux")
# Directory that update_index() scans for *.nux package archives.
PACKAGES_DIR = REPO_ROOT / "packages"
# Generated repository index written by update_index().
INDEX_FILE = REPO_ROOT / "index.json"
# Per-file cache (fingerprint, metadata, sha256, size) kept between runs.
CACHE_FILE = REPO_ROOT / ".pkg_cache.json"

# Regex for a valid Debian-style package name: a lowercase letter or digit
# followed by any number of lowercase letters, digits, '+', '.' or '-'.
PKG_NAME_RE = re.compile(r"^[a-z0-9][a-z0-9+.-]*$")

# Metadata keys whose values are treated as lists of package names; they are
# normalised to lists and filtered for invalid names when metadata is read.
LIST_FIELDS = [
    "depends",
    "pre_depends",
    "recommends",
    "suggests",
    "conflicts",
    "provides",
]


def is_valid_pkg_name(name: str) -> bool:
    """Return True when *name* is a string that fully matches PKG_NAME_RE.

    Values that are not strings (numbers, lists, dicts, None — anything that
    may come out of untrusted JSON metadata) and the empty string are
    rejected with False instead of raising ``TypeError`` from the regex
    engine.
    """
    return isinstance(name, str) and PKG_NAME_RE.fullmatch(name) is not None


def clean_list_field(items: List[Any]) -> List[str]:
    """Return the entries of *items* that are valid package-name strings.

    Order is preserved. Anything that is not a list yields an empty list,
    and entries that are not strings or fail is_valid_pkg_name() are dropped.
    """
    if isinstance(items, list):
        return [
            entry
            for entry in items
            if isinstance(entry, str) and is_valid_pkg_name(entry)
        ]
    return []


def sha256sum(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is read in 1 MiB blocks so large packages are never loaded
    into memory in one piece.
    """
    block_size = 1024 * 1024
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        while True:
            block = stream.read(block_size)
            if block == b"":
                break
            digest.update(block)
    return digest.hexdigest()


_VERSION_DIGITS = "0123456789"


def _version_char_order(ch: str) -> int:
    """Return the dpkg sort weight of one non-digit character.

    An empty string stands for "end of string". '~' sorts before everything
    (even the end of the string), ASCII letters sort before all other
    characters.
    """
    if ch == "~":
        return -1
    if not ch or ch in _VERSION_DIGITS:
        return 0
    if ch.isascii() and ch.isalpha():
        return ord(ch)
    return ord(ch) + 256


def _compare_version_part(a: str, b: str) -> int:
    """Compare two upstream-version or revision strings the way dpkg does."""
    i = j = 0
    len_a, len_b = len(a), len(b)
    while i < len_a or j < len_b:
        # Non-digit run: compare character by character using the weights.
        while (i < len_a and a[i] not in _VERSION_DIGITS) or (
            j < len_b and b[j] not in _VERSION_DIGITS
        ):
            weight_a = _version_char_order(a[i] if i < len_a else "")
            weight_b = _version_char_order(b[j] if j < len_b else "")
            if weight_a != weight_b:
                return -1 if weight_a < weight_b else 1
            i += 1
            j += 1

        # Digit run: compare numerically. Leading zeros are stripped and the
        # (length, text) pair is compared, which orders like integers without
        # converting arbitrarily long digit strings.
        start_a = i
        while i < len_a and a[i] in _VERSION_DIGITS:
            i += 1
        start_b = j
        while j < len_b and b[j] in _VERSION_DIGITS:
            j += 1
        num_a = a[start_a:i].lstrip("0")
        num_b = b[start_b:j].lstrip("0")
        key_a = (len(num_a), num_a)
        key_b = (len(num_b), num_b)
        if key_a != key_b:
            return -1 if key_a < key_b else 1
    return 0


def _split_debian_version(version: str):
    """Split ``[epoch:]upstream[-revision]`` into (epoch, upstream, revision)."""
    epoch = 0
    head, sep, tail = version.partition(":")
    if sep and head and all(ch in _VERSION_DIGITS for ch in head):
        epoch = int(head)
        version = tail
    upstream, sep, revision = version.rpartition("-")
    if not sep:
        # No '-' at all: the whole string is the upstream version.
        upstream, revision = version, ""
    return epoch, upstream, revision


def _compare_debian_versions(v1: str, v2: str) -> int:
    """Pure-Python Debian version comparison returning -1, 0 or 1."""
    epoch1, upstream1, revision1 = _split_debian_version(v1)
    epoch2, upstream2, revision2 = _split_debian_version(v2)
    if epoch1 != epoch2:
        return -1 if epoch1 < epoch2 else 1
    result = _compare_version_part(upstream1, upstream2)
    if result:
        return result
    return _compare_version_part(revision1, revision2)


def version_cmp(v1: str, v2: str) -> int:
    """Compare two version strings using Debian ordering rules.

    ``dpkg --compare-versions`` is used when the ``dpkg`` executable can be
    started. When it cannot (not installed, not executable, arguments that
    cannot be passed to a process), a pure-Python implementation of the same
    ordering is used instead of a plain string comparison, so that e.g.
    "1.10" still sorts after "1.9".

    Args:
        v1: First version string.
        v2: Second version string.

    Returns:
        -1 if v1 is older than v2, 1 if it is newer, 0 otherwise.
    """
    try:
        for relation, result in (("lt", -1), ("gt", 1)):
            proc = subprocess.run(
                ["dpkg", "--compare-versions", v1, relation, v2],
                capture_output=True,
            )
            if proc.returncode == 0:
                return result
        return 0
    except (OSError, ValueError):
        # OSError: dpkg is missing or cannot be executed.
        # ValueError: arguments that cannot be handed to a subprocess
        # (e.g. an embedded NUL byte).
        return _compare_debian_versions(v1, v2)


def load_cache() -> dict:
    """Load the cache written by a previous run.

    The cache is strictly best-effort: any problem reading it results in an
    empty cache so that every package is simply re-read.

    Returns:
        The decoded cache mapping, or an empty dict when CACHE_FILE does not
        exist, cannot be read, is not valid JSON, or does not contain a JSON
        object at the top level.
    """
    try:
        with open(CACHE_FILE, "r", encoding="utf-8") as f:
            cache = json.load(f)
    except FileNotFoundError:
        # First run (or cache removed): nothing to report.
        return {}
    except (OSError, ValueError, RecursionError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"[!] Cache okunamadı, yok sayılıyor: {e}")
        return {}

    if not isinstance(cache, dict):
        # Callers index the cache by file name, so any other JSON type is
        # unusable.
        print("[!] Cache biçimi geçersiz, yok sayılıyor")
        return {}
    return cache


def save_cache(cache: dict):
    """Persist *cache* to CACHE_FILE as indented UTF-8 JSON.

    The data is written to a sibling ``.tmp`` file first and then moved over
    CACHE_FILE with a rename.
    """
    staging_path = CACHE_FILE.with_suffix(".tmp")
    with staging_path.open("w", encoding="utf-8") as handle:
        json.dump(cache, handle, indent=2, ensure_ascii=False)
    staging_path.replace(CACHE_FILE)


def file_fingerprint(path: Path) -> str:
    """Return a change-detection token for *path*: ``"<size>:<mtime>"``.

    Both values come from a single ``stat()`` call on the path.
    """
    info = path.stat()
    size, mtime = info.st_size, info.st_mtime
    return ":".join((str(size), str(mtime)))


def validate_metadata(meta: Dict, pkg_path: Path) -> Optional[Dict]:
    """Validate package metadata and clean its list fields in place.

    Args:
        meta: Decoded metadata (normally a dict parsed from JSON).
        pkg_path: Package file the metadata belongs to; only its name is
            used, for log messages.

    Returns:
        The same *meta* dict, with every key in LIST_FIELDS replaced by a
        list containing only valid package names, or None when *meta* is not
        a dict, the package name is invalid, or the version is missing or
        not a string.
    """
    if not isinstance(meta, dict):
        print(f"  [!] Geçersiz metadata: {pkg_path.name}")
        return None

    name = meta.get("name", "")
    version = meta.get("version", "")

    # Package name check. The isinstance guard keeps non-string names
    # (possible in untrusted JSON) away from the regex.
    if not isinstance(name, str) or not is_valid_pkg_name(name):
        print(f"  [!] Geçersiz paket adı: '{name}' ({pkg_path.name})")
        return None

    # Version check
    if not version or not isinstance(version, str):
        print(f"  [!] Geçersiz versiyon: '{version}' ({pkg_path.name})")
        return None

    # Clean the list fields
    for field in LIST_FIELDS:
        original = meta.get(field, [])
        cleaned = clean_list_field(original)

        if isinstance(original, list) and len(cleaned) != len(original):
            # Report what was dropped. Entries may be of any JSON type
            # (including unhashable dicts/lists), so avoid putting the
            # originals in a set and render non-strings with repr().
            kept = set(cleaned)
            removed = []
            for item in original:
                if isinstance(item, str) and item in kept:
                    continue
                label = item if isinstance(item, str) else repr(item)
                if label not in removed:
                    removed.append(label)
            if removed:
                print(f"  [clean] {name}/{field}: {', '.join(removed[:5])}")

        meta[field] = cleaned

    return meta


def read_metadata(pkg_path: Path) -> Optional[Dict]:
    """Read and validate ``metadata.json`` from a package archive.

    The archive is opened with transparent compression detection. Every
    failure is reported on stdout and turned into a None result so that one
    broken package never aborts the whole scan.

    Args:
        pkg_path: Path of the tar archive to inspect.

    Returns:
        The validated metadata dict (see validate_metadata), or None when
        the archive cannot be opened, has no readable ``metadata.json``, the
        JSON is malformed or not an object, or validation fails.
    """
    try:
        with tarfile.open(pkg_path, "r:*") as tar:
            try:
                meta_member = tar.getmember("metadata.json")
            except KeyError:
                print(f"  [!] metadata.json yok: {pkg_path.name}")
                return None

            # extractfile() returns None for members that are not regular
            # files or links.
            meta_file = tar.extractfile(meta_member)
            if meta_file is None:
                print(f"  [!] metadata.json okunamadı: {pkg_path.name}")
                return None

            with meta_file:
                meta = json.loads(meta_file.read().decode("utf-8"))

        # Valid JSON is not necessarily an object; everything below needs a
        # dict.
        if not isinstance(meta, dict):
            print(f"  [!] metadata.json bir JSON nesnesi değil: {pkg_path.name}")
            return None

        # Make sure every list field is present and is a list
        for key in LIST_FIELDS:
            if not isinstance(meta.get(key), list):
                meta[key] = []

        # Validate and clean
        return validate_metadata(meta, pkg_path)

    except json.JSONDecodeError as e:
        print(f"  [!] JSON parse hatası ({pkg_path.name}): {e}")
        return None
    except Exception as e:
        # Deliberate catch-all boundary: a damaged or hostile archive must
        # only cause this one package to be skipped.
        print(f"  [!] Hata ({pkg_path.name}): {e}")
        return None


def update_index():
    """Scan PACKAGES_DIR for ``*.nux`` archives and regenerate index.json.

    For each archive the metadata, SHA-256 and size are taken from the
    previous run's cache when the file fingerprint is unchanged and the
    cached entry is complete and still validates; otherwise the archive is
    read again. The refreshed cache and the new index are then written to
    disk (the index via a temporary file and rename).

    Returns:
        0 on success, 1 when the packages directory does not exist.
    """
    # Local import: only needed for the timezone-aware timestamp below.
    from datetime import timezone

    if not PACKAGES_DIR.exists():
        print("[!] Packages klasörü bulunamadı!")
        return 1

    old_cache = load_cache()
    if not isinstance(old_cache, dict):
        # Defensive: the cache is indexed by file name below.
        old_cache = {}
    new_cache = {}

    packages = {}
    total_versions = 0
    skipped = 0
    processed = 0
    invalid = 0

    print(f"[*] Paketler taranıyor: {PACKAGES_DIR}")
    print()

    for pkg_path in sorted(PACKAGES_DIR.glob("*.nux")):
        fname = pkg_path.name
        fp = file_fingerprint(pkg_path)

        meta = None
        file_sha256 = None
        file_size = None

        # Cache lookup. A cache entry is only trusted when it is a dict with
        # a matching fingerprint, dict metadata, and both checksum and size
        # present; anything else is treated as a damaged entry.
        cached = old_cache.get(fname)
        fingerprint_matched = (
            isinstance(cached, dict) and cached.get("fingerprint") == fp
        )
        if fingerprint_matched:
            cached_meta = cached.get("metadata")
            if (
                isinstance(cached_meta, dict)
                and cached_meta
                and isinstance(cached.get("sha256"), str)
                and isinstance(cached.get("size"), int)
            ):
                # Re-validate the cached metadata as well
                meta = validate_metadata(cached_meta, pkg_path)
                if meta:
                    file_sha256 = cached["sha256"]
                    file_size = cached["size"]

        if meta:
            skipped += 1
            print(f"  [=] {fname} (cache)")
        else:
            # Cache miss, or the cached entry is unusable: read the archive
            meta = read_metadata(pkg_path)
            if not meta:
                invalid += 1
                continue
            file_sha256 = sha256sum(pkg_path)
            file_size = pkg_path.stat().st_size
            processed += 1
            if fingerprint_matched:
                print(f"  [+] {fname} (cache invalid, yeniden okundu)")
            else:
                print(f"  [+] {fname} (okundu)")

        name = meta.get("name")
        version = meta.get("version")

        # Final sanity check
        if not name or not version:
            print(f"  [!] Geçersiz metadata: {fname}")
            invalid += 1
            continue

        meta["filename"] = f"packages/{fname}"
        meta["sha256"] = file_sha256
        meta["size"] = file_size

        new_cache[fname] = {
            "fingerprint": fp,
            "metadata": meta,
            "sha256": file_sha256,
            "size": file_size,
        }

        entry = packages.setdefault(name, {"latest": version, "versions": {}})

        if version in entry["versions"]:
            # Two files declare the same name/version. The later file (in
            # sorted order) replaces the earlier one in the index, so it must
            # not be counted as an additional version.
            print(
                f"  [!] Aynı paket/versiyon birden fazla dosyada: "
                f"{name} {version} ({fname})"
            )
        else:
            total_versions += 1

        entry["versions"][version] = meta

        # Skip the (subprocess-based) comparison when the version is already
        # the recorded latest, e.g. for the first version of a package.
        if version != entry["latest"] and version_cmp(version, entry["latest"]) > 0:
            entry["latest"] = version

    save_cache(new_cache)

    # Same "YYYY-MM-DDTHH:MM:SS[.ffffff]Z" format as before, built from an
    # aware UTC timestamp instead of the deprecated datetime.utcnow().
    generated_at = (
        datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
    )

    repo_data = {
        "status": "ok",
        "generated_at": generated_at,
        "package_count": len(packages),
        "version_count": total_versions,
        "repo_version": "3.1",
        "packages": packages,
    }

    # Write to a temporary file first, then rename over the real index
    tmp_file = INDEX_FILE.with_suffix(".json.tmp")
    with open(tmp_file, "w", encoding="utf-8") as f:
        json.dump(repo_data, f, indent=2, ensure_ascii=False)
    tmp_file.replace(INDEX_FILE)

    print()
    print("[OK] index.json güncellendi")
    print(f"[OK] Toplam: {len(packages)} paket, {total_versions} versiyon")
    print(f"[OK] İşlenen: {processed} | Cache: {skipped} | Geçersiz: {invalid}")

    return 0


# Script entry point: run the index update and use its return value
# (0 or 1) as the process exit status.
if __name__ == "__main__":
    raise SystemExit(update_index())
