import requests
import time
import os
import random
import sqlite3
from datetime import datetime
from collections import deque
from threading import Lock
from PIL import Image
from io import BytesIO
import torch
import traceback
import json
from transformers import Blip2Processor, Blip2ForConditionalGeneration
from sentence_transformers import SentenceTransformer
from bs4 import BeautifulSoup

# Load the BLIP2 processor and model at import time.
# The model is instantiated in float32 and placed on the CPU (device_map="cpu").
print("Lade BLIP2-Processor und Modell...")
processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-flan-t5-xl", torch_dtype=torch.float32, device_map="cpu")
print("BLIP2 Modell geladen.")

# Load the sentence transformer used to vectorize the generated descriptions.
print("Lade Sentence Transformer Modell...")
sentence_model = SentenceTransformer('BAAI/bge-base-en-v1.5')
print("Sentence Transformer Modell geladen.")

# Rate limiting: at most 3 requests per second.
# ``request_times`` keeps the timestamps of the most recent requests (bounded
# to 3 entries); ``request_lock`` serializes access to it.
request_times = deque(maxlen=3)
request_lock = Lock()


def check_rate_limit():
    """Block until another request may be sent, then record its timestamp.

    When the window already holds the maximum number of timestamps, the call
    sleeps until the oldest one is at least one second old. The sleep happens
    while the lock is held, so concurrent callers are released one at a time.
    """
    with request_lock:
        if len(request_times) == request_times.maxlen:
            # Wait until the oldest recorded request is one second old.
            wait_time = 1 - (time.time() - request_times[0])
            if wait_time > 0:
                time.sleep(wait_time)
        # Record the moment the request is actually released (after any
        # sleep). Storing the pre-sleep time would make later calls
        # under-estimate how long they have to wait.
        request_times.append(time.time())

def is_valid_license(license_info):
    """Report whether an image license is acceptable for import.

    NASA images are often public domain, so every license is accepted and
    ``license_info`` is not inspected.
    """
    accepted = True
    return accepted

def call_mistral_api(meta):
    """Send the image metadata to the Ollama/Mistral API and return the English description.

    Args:
        meta: Mapping with optional ``title``, ``description``, ``creator``,
            ``tags`` and ``date`` entries used to build the prompt.

    Returns:
        The stripped ``response`` text from the API, or ``""`` when the
        request fails, times out, or returns a non-200 status.
    """
    mistral_url = "https://ollamalocal.j-jn.com/api.php"
    # NOTE(review): the API key is hard-coded in source; consider moving it
    # to configuration.
    mistral_key = "a1b2c345a1b2c345"

    # Tags may be missing, None, a single string, or contain non-string
    # entries; normalize so building the prompt can never raise.
    raw_tags = meta.get('tags') or []
    if isinstance(raw_tags, str):
        raw_tags = [raw_tags]
    tags_text = ', '.join(str(tag) for tag in raw_tags)

    prompt = (
        "You are an expert image analyzer. Write a detailed description of what is shown in the image, focusing only on the visual content. "
        "Include important details about people, objects, actions, setting, and atmosphere. "
        "Ignore technical details like photographer, upload date, or license information.\n"
        "Available context:\n"
        f"Title: {meta.get('title','')}\n"
        f"Description: {meta.get('description','')}\n"
        f"Creator: {meta.get('creator','')}\n"
        f"Tags: {tags_text}\n"
        f"Date: {meta.get('date','')}\n"
        "Write a detailed image description:"
    )
    try:
        response = requests.post(
            mistral_url,
            headers={"Content-Type": "application/json"},
            data=json.dumps({
                "key": mistral_key,
                "model": "mistral",
                "prompt": prompt,
                "stream": False
            }),
            # (connect, read) timeout: without it a stalled server would
            # block the import loop forever. The read timeout is generous
            # because text generation can be slow.
            timeout=(10, 300)
        )
        if response.status_code == 200:
            data = response.json()
            # Treat a missing or null "response" field as an empty answer.
            return (data.get("response") or "").strip()
        else:
            print(f"Fehler von Mistral-API: {response.status_code} {response.text}")
            return ""
    except Exception as e:
        # Best-effort call: any network/decoding problem yields "".
        print(f"Fehler beim Aufruf der Mistral-API: {e}")
        return ""

# Prompt fragments that the model tends to echo back; removed from the output.
_BLIP2_UNWANTED_PHRASES = (
    "context:",
    "based on this context",
    "describe what you see in this image",
    "in detail",
    "click on the image to enlarge",
    "in english",
    "now describe",
)


def _download_rgb_image(image_url, timeout):
    """Download ``image_url`` and return it as a validated RGB PIL image, or None."""
    print(f"Lade Bild von {image_url}")
    headers = {"User-Agent": "Mozilla/5.0 (compatible; ImageDownloader/1.0; +https://openverse.org)"}
    # A timeout keeps a stalled download from blocking the import loop forever.
    response = requests.get(image_url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print(f"Fehler beim Bild-Download: Status {response.status_code}")
        return None

    # Check the Content-Type before trying to decode the payload.
    content_type = response.headers.get('content-type', '').lower()
    if not content_type.startswith('image/'):
        print(f"Ungültiger Content-Type: {content_type}")
        return None

    print("Bild erfolgreich geladen, konvertiere...")
    try:
        image = Image.open(BytesIO(response.content))
        # Force a full decode so corrupt files fail here, not during inference.
        image.load()

        print(f"Originalformat: {image.format}, Größe: {image.size}, Modus: {image.mode}")

        # Reject images smaller than 10 pixels in either dimension.
        if image.size[0] < 10 or image.size[1] < 10:
            print("Bild ist zu klein")
            return None

        if image.mode != "RGB":
            print(f"Konvertiere Bild von {image.mode} nach RGB...")
            image = image.convert("RGB")

        # getbbox() is falsy when the image has no non-zero pixels.
        if not image.getbbox():
            print("Bild ist leer")
            return None

    except Exception as img_e:
        print(f"Fehler beim Öffnen/Konvertieren des Bildes: {img_e}")
        print("Versuche nächstes Bild...")
        return None

    return image


def _clean_blip2_output(generated_text):
    """Lower-case the model output and strip echoed prompt fragments from it."""
    cleaned_text = generated_text.lower()
    for phrase in _BLIP2_UNWANTED_PHRASES:
        cleaned_text = cleaned_text.replace(phrase.lower(), "").strip()

    # Collapse repeated whitespace and drop leading dots/colons.
    cleaned_text = " ".join(cleaned_text.split())
    return cleaned_text.lstrip(".:")


def analyze_with_blip2(image_url, context_info, mistral_context=None, timeout=30):
    """Download an image and describe it with the BLIP2 model.

    Args:
        image_url: URL of the image to download.
        context_info: Accepted for interface compatibility; not used here.
        mistral_context: Optional text that is prepended to the prompt as
            additional context.
        timeout: Timeout in seconds for the image download.

    Returns:
        The cleaned, lower-cased description, or ``None`` when the download,
        image validation, or model inference fails or yields no usable text.
    """
    try:
        image = _download_rgb_image(image_url, timeout)
        if image is None:
            return None

        try:
            print("Starte Bildanalyse mit BLIP2-Modell...")
            if mistral_context:
                # Use the Mistral text as additional context for the prompt.
                prompt = f"Context: {mistral_context}\nBased on this context, provide a very detailed and comprehensive description of this image. Include all visible elements, actions, emotions, and atmosphere. Describe the scene as if you were explaining it to someone who cannot see it."
            else:
                prompt = "Provide a very detailed and comprehensive description of this image. Include all visible elements, actions, emotions, and atmosphere. Describe the scene as if you were explaining it to someone who cannot see it."

            inputs = processor(images=image, text=prompt, return_tensors="pt").to(device="cpu", dtype=torch.float32)
            print("Generiere Beschreibung...")
            # NOTE(review): top_p/temperature presumably only take effect with
            # do_sample=True; with plain beam search they look unused - confirm.
            outputs = model.generate(
                **inputs,
                max_new_tokens=200,
                num_beams=5,
                min_length=50,
                top_p=0.9,
                repetition_penalty=1.5,
                length_penalty=1.0,
                temperature=0.7,
                early_stopping=True
            )
            generated_text = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()
            print(f"Rohe KI-Ausgabe: {generated_text}")

            cleaned_text = _clean_blip2_output(generated_text)

            if not cleaned_text:
                print("WARNUNG: Keine verwertbare Beschreibung generiert!")
                return None

            print(f"Bereinigte Beschreibung: {cleaned_text}")
            print("Bildanalyse erfolgreich abgeschlossen!")
            return cleaned_text.strip()

        except Exception as model_e:
            print(f"Fehler bei der Modell-Inferenz: {model_e}")
            traceback.print_exc()
            return None
    except Exception as e:
        # Covers download errors (including timeouts) raised before decoding.
        print(f"Fehler bei BLIP2-Analyse: {str(e)}")
        traceback.print_exc()
        return None

def vectorize_text(text):
    """Convert text into a JSON-encoded embedding vector using the BGE model.

    Returns ``None`` for empty input or when encoding fails; otherwise the
    normalized embedding serialized as a JSON list so it can be stored in
    SQLite.
    """
    try:
        if text:
            vector = sentence_model.encode(text, normalize_embeddings=True)
            serialized = json.dumps(vector.tolist())
            return serialized
        return None
    except Exception as err:
        print(f"Fehler bei der Vektorisierung: {str(err)}")
        return None

def save_to_database(image_data, db_path='../database/images.db'):
    """Insert one image record into the ``images`` table.

    Args:
        image_data: Mapping describing the image. ``title`` and ``url`` are
            required; all other fields fall back to empty defaults.
        db_path: Path of the SQLite database file.

    Returns:
        ``True`` when a new row was committed, ``False`` when the URL already
        exists or any error occurred.
    """
    conn = None
    saved = False
    try:
        print("\nDEBUG: Versuche Datenbankverbindung herzustellen...")
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        # Skip images whose URL is already stored.
        cursor.execute("SELECT id FROM images WHERE url = ?", (image_data['url'],))
        if cursor.fetchone():
            print(f"Bild mit URL {image_data['url']} existiert bereits in der Datenbank. Überspringe...")
            return False

        print("DEBUG: Bereite SQL-Query vor...")
        query = """INSERT INTO images 
                  (title, url, wiki_description, artist, license, attribution, 
                   categories, imgwidth, imgheight, mime, date, long_description,
                   ollama_description, blip_description, ollama_vector, blip_vector, source)
                  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"""

        print("DEBUG: Führe SQL-Query aus...")
        print(f"DEBUG: Daten zum Einfügen: {image_data}")

        # Vectorize both descriptions for storage alongside the text.
        ollama_vector = vectorize_text(image_data.get('mistral_desc', ''))
        blip_vector = vectorize_text(image_data.get('blip2_description', ''))

        # Tags may be None or contain non-string entries; normalize so the
        # join cannot raise and discard an otherwise valid record.
        categories = ','.join(str(tag) for tag in (image_data.get('tags') or []))

        cursor.execute(query, (
            image_data['title'],
            image_data['url'],
            image_data.get('description', ''),
            image_data.get('creator', ''),
            image_data.get('license', ''),
            image_data.get('attribution', ''),
            categories,
            image_data.get('width', 0),
            image_data.get('height', 0),
            image_data.get('mime', ''),
            image_data.get('date', ''),
            '',                                      # long_description stays empty
            image_data.get('mistral_desc', ''),      # ollama_description
            image_data.get('blip2_description', ''), # blip_description
            ollama_vector,                           # ollama_vector
            blip_vector,                             # blip_vector
            'nasa'                                  # source
        ))

        print("DEBUG: Commit der Änderungen...")
        conn.commit()
        saved = True
    except Exception as e:
        print(f"FEHLER beim Speichern in der Datenbank: {str(e)}")
        print(f"FEHLER-Details: {traceback.format_exc()}")
    finally:
        # Always release the connection, including on the duplicate and
        # error paths, so failures do not leak open database handles.
        if conn is not None:
            if saved:
                print("DEBUG: Schließe Datenbankverbindung...")
            conn.close()

    if saved:
        print("DEBUG: Datenbankoperation erfolgreich!")
    return saved

def download_image():
    """Fetch one random NASA image, describe it, and store it in the database.

    Queries the NASA Image and Video Library search API for a random result
    page, picks a random item, asks the Mistral API for a description, runs
    the BLIP2 analysis with that description as context, and saves the result.

    Returns:
        ``True`` when the image was analyzed and stored, ``False`` otherwise.
    """
    # NASA Image and Video Library API URL
    api_url = "https://images-api.nasa.gov/search"

    try:
        print("Sende Anfrage an NASA Image and Video Library API...")
        check_rate_limit()
        headers = {
            "User-Agent": "Mozilla/5.0 (compatible; ImageDownloader/1.0; +https://openverse.org)",
            "Accept": "application/json"
        }

        # Parameters for the API request
        params = {
            "q": "space",  # search term
            "media_type": "image",
            "page": random.randint(1, 100)  # random page between 1 and 100
        }

        # A timeout keeps a stalled API call from blocking the loop forever.
        response = requests.get(api_url, params=params, headers=headers, timeout=30)
        print(f"API-Antwort Status: {response.status_code}")

        if response.status_code != 200:
            print(f"Fehler von NASA API: {response.status_code}")
            return False

        data = response.json()
        items = data.get('collection', {}).get('items', [])

        if not items:
            print("Keine Bilder gefunden")
            return False

        # Pick a random image. Fall back to an empty mapping when 'data' or
        # 'links' is missing or an empty list, instead of raising IndexError.
        item = random.choice(items)
        image_data = (item.get('data') or [{}])[0]
        image_links = (item.get('links') or [{}])[0]

        image_url = image_links.get('href', '')
        if not image_url:
            print("Keine Bild-URL gefunden")
            return False

        print(f"Gefundenes Bild: {image_data.get('title', 'Unbekannt')}")

        image_info = {
            'title': image_data.get('title', 'NASA Image'),
            'url': image_url,
            'description': image_data.get('description', ''),
            'creator': image_data.get('photographer', 'NASA'),
            'license': 'Public Domain',
            'attribution': 'NASA',
            # 'keywords' may be absent or null in the API response.
            'tags': image_data.get('keywords') or [],
            'width': 0,
            'height': 0,
            'mime': 'image/jpeg',
            'date': image_data.get('date_created', datetime.now().strftime('%Y-%m-%d'))
        }

        # 1. Fetch the Mistral description
        print("Frage Mistral (Ollama) nach englischer Beschreibung...")
        mistral_desc = call_mistral_api(image_info)
        print(f"Mistral-Antwort: {mistral_desc}")
        image_info['mistral_desc'] = mistral_desc

        # 2. BLIP2 analysis with the Mistral description as context
        print("Führe BLIP2-Analyse mit Mistral-Kontext durch...")
        context_info = {
            'title': image_info['title'],
            'description': image_info['description'],
            'creator': image_info['creator']
        }
        blip2_result = analyze_with_blip2(image_info['url'], context_info, mistral_context=mistral_desc)
        if blip2_result:
            image_info['blip2_description'] = blip2_result
            print(f"BLIP2-Analyse: {blip2_result}")
        else:
            print("BLIP2-Analyse fehlgeschlagen")
            return False

        if save_to_database(image_info):
            print(f"Bilddaten erfolgreich in der Datenbank gespeichert: {image_info['title']}")
            return True

        print("Fehler beim Speichern in der Datenbank")
        return False
    except Exception as e:
        # Top-level boundary for one import attempt: log and report failure
        # so the caller's loop can simply retry.
        print(f"Fehler beim Verarbeiten: {str(e)}")
        traceback.print_exc()
    return False

def main():
    """Run the NASA import loop indefinitely.

    Ensures the ``../database`` folder exists, then repeatedly calls
    ``download_image`` with a three-second pause after every attempt.
    """
    # Make sure the database folder exists before the first import attempt.
    db_folder = os.path.join("..", "database")
    os.makedirs(db_folder, exist_ok=True)
    print(f"Database-Ordner: {os.path.abspath(db_folder)}")

    print("Starte Bilddaten-Import von NASA...")
    while True:
        if not download_image():
            print("Keine passenden Bilddaten gefunden, versuche erneut...")
        # Pause for 3 seconds after every attempt.
        time.sleep(3)

# Start the import loop only when this file is executed as a script.
if __name__ == "__main__":
    main() 