pcacc 3 týždňov pred
commit
7ff48d9b83
4 zmenené súbory: 244 pridaní a 0 odobraní
  1. 191 0
      gitHistoryManage.py
  2. 3 0
      requirement.txt
  3. 34 0
      scriptBase/base.py
  4. 16 0
      scriptBase/loki_log.py

+ 191 - 0
gitHistoryManage.py

@@ -0,0 +1,191 @@
+import git
+import os
+from datetime import datetime, timezone
+from neo4j import GraphDatabase
+import scriptBase.base
+
+# --- Neo4j connection settings ---
+# Each value can be overridden with an environment variable of the same name;
+# the literals are only fallbacks so that existing setups keep working.
+# NOTE(review): a password is committed here in plain text -- rotate it and
+# provide NEO4J_PASSWORD through the environment instead.
+NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://99.12.23.49:7687")
+NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
+NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "1234.cc-")
+
+# --- Utility: wipe the Neo4j database ---
+def clear_neo4j_database():
+    """
+    Delete every node and relationship in the configured Neo4j database.
+
+    WARNING: this operation cannot be undone.
+
+    Failures are reported with print() and are not re-raised, so callers
+    continue even when the database could not be cleared.
+    """
+    try:
+        # Both the driver and the session are context managers, so they are
+        # closed even when the query raises.
+        with GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD)) as driver:
+            with driver.session() as session:
+                print("正在清空 Neo4j 資料庫,此操作無法撤銷...")
+                # DETACH DELETE removes a node's relationships together with
+                # the node. consume() waits for the statement to finish so
+                # that an error surfaces before success is reported.
+                session.run("MATCH (n) DETACH DELETE n").consume()
+                print("資料庫已成功清空。")
+    except Exception as e:
+        print(f"清空 Neo4j 失敗: {e}")
+
+# --- Core: extract Git history and import it into Neo4j ---
+def _count_changed_lines(patch_text):
+    """
+    Count added and deleted lines in the unified-diff text of a single file.
+
+    Only lines inside hunks (after the first '@@' header) are counted, so any
+    '---' / '+++' file-header lines are ignored while content lines that
+    merely start with '++' or '--' are still counted.
+
+    Args:
+        patch_text (str): Unified-diff text for one file.
+
+    Returns:
+        tuple: (lines_added, lines_deleted)
+    """
+    added = 0
+    deleted = 0
+    in_hunk = False
+    # split('\n') rather than splitlines(): content lines may contain other
+    # characters that splitlines() would treat as line breaks.
+    for line in patch_text.split('\n'):
+        if line.startswith('@@'):
+            in_hunk = True
+        elif not in_hunk:
+            continue
+        elif line.startswith('+'):
+            added += 1
+        elif line.startswith('-'):
+            deleted += 1
+    return added, deleted
+
+
+def _import_commit(tx, commit_data):
+    """
+    Transaction function that writes one commit and everything attached to it.
+
+    Args:
+        tx: Neo4j transaction supplied by session.execute_write().
+        commit_data (dict): Commit description built by import_git_to_neo4j().
+    """
+    # Create or match the author node.
+    tx.run("""
+        MERGE (a:Author {email: $author_email})
+        ON CREATE SET a.name = $author_name
+        """, author_email=commit_data['author_email'], author_name=commit_data['author_name'])
+
+    # Create or match the commit node. SET is unconditional because the node
+    # may already exist as a bare placeholder created from a child commit's
+    # PARENT relationship (see below).
+    tx.run("""
+        MERGE (c:Commit {id: $commit_id})
+        SET c.message = $message, c.timestamp = $timestamp
+        """, commit_id=commit_data['commit_id'], message=commit_data['commit_message'], timestamp=commit_data['committed_datetime'])
+
+    # Link the author to the commit.
+    tx.run("""
+        MATCH (a:Author {email: $author_email})
+        MATCH (c:Commit {id: $commit_id})
+        MERGE (a)-[:AUTHORED]->(c)
+        """, author_email=commit_data['author_email'], commit_id=commit_data['commit_id'])
+
+    # Link the commit to its parent. The parent is MERGEd, not MATCHed:
+    # commits are processed child-first, so the parent node normally does not
+    # exist yet and a MATCH would silently drop the relationship.
+    if commit_data['parent_id']:
+        tx.run("""
+            MATCH (child:Commit {id: $child_id})
+            MERGE (parent:Commit {id: $parent_id})
+            MERGE (child)-[:PARENT]->(parent)
+            """, parent_id=commit_data['parent_id'], child_id=commit_data['commit_id'])
+
+    # Link the commit to every file it touched.
+    for file_change in commit_data['file_changes']:
+        # Fall back to the old path when there is no new one, so no File node
+        # with an empty path is created.
+        file_path = file_change['new_path'] or file_change['old_path']
+        tx.run("""
+            MATCH (c:Commit {id: $commit_id})
+            MERGE (f:File {path: $file_path})
+            MERGE (c)-[:CHANGED {
+                change_type: $change_type,
+                lines_added: $lines_added,
+                lines_deleted: $lines_deleted
+            }]->(f)
+            """,
+            commit_id=commit_data['commit_id'],
+            file_path=file_path,
+            change_type=file_change['change_type'],
+            lines_added=file_change['lines_added'],
+            lines_deleted=file_change['lines_deleted'])
+
+
+def import_git_to_neo4j(repo_path, since=None):
+    """
+    Read the commit history of a Git repository and import it into Neo4j.
+
+    Every non-merge commit becomes a Commit node linked to its Author, to its
+    first parent and to the File nodes it changed. Merge commits are skipped;
+    when one is the parent of an imported commit it appears only as a Commit
+    node carrying just its id.
+
+    Errors are reported with print(); the function returns early when the
+    repository or the database is unavailable.
+
+    Args:
+        repo_path (str): Local path of the Git repository.
+        since (datetime, optional): Only import commits after this time.
+            If None, the full history is imported.
+    """
+    # Validate the repository path.
+    if not os.path.exists(repo_path) or not git.repo.base.is_git_dir(os.path.join(repo_path, '.git')):
+        print(f"錯誤: '{repo_path}' 不是一個有效的 Git 倉庫路徑。")
+        return
+
+    try:
+        repo = git.Repo(repo_path)
+    except git.InvalidGitRepositoryError:
+        print(f"錯誤: '{repo_path}' 不是一個有效的 Git 倉庫。請檢查路徑。")
+        return
+
+    # Connect to Neo4j. verify_connectivity() makes the success message
+    # truthful by checking the connection before it is reported.
+    driver = None
+    try:
+        driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
+        driver.verify_connectivity()
+    except Exception as e:
+        print(f"連接 Neo4j 失敗: {e}")
+        if driver is not None:
+            driver.close()
+        return
+    print("成功連接到 Neo4j 資料庫。")
+
+    try:
+        # Choose the commit iterator according to 'since'.
+        if since:
+            print(f"正在進行增量匯入,從 {since.isoformat()} 開始...")
+            since_utc = since.astimezone(timezone.utc)
+            commits = repo.iter_commits('--all', after=since_utc)
+        else:
+            print("正在進行全量匯入...")
+            commits = repo.iter_commits('--all')
+
+        with driver.session() as session:
+            for commit in commits:
+                # Skip merge commits.
+                if len(commit.parents) > 1:
+                    continue
+
+                parent = commit.parents[0] if commit.parents else None
+
+                file_changes = []
+                try:
+                    if parent is None:
+                        # Root commit: diff against the empty tree.
+                        diff_index = commit.diff(git.NULL_TREE, create_patch=True)
+                    else:
+                        # Diff from the parent to this commit so additions,
+                        # deletions and paths describe what the commit did.
+                        diff_index = parent.diff(commit, create_patch=True)
+
+                    for diff in diff_index:
+                        raw_patch = diff.diff or b''
+                        if isinstance(raw_patch, bytes):
+                            diff_text = raw_patch.decode('utf-8', errors='ignore')
+                        else:
+                            diff_text = raw_patch
+                        lines_added, lines_deleted = _count_changed_lines(diff_text)
+
+                        file_changes.append({
+                            "change_type": diff.change_type if diff.change_type is not None else "",
+                            "old_path": diff.a_path if diff.a_path is not None else "",
+                            "new_path": diff.b_path if diff.b_path is not None else "",
+                            "lines_added": lines_added,
+                            "lines_deleted": lines_deleted,
+                        })
+                except Exception as e:
+                    print(f"警告: 處理提交 {commit.hexsha} 的文件變更時發生錯誤: {e}")
+                    continue
+
+                commit_data = {
+                    "commit_id": commit.hexsha,
+                    "parent_id": parent.hexsha if parent is not None else None,
+                    "author_name": commit.author.name,
+                    "author_email": commit.author.email,
+                    "committed_datetime": commit.committed_datetime.isoformat(),
+                    "commit_message": commit.message.strip(),
+                    "file_changes": file_changes,
+                }
+
+                # A failing commit is reported and skipped so that one bad
+                # commit does not abort the whole import.
+                try:
+                    session.execute_write(_import_commit, commit_data)
+                    print(f"已匯入提交: {commit.hexsha[:7]}, commit_data: {commit_data}")
+                except Exception as e:
+                    print(f"警告: 匯入提交 {commit.hexsha[:7]} 時發生錯誤: {e}, commit_data: {commit_data}")
+    finally:
+        # Always release the driver, even when iteration fails part-way.
+        driver.close()
+
+    print("匯入完成,Neo4j 連接已關閉。")
+
+# --- Script entry point ---
+if __name__ == "__main__":
+    import sys
+
+    # Path of the Git repository to import. It can be passed as the first
+    # command-line argument; without one the original default path is used.
+    repo_path = sys.argv[1] if len(sys.argv) > 1 else 'D:/mayun/LR04.02_RVCTerminalPlus'
+
+    # Step 1: clear the database.
+    # A full import normally starts from an empty database to avoid duplicates.
+    print("----- 開始清空資料庫 -----")
+    clear_neo4j_database()
+    print("----- 資料庫清空完成 -----")
+
+    # Step 2: run the full import.
+    print("\n" + "="*50 + "\n")
+    print("----- 執行全量匯入 -----")
+    import_git_to_neo4j(repo_path)
+    print("----- 全量匯入完成 -----")
+
+    # Example: run an incremental import instead.
+    # Set the start time of the increment, e.g. 1 July 2024:
+    # start_time = datetime(2024, 7, 1, 0, 0, 0, tzinfo=timezone.utc)
+    # print("\n" + "="*50 + "\n")
+    # print("----- 執行增量匯入 -----")
+    # import_git_to_neo4j(repo_path, since=start_time)

+ 3 - 0
requirement.txt

@@ -0,0 +1,3 @@
+# pip install -r .\requirement.txt -i  http://central.jaf.cmbchina.cn/artifactory/api/pypi/group-pypi/simple  --trusted-host=central.jaf.cmbchina.cn
+GitPython
+# gitHistoryManage.py calls session.execute_write(), which needs the 5.x driver
+neo4j>=5.0
+# imported by scriptBase/loki_log.py
+requests

+ 34 - 0
scriptBase/base.py

@@ -0,0 +1,34 @@
+import builtins
+import datetime
+import socket
+import threading
+from scriptBase.loki_log import *
+
+# Keep a reference to the original print function
+original_print = print
+
+# Replacement for print: timestamps the output and mirrors it to Loki
+def custom_print(*args, **kwargs):
+    """
+    Print like the built-in print, prefixed with the current local time, and
+    send the same text to Loki from a background thread.
+
+    Accepts the same positional and keyword arguments as print(); they are
+    forwarded unchanged to the original print function.
+    """
+    # Prefix the output with the current time
+    current_time = datetime.datetime.now()
+    prefix = f"[{current_time}]"
+    original_print(prefix, *args, **kwargs)
+
+    # Build the Loki message with the caller's separator so that it matches
+    # what was printed (sep=None means the default single space).
+    sep = kwargs.get('sep')
+    if sep is None:
+        sep = ' '
+    dstStr = sep.join(str(arg) for arg in args)
+
+    log_thread = threading.Thread(
+        target=log_to_loki,
+        args=(socket.gethostname(), dstStr),
+        daemon=True  # daemon thread: does not keep the process alive at exit
+    )
+    log_thread.start()  # start without blocking the caller
+
+
+
+# Replace the built-in print with the custom version
+builtins.print = custom_print
+

+ 16 - 0
scriptBase/loki_log.py

@@ -0,0 +1,16 @@
+import requests
+import time
+
+def log_to_loki(label : str, message : str,
+                url : str = "http://99.12.23.49:3100/loki/api/v1/push",
+                timeout : float = 5.0):
+    """
+    Push a single log line to a Loki server.
+
+    Logging is best-effort: a failed push is reported with one line on stderr
+    and never raised to the caller.
+
+    Args:
+        label: Value stored in the stream's "env" label.
+        message: Log line to send.
+        url: Loki push endpoint.
+        timeout: Seconds to wait for the server before giving up, so that an
+            unreachable server cannot block the calling thread forever.
+    """
+    import sys
+
+    log_data = {
+    "streams": [{
+        "stream": {"app": "git_history", "env": label},
+        # Loki expects the timestamp as a string of nanoseconds since the epoch
+        "values": [[str(int(time.time()*1e9)), message]]
+        }]
+    }
+    try:
+        response = requests.post(url, json=log_data, timeout=timeout)
+        response.raise_for_status()
+    except requests.RequestException as e:
+        # Write to stderr directly instead of calling print(): a caller may
+        # have replaced print with a function that logs to Loki again.
+        sys.stderr.write(f"log_to_loki failed: {e}\n")
+
+
+
+if __name__ == '__main__':
+    log_to_loki('test', 'this is a test')